diff --git a/src/swcio.hpp b/src/swcio.hpp index 1687fa60deeb726b5f77bcba6290c04545dbc7ee..630439049b56ed9074406eb9edd7af44ea5b44f8 100644 --- a/src/swcio.hpp +++ b/src/swcio.hpp @@ -2,8 +2,11 @@ #include <exception> #include <iostream> +#include <map> #include <sstream> #include <type_traits> +#include <unordered_set> +#include <vector> namespace nestmc { @@ -20,9 +23,10 @@ static bool starts_with(const std::string &str, const std::string &prefix) class cell_record { public: + using id_type = int; - // FIXME: enum's are not completely type-safe, since they can accept anything - // that can be casted to their underlying type. + // FIXME: enum's are not completely type-safe, since they can accept + // anything that can be casted to their underlying type. // // More on SWC files: http://research.mssm.edu/cnic/swc.html enum kind { @@ -48,22 +52,7 @@ public: , r_(r) , parent_id_(parent_id) { - // Check cell type as well; enum's do not offer complete type safety, - // since you can cast anything that fits to its underlying type - if (type_ < 0 || type_ > custom) - throw std::invalid_argument("unknown cell type"); - - if (id_ < 0) - throw std::invalid_argument("negative ids not allowed"); - - if (parent_id_ < -1) - throw std::invalid_argument("parent_id < -1 not allowed"); - - if (parent_id_ >= id_) - throw std::invalid_argument("parent_id >= id is not allowed"); - - if (r_ < 0) - throw std::invalid_argument("negative radii are not allowed"); + check_consistency(); } cell_record() @@ -123,12 +112,12 @@ public: return type_; } - int id() const + id_type id() const { return id_; } - int parent() const + id_type parent() const { return parent_id_; } @@ -158,12 +147,47 @@ public: return 2*r_; } + void renumber(id_type new_id, std::map<id_type, id_type> &idmap) + { + auto old_id = id_; + id_ = new_id; + + // Obtain parent_id from the map + auto new_parent_id = idmap.find(parent_id_); + if (new_parent_id != idmap.end()) { + parent_id_ = new_parent_id->second; + } + + check_consistency(); + idmap.insert(std::make_pair(old_id, new_id)); + } + private: + void check_consistency() const + { + // Check cell type as well; enum's do not offer complete type safety, + // since you can cast anything that fits to its underlying type + if (type_ < 0 || type_ > custom) + throw std::invalid_argument("unknown cell type"); + + if (id_ < 0) + throw std::invalid_argument("negative ids not allowed"); + + if (parent_id_ < -1) + throw std::invalid_argument("parent_id < -1 not allowed"); + + if (parent_id_ >= id_) + throw std::invalid_argument("parent_id >= id is not allowed"); + + if (r_ < 0) + throw std::invalid_argument("negative radii are not allowed"); + } + kind type_; // cell type - int id_; // cell id + id_type id_; // cell id float x_, y_, z_; // cell coordinates float r_; // cell radius - int parent_id_; // cell parent's id + id_type parent_id_; // cell parent's id }; class swc_parse_error : public std::runtime_error @@ -249,7 +273,7 @@ private: template<> cell_record::kind swc_parser::parse_value_strict(std::istream &is) { - int val; + cell_record::id_type val; check_parse_status(is >> val); // Let cell_record's constructor check for the type validity @@ -296,5 +320,54 @@ std::ostream &operator<<(std::ostream &os, const cell_record &cell) return os; } +// +// Reads cells from an input stream until an eof is encountered and returns a +// cleaned sequence of cell records. +// +// For more information check here: +// https://github.com/eth-cscs/cell_algorithms/wiki/SWC-file-parsing +// +std::vector<cell_record> swc_read_cells(std::istream &is) +{ + std::vector<cell_record> cells; + std::unordered_set<cell_record::id_type> ids; + + std::size_t num_trees = 0; + cell_record::id_type last_id = -1; + bool needsort = false; + + cell_record curr_cell; + while ( !(is >> curr_cell).eof()) { + if (curr_cell.parent() == -1 && ++num_trees > 1) + // only a single tree is allowed + break; + + auto inserted = ids.insert(curr_cell.id()); + if (inserted.second) { + // not a duplicate; insert cell + cells.push_back(curr_cell); + if (!needsort && curr_cell.id() < last_id) + needsort = true; + + last_id = curr_cell.id(); + } + } + + if (needsort) + std::sort(cells.begin(), cells.end()); + + // Renumber cells if necessary + std::map<cell_record::id_type, cell_record::id_type> idmap; + cell_record::id_type next_id = 0; + for (auto &c : cells) { + if (c.id() != next_id) + c.renumber(next_id, idmap); + + ++next_id; + } + + return std::move(cells); +} + } // end of nestmc::io } // end of nestmc diff --git a/tests/test_swcio.cpp b/tests/test_swcio.cpp index 923f39b4d8b6f88949da1705b9b9b393a16cb3ca..ba62c90bf4f30b23405fd15a0c546e8ea91e6edc 100644 --- a/tests/test_swcio.cpp +++ b/tests/test_swcio.cpp @@ -1,3 +1,4 @@ +#include <array> #include <iostream> #include <fstream> #include <numeric> @@ -191,7 +192,7 @@ TEST(swc_parser, valid_input) ++nr_records; } } catch (std::exception &e) { - ADD_FAILURE(); + ADD_FAILURE() << "unexpected exception thrown\n"; } } } @@ -216,3 +217,79 @@ TEST(swc_parser, from_allen_db) // verify that the correct number of nodes was read EXPECT_EQ(nodes.size(), 1058u); } + +TEST(swc_parser, input_cleaning) +{ + using namespace nestmc::io; + + { + // Check duplicates + std::stringstream is; + is << "1 1 14.566132 34.873772 7.857000 0.717830 -1\n"; + is << "2 1 14.566132 34.873772 7.857000 0.717830 1\n"; + is << "2 1 14.566132 34.873772 7.857000 0.717830 1\n"; + is << "2 1 14.566132 34.873772 7.857000 0.717830 1\n"; + + auto cells = swc_read_cells(is); + EXPECT_EQ(2, cells.size()); + } + + { + // Check multiple trees + std::stringstream is; + is << "1 1 14.566132 34.873772 7.857000 0.717830 -1\n"; + is << "2 1 14.566132 34.873772 7.857000 0.717830 1\n"; + is << "3 1 14.566132 34.873772 7.857000 0.717830 -1\n"; + is << "4 1 14.566132 34.873772 7.857000 0.717830 1\n"; + + auto cells = swc_read_cells(is); + EXPECT_EQ(2, cells.size()); + } + + { + // Check unsorted input + std::stringstream is; + is << "3 1 14.566132 34.873772 7.857000 0.717830 1\n"; + is << "2 1 14.566132 34.873772 7.857000 0.717830 1\n"; + is << "4 1 14.566132 34.873772 7.857000 0.717830 1\n"; + is << "1 1 14.566132 34.873772 7.857000 0.717830 -1\n"; + + std::array<cell_record::id_type, 4> expected_id_list = {{ 0, 1, 2, 3 }}; + auto cells = swc_read_cells(is); + ASSERT_EQ(4, cells.size()); + + auto expected_id = expected_id_list.cbegin(); + for (const auto &c : cells) { + EXPECT_EQ(*expected_id, c.id()); + ++expected_id; + } + } + + { + // Check holes in numbering + std::stringstream is; + is << "1 1 14.566132 34.873772 7.857000 0.717830 -1\n"; + is << "21 1 14.566132 34.873772 7.857000 0.717830 1\n"; + is << "31 1 14.566132 34.873772 7.857000 0.717830 21\n"; + is << "41 1 14.566132 34.873772 7.857000 0.717830 21\n"; + is << "51 1 14.566132 34.873772 7.857000 0.717830 1\n"; + is << "61 1 14.566132 34.873772 7.857000 0.717830 51\n"; + + auto cells = swc_read_cells(is); + std::array<cell_record::id_type, 6> expected_id_list = + {{ 0, 1, 2, 3, 4, 5 }}; + std::array<cell_record::id_type, 6> expected_parent_list = + {{ -1, 0, 1, 1, 0, 4 }}; + ASSERT_EQ(6, cells.size()); + + auto expected_id = expected_id_list.cbegin(); + auto expected_parent = expected_parent_list.cbegin(); + for (const auto &c : cells) { + EXPECT_EQ(*expected_id, c.id()); + EXPECT_EQ(*expected_parent, c.parent()); + ++expected_id; + ++expected_parent; + } + + } +}