diff --git a/CMakeLists.txt b/CMakeLists.txt index ea981fe4e2f67149ce0f349175cc99d9e8346d17..61278f34ff79a0b1dc688835f369df0b144926e0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ project (cell_algorithms) enable_language(CXX) # compilation flags -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -pthread -Wall") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -std=c++11 -pthread -Wall") # this generates a .json file with full compilation command for each file set(CMAKE_EXPORT_COMPILE_COMMANDS "YES") diff --git a/src/swcio.cpp b/src/swcio.cpp index fe7abba464c21eb8362971eb864f3c7d66e0adf6..28e189a6f5a19df31c9ee961ffe0fe79be1a7676 100644 --- a/src/swcio.cpp +++ b/src/swcio.cpp @@ -13,7 +13,7 @@ namespace io // // cell_record implementation -// +// void cell_record::renumber(id_type new_id, std::map<id_type, id_type> &idmap) { auto old_id = id_; @@ -79,25 +79,26 @@ std::ostream &operator<<(std::ostream &os, const cell_record &cell) // // Utility functions -// +// bool starts_with(const std::string &str, const std::string &prefix) { return (str.find(prefix) == 0); } -void check_parse_status(const std::istream &is) +void check_parse_status(const std::istream &is, const swc_parser &parser) { - if (is.fail()) + if (is.fail()) { // If we try to read past the eof; fail bit will also be set - throw swc_parse_error("could not parse value"); + throw swc_parse_error("could not parse value", parser.lineno()); + } } template<typename T> -T parse_value_strict(std::istream &is) +T parse_value_strict(std::istream &is, const swc_parser &parser) { T val; - check_parse_status(is >> val); + check_parse_status(is >> val, parser); // everything's fine return val; @@ -105,10 +106,10 @@ T parse_value_strict(std::istream &is) // specialize parsing for cell types template<> -cell_record::kind parse_value_strict(std::istream &is) +cell_record::kind parse_value_strict(std::istream &is, const swc_parser &parser) { cell_record::id_type val; - check_parse_status(is >> val); + check_parse_status(is >> val, parser); // Let cell_record's constructor check for the type validity return static_cast<cell_record::kind>(val); @@ -116,13 +117,14 @@ cell_record::kind parse_value_strict(std::istream &is) // // swc_parser implementation -// +// std::istream &swc_parser::parse_record(std::istream &is, cell_record &cell) { while (!is.eof() && !is.bad()) { // consume empty and comment lines first std::getline(is, linebuff_); + ++lineno_; if (!linebuff_.empty() && !starts_with(linebuff_, comment_prefix_)) break; } @@ -139,23 +141,29 @@ std::istream &swc_parser::parse_record(std::istream &is, cell_record &cell) } if (is.fail()) { - throw swc_parse_error("too long line detected"); + throw swc_parse_error("too long line detected", lineno_); } std::istringstream line(linebuff_); - cell = parse_record(line); + try { + cell = parse_record(line); + } catch (std::invalid_argument &e) { + // Rethrow as a parse error + throw swc_parse_error(e.what(), lineno_); + } + return is; } cell_record swc_parser::parse_record(std::istringstream &is) { - auto id = parse_value_strict<int>(is); - auto type = parse_value_strict<cell_record::kind>(is); - auto x = parse_value_strict<float>(is); - auto y = parse_value_strict<float>(is); - auto z = parse_value_strict<float>(is); - auto r = parse_value_strict<float>(is); - auto parent_id = parse_value_strict<cell_record::id_type>(is); + auto id = parse_value_strict<int>(is, *this); + auto type = parse_value_strict<cell_record::kind>(is, *this); + auto x = parse_value_strict<float>(is, *this); + auto y = parse_value_strict<float>(is, *this); + auto z = parse_value_strict<float>(is, *this); + auto r = parse_value_strict<float>(is, *this); + auto parent_id = parse_value_strict<cell_record::id_type>(is, *this); // Convert to zero-based, leaving parent_id as-is if -1 if (parent_id != -1) { @@ -166,9 +174,8 @@ cell_record swc_parser::parse_record(std::istringstream &is) } -std::vector<cell_record> swc_read_cells(std::istream &is) +cell_record_range_clean::cell_record_range_clean(std::istream &is) { - std::vector<cell_record> cells; std::unordered_set<cell_record::id_type> ids; std::size_t num_trees = 0; @@ -176,40 +183,38 @@ std::vector<cell_record> swc_read_cells(std::istream &is) bool needsort = false; cell_record curr_cell; - while ( !(is >> curr_cell).eof()) { - if (curr_cell.parent() == -1 && ++num_trees > 1) { + for (auto c : swc_get_records<swc_io_raw>(is)) { + if (c.parent() == -1 && ++num_trees > 1) { // only a single tree is allowed break; } - auto inserted = ids.insert(curr_cell.id()); + auto inserted = ids.insert(c.id()); if (inserted.second) { // not a duplicate; insert cell - cells.push_back(curr_cell); - if (!needsort && curr_cell.id() < last_id) { + cells_.push_back(c); + if (!needsort && c.id() < last_id) { needsort = true; } - last_id = curr_cell.id(); + last_id = c.id(); } } if (needsort) { - std::sort(cells.begin(), cells.end()); + std::sort(cells_.begin(), cells_.end()); } // Renumber cells if necessary std::map<cell_record::id_type, cell_record::id_type> idmap; cell_record::id_type next_id = 0; - for (auto &c : cells) { + for (auto &c : cells_) { if (c.id() != next_id) { c.renumber(next_id, idmap); } ++next_id; } - - return std::move(cells); } } // end of nestmc::io diff --git a/src/swcio.hpp b/src/swcio.hpp index ba52720a7f1852c88cb60ae51b0f1aee9c10f9b9..e267c184b4e462b2015fb8c38866e2133d60b8b1 100644 --- a/src/swcio.hpp +++ b/src/swcio.hpp @@ -2,6 +2,7 @@ #include <exception> #include <iostream> +#include <iterator> #include <string> #include <vector> @@ -12,14 +13,14 @@ namespace io { -class cell_record +class cell_record { public: using id_type = int; // FIXME: enum's are not completely type-safe, since they can accept // anything that can be casted to their underlying type. - // + // // More on SWC files: http://research.mssm.edu/cnic/swc.html enum kind { undefined = 0, @@ -33,7 +34,7 @@ public: }; // cell records assume zero-based indexing; root's parent remains -1 - cell_record(kind type, int id, + cell_record(kind type, int id, float x, float y, float z, float r, int parent_id) : type_(type) @@ -46,7 +47,7 @@ public: { check_consistency(); } - + cell_record() : type_(cell_record::undefined) , id_(0) @@ -60,6 +61,16 @@ public: cell_record(const cell_record &other) = default; cell_record &operator=(const cell_record &other) = default; + bool strict_equals(const cell_record &other) const + { + return id_ == other.id_ && + x_ == other.x_ && + y_ == other.y_ && + z_ == other.z_ && + r_ == other.r_ && + parent_id_ == other.parent_id_; + } + // Equality and comparison operators friend bool operator==(const cell_record &lhs, const cell_record &rhs) @@ -151,16 +162,27 @@ private: id_type parent_id_; // cell parent's id }; + class swc_parse_error : public std::runtime_error { public: - explicit swc_parse_error(const char *msg) + explicit swc_parse_error(const char *msg, std::size_t lineno) : std::runtime_error(msg) + , lineno_(lineno) { } - explicit swc_parse_error(const std::string &msg) + explicit swc_parse_error(const std::string &msg, std::size_t lineno) : std::runtime_error(msg) + , lineno_(lineno) { } + + std::size_t lineno() const + { + return lineno_; + } + +private: + std::size_t lineno_; }; class swc_parser @@ -170,13 +192,20 @@ public: std::string comment_prefix) : delim_(delim) , comment_prefix_(comment_prefix) + , lineno_(0) { } swc_parser() : delim_(" ") , comment_prefix_("#") + , lineno_(0) { } + std::size_t lineno() const + { + return lineno_; + } + std::istream &parse_record(std::istream &is, cell_record &cell); private: @@ -186,11 +215,136 @@ private: std::string delim_; std::string comment_prefix_; std::string linebuff_; + std::size_t lineno_; }; std::istream &operator>>(std::istream &is, cell_record &cell); +class cell_record_stream_iterator : + public std::iterator<std::forward_iterator_tag, cell_record> +{ +public: + struct eof_tag { }; + + cell_record_stream_iterator(std::istream &is) + : is_(is) + , eof_(false) + { + is_.clear(); + is_.seekg(std::ios_base::beg); + read_next_record(); + } + + cell_record_stream_iterator(std::istream &is, eof_tag) + : is_(is) + , eof_(true) + { } + + cell_record_stream_iterator(const cell_record_stream_iterator &other) + : is_(other.is_) + , parser_(other.parser_) + , curr_record_(other.curr_record_) + , eof_(other.eof_) + { } + + cell_record_stream_iterator &operator++() + { + if (eof_) { + throw std::out_of_range("attempt to read past eof"); + } + + read_next_record(); + return *this; + } + + cell_record_stream_iterator operator++(int) + { + cell_record_stream_iterator ret(*this); + operator++(); + return ret; + } + + value_type operator*() + { + if (eof_) { + throw std::out_of_range("attempt to read past eof"); + } + + return curr_record_; + } + + bool operator==(const cell_record_stream_iterator &other) const + { + if (eof_ && other.eof_) { + return true; + } else { + return curr_record_.strict_equals(other.curr_record_); + } + } + + bool operator!=(const cell_record_stream_iterator &other) + { + return !(*this == other); + } + + friend std::ostream &operator<<(std::ostream &os, + const cell_record_stream_iterator &iter) + { + os << "{ is_.tellg(): " << iter.is_.tellg() << ", " + << "curr_record_: " << iter.curr_record_ << ", " + << "eof_: " << iter.eof_ << "}"; + + return os; + } + +private: + void read_next_record() + { + parser_.parse_record(is_, curr_record_); + if (is_.eof()) { + eof_ = true; + } + } + + std::istream &is_; + swc_parser parser_; + cell_record curr_record_; + + // indicator of eof; we need a way to define an end() iterator without + // seeking to the end of file + bool eof_; +}; + + +class cell_record_range_raw +{ +public: + using value_type = cell_record; + using reference = value_type &; + using const_referene = const value_type &; + using iterator = cell_record_stream_iterator; + using const_iterator = const cell_record_stream_iterator; + + cell_record_range_raw(std::istream &is) + : is_(is) + { } + + iterator begin() + { + return cell_record_stream_iterator(is_); + } + + iterator end() + { + iterator::eof_tag eof; + return cell_record_stream_iterator(is_, eof); + } + +private: + std::istream &is_; +}; + // // Reads cells from an input stream until an eof is encountered and returns a // cleaned sequence of cell records. @@ -198,7 +352,52 @@ std::istream &operator>>(std::istream &is, cell_record &cell); // For more information check here: // https://github.com/eth-cscs/cell_algorithms/wiki/SWC-file-parsing // -std::vector<cell_record> swc_read_cells(std::istream &is); + +class cell_record_range_clean +{ +public: + using value_type = cell_record; + using reference = value_type &; + using const_referene = const value_type &; + using iterator = std::vector<cell_record>::iterator; + using const_iterator = std::vector<cell_record>::const_iterator; + + cell_record_range_clean(std::istream &is); + + iterator begin() + { + return cells_.begin(); + } + + iterator end() + { + return cells_.end(); + } + + std::size_t size() + { + return cells_.size(); + } + +private: + std::vector<cell_record> cells_; +}; + +struct swc_io_raw +{ + using cell_range_type = cell_record_range_raw; +}; + +struct swc_io_clean +{ + using cell_range_type = cell_record_range_clean; +}; + +template<typename T = swc_io_clean> + typename T::cell_range_type swc_get_records(std::istream &is) +{ + return typename T::cell_range_type(is); +} } // end of nestmc::io } // end of nestmc diff --git a/tests/test_swcio.cpp b/tests/test_swcio.cpp index ca810e943b50ca0061e0a48e2cdb9edd70306439..7ecd5fc013d98e66428d7a85cb658360e4ca6f2d 100644 --- a/tests/test_swcio.cpp +++ b/tests/test_swcio.cpp @@ -1,4 +1,5 @@ #include <array> +#include <exception> #include <iostream> #include <fstream> #include <numeric> @@ -128,7 +129,7 @@ TEST(swc_parser, invalid_input) std::istringstream is( "1 10 14.566132 34.873772 7.857000 0.717830 -1\n"); cell_record cell; - EXPECT_THROW(is >> cell, std::invalid_argument); + EXPECT_THROW(is >> cell, swc_parse_error); } } @@ -185,16 +186,12 @@ TEST(swc_parser, valid_input) swc_input << c << "\n"; swc_input << "# this is a final comment\n"; - try { - std::size_t nr_records = 0; - cell_record cell; - while ( !(swc_input >> cell).eof()) { - ASSERT_LT(nr_records, cells_orig.size()); - expect_cell_equals(cells_orig[nr_records], cell); - ++nr_records; - } - } catch (std::exception &e) { - ADD_FAILURE() << "unexpected exception thrown\n"; + + std::size_t nr_records = 0; + for (auto cell : swc_get_records<swc_io_raw>(swc_input)) { + ASSERT_LT(nr_records, cells_orig.size()); + expect_cell_equals(cells_orig[nr_records], cell); + ++nr_records; } } } @@ -212,8 +209,7 @@ TEST(swc_parser, from_allen_db) // load the cell records into a std::vector std::vector<io::cell_record> nodes; - io::cell_record node; - while( !(fid >> node).eof()) { + for (auto node : io::swc_get_records<io::swc_io_raw>(fid)) { nodes.push_back(std::move(node)); } @@ -233,8 +229,7 @@ TEST(swc_parser, input_cleaning) is << "2 1 14.566132 34.873772 7.857000 0.717830 1\n"; is << "2 1 14.566132 34.873772 7.857000 0.717830 1\n"; - auto cells = swc_read_cells(is); - EXPECT_EQ(2u, cells.size()); + EXPECT_EQ(2u, swc_get_records(is).size()); } { @@ -245,7 +240,7 @@ TEST(swc_parser, input_cleaning) is << "3 1 14.566132 34.873772 7.857000 0.717830 -1\n"; is << "4 1 14.566132 34.873772 7.857000 0.717830 1\n"; - auto cells = swc_read_cells(is); + auto cells = swc_get_records(is); EXPECT_EQ(2u, cells.size()); } @@ -258,14 +253,15 @@ TEST(swc_parser, input_cleaning) is << "1 1 14.566132 34.873772 7.857000 0.717830 -1\n"; std::array<cell_record::id_type, 4> expected_id_list = {{ 0, 1, 2, 3 }}; - auto cells = swc_read_cells(is); - ASSERT_EQ(4u, cells.size()); auto expected_id = expected_id_list.cbegin(); - for (const auto &c : cells) { + for (auto c : swc_get_records(is)) { EXPECT_EQ(*expected_id, c.id()); ++expected_id; } + + // Check that we have read through the whole input + EXPECT_EQ(expected_id_list.end(), expected_id); } { @@ -278,21 +274,101 @@ TEST(swc_parser, input_cleaning) is << "51 1 14.566132 34.873772 7.857000 0.717830 1\n"; is << "61 1 14.566132 34.873772 7.857000 0.717830 51\n"; - auto cells = swc_read_cells(is); std::array<cell_record::id_type, 6> expected_id_list = {{ 0, 1, 2, 3, 4, 5 }}; std::array<cell_record::id_type, 6> expected_parent_list = {{ -1, 0, 1, 1, 0, 4 }}; - ASSERT_EQ(6u, cells.size()); auto expected_id = expected_id_list.cbegin(); auto expected_parent = expected_parent_list.cbegin(); - for (const auto &c : cells) { + for (auto c : swc_get_records(is)) { EXPECT_EQ(*expected_id, c.id()); EXPECT_EQ(*expected_parent, c.parent()); ++expected_id; ++expected_parent; } + // Check that we have read through the whole input + EXPECT_EQ(expected_id_list.end(), expected_id); + EXPECT_EQ(expected_parent_list.end(), expected_parent); + } +} + +TEST(cell_record_ranges, raw) +{ + using namespace nestmc::io; + + { + // Check valid usage + std::stringstream is; + is << "1 1 14.566132 34.873772 7.857000 0.717830 -1\n"; + is << "2 1 14.566132 34.873772 7.857000 0.717830 1\n"; + is << "3 1 14.566132 34.873772 7.857000 0.717830 1\n"; + is << "4 1 14.566132 34.873772 7.857000 0.717830 1\n"; + + std::vector<cell_record> cells; + for (auto c : swc_get_records<swc_io_raw>(is)) { + cells.push_back(c); + } + + EXPECT_EQ(4u, cells.size()); + + bool entered = false; + auto citer = cells.begin(); + for (auto c : swc_get_records<swc_io_raw>(is)) { + expect_cell_equals(c, *citer++); + entered = true; + } + + EXPECT_TRUE(entered); + } + + { + // Check out of bounds reads + std::stringstream is; + is << "1 1 14.566132 34.873772 7.857000 0.717830 -1\n"; + + auto ibegin = swc_get_records<swc_io_raw>(is).begin(); + + EXPECT_NO_THROW(++ibegin); + EXPECT_THROW(*ibegin, std::out_of_range); + + } + + { + // Check iterator increments + std::stringstream is; + is << "1 1 14.566132 34.873772 7.857000 0.717830 -1\n"; + + auto iter = swc_get_records<swc_io_raw>(is).begin(); + auto iend = swc_get_records<swc_io_raw>(is).end(); + + cell_record c; + EXPECT_NO_THROW(c = *iter++); + EXPECT_EQ(-1, c.parent()); + EXPECT_EQ(iend, iter); + + // Try to read past eof + EXPECT_THROW(*iter, std::out_of_range); + } + + { + // Check parse error context + std::stringstream is; + is << "1 1 14.566132 34.873772 7.857000 0.717830 -1\n"; + is << "2 1 14.566132 34.873772 7.857000 0.717830 1\n"; + is << "3 10 14.566132 34.873772 7.857000 0.717830 1\n"; + is << "4 1 14.566132 34.873772 7.857000 0.717830 1\n"; + + std::vector<cell_record> cells; + try { + for (auto c : swc_get_records<swc_io_raw>(is)) { + cells.push_back(c); + } + + ADD_FAILURE() << "expected an exception\n"; + } catch (const swc_parse_error &e) { + EXPECT_EQ(3u, e.lineno()); + } } } diff --git a/vector b/vector index a8dfadd460262ebbc1bc22b159efe9e33ad1d359..9c86d0a84efed0dd739888503d275378df67fe71 160000 --- a/vector +++ b/vector @@ -1 +1 @@ -Subproject commit a8dfadd460262ebbc1bc22b159efe9e33ad1d359 +Subproject commit 9c86d0a84efed0dd739888503d275378df67fe71