From aea81c6d9452ba730a9e969eebd36e9701bc1be1 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis <karakasis@cscs.ch> Date: Thu, 21 Jan 2016 09:37:14 +0100 Subject: [PATCH] Better and cleaner parser semantics. The parser's semantics are now cleaner and the implementation more robust. Cell parsing now follows the semantics of the standard input stream (same as of standard UNIX's read), but instead of reading in bytes or builtin types, we are reading full cell records. This has the following implications: 1. Empty and comment lines are consumed silently. 2. End-of-file will be set on the stream only if a read operation returns zero. Practically, this means, eof bit will be set in the stream at the first attempt to read a cell record after the final cell record in the stream was read. 3. Exceptional conditions in the stream (e.g., eof or bad bits) stop the parsing process without raising an exception and return control to the user which is responsible to deal with them. The only exceptional condition that we handle internally is the presence of very long lines, in which case we raise an std::runtime_error(). Thanks to these cleaner semantics, the parser is now able to deal with files not ending with a new line. The previous version would have missed the last record. Due to (2) the following code for reading from an input stream will yield an additional default constructed cell record at the end: while (!swc_input.eof()) { cell_record cell; swc_input >> cell; // do sth with the cell } According to (2), after the final record is read, eof is not yet set at the stream, which means the while loop will be entered one more time. The attempt to read a record from the stream then will raise the eof bit and no cell will be read in, leaving `cell' at its default constructed state. To avoid such a side-effect, the best way to read records from an input stream is the following: cell_record cell; while( !(swc_input >> cell).eof()) { // do sth with the cell } In this case, after the final record is read, the next attempt to read in a record will set the eof bit and won't enter the loop. --- main.cpp | 117 +++++++++++++++++++++++++++++++----------------------- swcio.hpp | 72 +++++++++++++++++++++++---------- 2 files changed, 118 insertions(+), 71 deletions(-) diff --git a/main.cpp b/main.cpp index 88517ede..ee24b127 100644 --- a/main.cpp +++ b/main.cpp @@ -1,6 +1,8 @@ +#include <array> #include <iostream> #include <fstream> #include <numeric> +#include <vector> #include "gtest/gtest.h" @@ -252,6 +254,18 @@ TEST(cell_tree, json_load) { } // SWC tests +void expect_cell_equals(const neuron::io::cell_record &expected, + const neuron::io::cell_record &actual) +{ + EXPECT_EQ(expected.id(), actual.id()); + EXPECT_EQ(expected.type(), actual.type()); + EXPECT_FLOAT_EQ(expected.x(), actual.x()); + EXPECT_FLOAT_EQ(expected.y(), actual.y()); + EXPECT_FLOAT_EQ(expected.z(), actual.z()); + EXPECT_FLOAT_EQ(expected.radius(), actual.radius()); + EXPECT_EQ(expected.parent(), actual.parent()); +} + TEST(cell_record, construction) { using namespace neuron::io; @@ -308,21 +322,14 @@ TEST(cell_record, construction) EXPECT_EQ(cell.z(), 1.); EXPECT_EQ(cell.radius(), 1.); EXPECT_EQ(cell.diameter(), 2*1.); - EXPECT_EQ(cell.parent_id(), -1); + EXPECT_EQ(cell.parent(), -1); } { // check copy constructor - cell_record proto_cell(cell_record::custom, 0, 1., 1., 1., 1., -1); - cell_record cell(proto_cell); - EXPECT_EQ(cell.id(), 0); - EXPECT_EQ(cell.type(), cell_record::custom); - EXPECT_EQ(cell.x(), 1.); - EXPECT_EQ(cell.y(), 1.); - EXPECT_EQ(cell.z(), 1.); - EXPECT_EQ(cell.radius(), 1.); - EXPECT_EQ(cell.diameter(), 2*1.); - EXPECT_EQ(cell.parent_id(), -1); + cell_record cell_orig(cell_record::custom, 0, 1., 1., 1., 1., -1); + cell_record cell(cell_orig); + expect_cell_equals(cell_orig, cell); } } @@ -330,20 +337,6 @@ TEST(swc_parser, invalid_input) { using namespace neuron::io; - { - // check empty file - cell_record cell; - std::istringstream is(""); - EXPECT_THROW(is >> cell, std::runtime_error); - } - - { - // check comment-only file - cell_record cell; - std::istringstream is("#comment\n#comment\n"); - EXPECT_THROW(is >> cell, std::runtime_error); - } - { // check incomplete lines; missing parent std::istringstream is("1 1 14.566132 34.873772 7.857000 0.717830\n"); @@ -351,14 +344,6 @@ TEST(swc_parser, invalid_input) EXPECT_THROW(is >> cell, std::logic_error); } - { - // check incomplete lines; missing newline - // FIXME: we should probably accept such files - std::istringstream is("1 1 14.566132 34.873772 7.857000 0.717830 -1"); - cell_record cell; - EXPECT_THROW(is >> cell, std::runtime_error); - } - { // Check long lines std::istringstream is(std::string(256, 'a') + "\n"); @@ -387,31 +372,65 @@ TEST(swc_parser, valid_input) using namespace neuron::io; { - // check valid input - std::istringstream is("\ -# this is a comment\n\ -# this is a comment\n\ -1 1 14.566132 34.873772 7.857000 0.717830 -1 # end-of-line comment\n\ -"); - cell_record cell; + // check empty file; no cell may be parsed + cell_record cell, cell_orig; + std::istringstream is(""); EXPECT_NO_THROW(is >> cell); - EXPECT_EQ(cell.id(), 0); // zero-based indexing - EXPECT_EQ(cell.type(), cell_record::soma); - EXPECT_FLOAT_EQ(cell.x(), 14.566132); - EXPECT_FLOAT_EQ(cell.y(), 34.873772); - EXPECT_FLOAT_EQ(cell.z(), 7.857000); - EXPECT_FLOAT_EQ(cell.radius(), 0.717830); - EXPECT_FLOAT_EQ(cell.parent_id(), -1); + expect_cell_equals(cell_orig, cell); } { - // Test multiple records + // check comment-only file not ending with a newline; + // no cell may be parsed + cell_record cell, cell_orig; + std::istringstream is("#comment\n#comment"); + EXPECT_NO_THROW(is >> cell); + expect_cell_equals(cell_orig, cell); } + { - // Test input ending with comments + // check last line case (no newline at the end) + std::istringstream is("1 1 14.566132 34.873772 7.857000 0.717830 -1"); + cell_record cell; + EXPECT_NO_THROW(is >> cell); + EXPECT_EQ(0, cell.id()); // zero-based indexing + EXPECT_EQ(cell_record::soma, cell.type()); + EXPECT_FLOAT_EQ(14.566132, cell.x()); + EXPECT_FLOAT_EQ(34.873772, cell.y()); + EXPECT_FLOAT_EQ( 7.857000, cell.z()); + EXPECT_FLOAT_EQ( 0.717830, cell.radius()); + EXPECT_FLOAT_EQ( -1, cell.parent()); } + { + // check valid input with a series of records + std::vector<cell_record> cells_orig = { + cell_record(cell_record::soma, 0, + 14.566132, 34.873772, 7.857000, 0.717830, -1), + cell_record(cell_record::dendrite, 1, + 14.566132+1, 34.873772+1, 7.857000+1, 0.717830+1, -1) + }; + + std::stringstream swc_input; + swc_input << "# this is a comment\n"; + swc_input << "# this is a comment\n"; + for (auto c : cells_orig) + swc_input << c << "\n"; + + swc_input << "# this is a final comment\n"; + try { + std::size_t nr_records = 0; + cell_record cell; + while ( !(swc_input >> cell).eof()) { + ASSERT_LT(nr_records, cells_orig.size()); + expect_cell_equals(cells_orig[nr_records], cell); + ++nr_records; + } + } catch (std::exception &e) { + ADD_FAILURE(); + } + } } int main(int argc, char **argv) { diff --git a/swcio.hpp b/swcio.hpp index 7e9725b7..9b2756f8 100644 --- a/swcio.hpp +++ b/swcio.hpp @@ -1,5 +1,6 @@ #pragma once +#include <cmath> #include <exception> #include <iostream> #include <sstream> @@ -68,48 +69,62 @@ public: , y_(0) , z_(0) , r_(0) - , parent_id_(0) + , parent_id_(-1) { } cell_record(const cell_record &other) = default; cell_record &operator=(const cell_record &other) = default; - kind type() + friend bool operator==(const cell_record &lhs, + const cell_record &rhs) + { + return lhs.id_ == rhs.id_; + } + + friend bool operator!=(const cell_record &lhs, + const cell_record &rhs) + { + return !(lhs == rhs); + } + + friend std::ostream &operator<<(std::ostream &os, const cell_record &cell); + + kind type() const { return type_; } - int id() + int id() const { return id_; } - int parent_id() + int parent() const { return parent_id_; } - float x() + float x() const { return x_; } - float y() + float y() const { return y_; } - float z() + float z() const { return z_; } - float radius() + float radius() const { return r_; } - float diameter() + float diameter() const { return 2*r_; } @@ -136,7 +151,6 @@ public: { init_linebuff(); } - swc_parser() : delim_(" ") @@ -152,7 +166,7 @@ public: delete[] linebuff_; } - cell_record parse_record(std::istream &is) + std::istream &parse_record(std::istream &is, cell_record &cell) { while (!is.eof() && !is.bad()) { // consume empty and comment lines first @@ -161,17 +175,21 @@ public: break; } - if (is.eof()) - throw std::runtime_error("unexpected eof found"); - if (is.bad()) - throw std::runtime_error("i/o error"); + // let the caller check for such events + return is; + + if (is.eof() && + (linebuff_[0] == 0 || linebuff_[0] == comment_prefix_)) + // last line is either empty or a comment; don't parse anything + return is; if (is.fail() && is.gcount() == max_line_ - 1) throw std::runtime_error("too long line detected"); std::istringstream line(linebuff_); - return parse_record(line); + cell = parse_record(line); + return is; } private: @@ -186,16 +204,15 @@ private: // If we try to read past the eof; fail bit will also be set // FIXME: better throw a custom parse_error exception throw std::logic_error("could not parse value"); - - if (is.bad()) - throw std::runtime_error("i/o error"); } + // FIXME: need not to be a member function template<typename T> T parse_value_strict(std::istream &is) { T val; is >> val; + // std::cout << val << "\n"; check_parse_status(is); // everything's fine @@ -246,12 +263,23 @@ cell_record swc_parser::parse_record(std::istringstream &is) std::istream &operator>>(std::istream &is, cell_record &cell) { swc_parser parser; - cell = parser.parse_record(is); + parser.parse_record(is, cell); return is; } -std::ostream &operator<<(std::ostream &os, const cell_record &cell); - +std::ostream &operator<<(std::ostream &os, const cell_record &cell) +{ + // output in one-based indexing + os << cell.id_+1 << " " + << cell.type_ << " " + << std::setprecision(7) << cell.x_ << " " + << std::setprecision(7) << cell.y_ << " " + << std::setprecision(7) << cell.z_ << " " + << std::setprecision(7) << cell.r_ << " " + << ((cell.parent_id_ == -1) ? cell.parent_id_ : cell.parent_id_+1); + + return os; +} } // end of neuron::io } // end of neuron -- GitLab