diff --git a/arbor/s_expr.cpp b/arbor/s_expr.cpp index b16b730b1cd6b6cfba7dce87a9924a9077f1db39..f916700b74649225d6ef4b30d8a3028d82afc3d5 100644 --- a/arbor/s_expr.cpp +++ b/arbor/s_expr.cpp @@ -87,14 +87,14 @@ static std::unordered_map<std::string, tok> keyword_to_tok = { }; class lexer { - transmogrifier line_start_; - transmogrifier stream_; + const char* line_start_; + const char* stream_; unsigned line_; token token_; public: - lexer(transmogrifier begin): + lexer(const char* begin): line_start_(begin), stream_(begin), line_(0) { // Prime the first token. @@ -183,7 +183,7 @@ private: token_ = {loc(), tok::error, "Unexpected end of input."}; return; } - char c = stream_.peek(1); + char c = peek(1); if (std::isdigit(c) or c=='.') { token_ = number(); return; @@ -207,6 +207,14 @@ private: return; } + // Look ahead n characters in the input stream. + // If peek to or past the end of the stream return '\0'. + char peek(int n) { + const char* c = stream_; + while (*c && n--) ++c; + return *c; + } + // Consumes characters in the stream until end of stream or a new line. // Assumes that the current location is the `;` that starts the comment. void eat_comment() { @@ -320,8 +328,8 @@ private: } } else if (!uses_scientific_notation && (c=='e' || c=='E')) { - if ( std::isdigit(stream_.peek(1)) || - (is_plusminus(stream_.peek(1)) && std::isdigit(stream_.peek(2)))) + if ( std::isdigit(peek(1)) || + (is_plusminus(peek(1)) && std::isdigit(peek(2)))) { uses_scientific_notation++; str += c; @@ -475,8 +483,8 @@ s_expr parse(lexer& L) { } -s_expr parse_s_expr(transmogrifier begin) { - lexer l(begin); +s_expr parse_s_expr(const std::string& line) { + lexer l(line.c_str()); s_expr result = impl::parse(l); const bool err = result.is_atom()? result.atom().kind==tok::error: false; if (!err) { @@ -489,25 +497,4 @@ s_expr parse_s_expr(transmogrifier begin) { return result; } -s_expr parse_s_expr(const std::string& in) { - return parse_s_expr(transmogrifier{in}); -} - -// For parsing a file with multiple high level s expressions. -// Returns a vector of the expressions. -// If an error occured, terminate early and the last expression will be an error. -std::vector<s_expr> parse_multi_s_expr(transmogrifier begin) { - std::vector<s_expr> result; - lexer l(begin); - bool error = false; - while (!error && l.current().kind!=tok::eof) { - result.push_back(impl::parse(l)); - const auto& e = result.back(); - error = e.is_atom() && e.atom().kind==tok::error; - } - - return result; -} - - } // namespace arb diff --git a/arbor/s_expr.hpp b/arbor/s_expr.hpp index 6e93aee9d90d7ad05deabc31ba14e0280eab3a11..c3d1309076b6204a92ac6e25482d909815d510e7 100644 --- a/arbor/s_expr.hpp +++ b/arbor/s_expr.hpp @@ -13,128 +13,6 @@ namespace arb { -// Forward iterator that can translate a raw stream to valid s_expr input if, -// perchance, you want to parse a half-hearted attempt at an s-expression -// (looking at you, the guy who invented the Neurolucida .asc format). -// -// I am not fond of .asc files, which would be s-expressions, if they -// didn't sometimes contain '|' and ',' characters which translate to ")(" and -// " " respectively (not mentioning the spine syntax). -// -// To remedy such situations, the transmogrifier performs user-provided string -// substitution on a characters in the input. -// -// For example, if you are unfortuinate enough to parse an asc file, you might want -// to try the following: -// -// transmogrifier(str, {{',', " "}, -// {'|', ")("}, -// {'<', "(spine "}, -// {'>', ")"}}); - -class transmogrifier { - using sub_map = std::unordered_map<char, std::string>; - using iterator_type = std::string::const_iterator; - using difference_type = std::string::difference_type; - using iterator = transmogrifier; - - iterator_type pos_; - iterator_type end_; - sub_map sub_map_; - - const char* sub_pos_ = nullptr; - - void set_state() { - sub_pos_ = nullptr; - char next = *pos_; - if (auto it=sub_map_.find(next); it!=sub_map_.end()) { - sub_pos_ = it->second.c_str(); - } - } - - public: - - transmogrifier(const std::string& s, sub_map map={}): - pos_(s.cbegin()), - end_(s.cend()), - sub_map_(std::move(map)) - { - if (pos_!=end_) { - set_state(); - } - } - - char operator*() const { - if (pos_==end_) { - return '\0'; - } - if (sub_pos_) { - return *sub_pos_; - } - return *pos_; - } - - iterator& operator++() { - // If already at the end don't advance. - if (pos_==end_) { - return *this; - } - - // If currently substituting a string, advance by one and - // test whether we have reached the end of the string. - if (sub_pos_) { - ++sub_pos_; - if (*sub_pos_=='\0') { // test for end of string - sub_pos_ = nullptr; - } - else { - return *this; - } - } - - ++pos_; - - set_state(); - return *this; - } - - iterator operator++(int) { - iterator it = *this; - - ++(*this); - - return it; - } - - iterator operator+(unsigned n) { - iterator it = *this; - - while (n--) ++it; - - return it; - } - - char peek(unsigned i) { - return *(*this+i); - } - - bool operator==(const transmogrifier& other) const { - return pos_==other.pos_ && sub_pos_==other.sub_pos_; - } - - bool operator!=(const transmogrifier& other) { - return !(*this==other); - } - - operator bool() const { - return pos_ != end_; - } - - difference_type operator-(const transmogrifier& rhs) const { - return pos_ - rhs.pos_; - } -}; - struct src_location { unsigned line = 0; unsigned column = 0; @@ -349,8 +227,6 @@ std::size_t length(const s_expr& l); src_location location(const s_expr& l); s_expr parse_s_expr(const std::string& line); -s_expr parse_s_expr(transmogrifier begin); -std::vector<s_expr> parse_multi_s_expr(transmogrifier begin); } // namespace arb diff --git a/test/unit/test_s_expr.cpp b/test/unit/test_s_expr.cpp index eb050f5526bca34feb0d4d4d86a534837fbea207..a70169b54890798658cfe389238afce1aa00ab2d 100644 --- a/test/unit/test_s_expr.cpp +++ b/test/unit/test_s_expr.cpp @@ -13,30 +13,6 @@ using namespace arb; using namespace std::string_literals; -TEST(s_expr, transmogrify) { - using map_t = std::unordered_map<char, std::string>; - auto transform = [](std::string in, map_t map) { - auto t = transmogrifier(in, map); - std::string s; - while (t) s.push_back(*t++); - return s; - }; - EXPECT_EQ(transform("(42,24)", {{',', " "}}), "(42 24)"); - EXPECT_EQ(transform("(42,24)", {{',', "hello"}}), "(42hello24)"); - EXPECT_EQ(transform("(42,24)", {{',', " "}}), "(42 24)"); - EXPECT_EQ(transform("(42,,24)", {{',', " "}}), "(42 24)"); - map_t asc_map = {{',', " "}, - {'|', ")("}, - {'<', "(spine "}, - {'>', ")"}}; - EXPECT_EQ(transform("(RGB 128,128,128)", asc_map), "(RGB 128 128 128)"); - EXPECT_EQ(transform("<color blue>", asc_map), "(spine color blue)"); - EXPECT_EQ(transform("(1 2 3 | 4 5 6)", asc_map), "(1 2 3 )( 4 5 6)"); - EXPECT_EQ(transform("", asc_map), ""); - EXPECT_EQ(transform("<>", asc_map), "(spine )"); - EXPECT_EQ(transform("<32|>", asc_map), "(spine 32)()"); -} - TEST(s_expr, atoms) { auto get_atom = [](s_expr e) { return e.atom();