From 454bac7171c19b32f6d1ad5bb6e2ae3a9e31b78f Mon Sep 17 00:00:00 2001
From: Ben Cumming <bcumming@cscs.ch>
Date: Wed, 17 Feb 2021 18:37:27 +0100
Subject: [PATCH] Remove transmogrify layer from s-expression parser (#1383)

Remove transmogrify layer from s-expression parser.

The transmogrify layer isn't needed if asc files won't be parsed as s-expressions.
It complicates and slows down parsing of s-expressions.
---
 arbor/s_expr.cpp          |  45 +++++---------
 arbor/s_expr.hpp          | 124 --------------------------------------
 test/unit/test_s_expr.cpp |  24 --------
 3 files changed, 16 insertions(+), 177 deletions(-)

diff --git a/arbor/s_expr.cpp b/arbor/s_expr.cpp
index b16b730b..f916700b 100644
--- a/arbor/s_expr.cpp
+++ b/arbor/s_expr.cpp
@@ -87,14 +87,14 @@ static std::unordered_map<std::string, tok> keyword_to_tok = {
 };
 
 class lexer {
-    transmogrifier line_start_;
-    transmogrifier stream_;
+    const char* line_start_;
+    const char* stream_;
     unsigned line_;
     token token_;
 
 public:
 
-    lexer(transmogrifier begin):
+    lexer(const char* begin):
         line_start_(begin), stream_(begin), line_(0)
     {
         // Prime the first token.
@@ -183,7 +183,7 @@ private:
                             token_ = {loc(), tok::error, "Unexpected end of input."};
                             return;
                         }
-                        char c = stream_.peek(1);
+                        char c = peek(1);
                         if (std::isdigit(c) or c=='.') {
                             token_ = number();
                             return;
@@ -207,6 +207,14 @@ private:
         return;
     }
 
+    // Look ahead n characters in the input stream.
+    // If peek to or past the end of the stream return '\0'.
+    char peek(int n) {
+        const char* c = stream_;
+        while (*c && n--) ++c;
+        return *c;
+    }
+
     // Consumes characters in the stream until end of stream or a new line.
     // Assumes that the current location is the `;` that starts the comment.
     void eat_comment() {
@@ -320,8 +328,8 @@ private:
                 }
             }
             else if (!uses_scientific_notation && (c=='e' || c=='E')) {
-                if ( std::isdigit(stream_.peek(1)) ||
-                    (is_plusminus(stream_.peek(1)) && std::isdigit(stream_.peek(2))))
+                if ( std::isdigit(peek(1)) ||
+                    (is_plusminus(peek(1)) && std::isdigit(peek(2))))
                 {
                     uses_scientific_notation++;
                     str += c;
@@ -475,8 +483,8 @@ s_expr parse(lexer& L) {
 
 }
 
-s_expr parse_s_expr(transmogrifier begin) {
-    lexer l(begin);
+s_expr parse_s_expr(const std::string& line) {
+    lexer l(line.c_str());
     s_expr result = impl::parse(l);
     const bool err = result.is_atom()? result.atom().kind==tok::error: false;
     if (!err) {
@@ -489,25 +497,4 @@ s_expr parse_s_expr(transmogrifier begin) {
     return result;
 }
 
-s_expr parse_s_expr(const std::string& in) {
-    return parse_s_expr(transmogrifier{in});
-}
-
-// For parsing a file with multiple high level s expressions.
-// Returns a vector of the expressions.
-// If an error occured, terminate early and the last expression will be an error.
-std::vector<s_expr> parse_multi_s_expr(transmogrifier begin) {
-    std::vector<s_expr> result;
-    lexer l(begin);
-    bool error = false;
-    while (!error && l.current().kind!=tok::eof) {
-        result.push_back(impl::parse(l));
-        const auto& e = result.back();
-        error = e.is_atom() && e.atom().kind==tok::error;
-    }
-
-    return result;
-}
-
-
 } // namespace arb
diff --git a/arbor/s_expr.hpp b/arbor/s_expr.hpp
index 6e93aee9..c3d13090 100644
--- a/arbor/s_expr.hpp
+++ b/arbor/s_expr.hpp
@@ -13,128 +13,6 @@
 
 namespace arb {
 
-// Forward iterator that can translate a raw stream to valid s_expr input if,
-// perchance, you want to parse a half-hearted attempt at an s-expression
-// (looking at you, the guy who invented the Neurolucida .asc format).
-//
-// I am not fond of .asc files, which would be s-expressions, if they
-// didn't sometimes contain '|' and ',' characters which translate to ")(" and
-// " " respectively (not mentioning the spine syntax).
-//
-// To remedy such situations, the transmogrifier performs user-provided string
-// substitution on a characters in the input.
-//
-// For example, if you are unfortuinate enough to parse an asc file, you might want
-// to try the following:
-//
-// transmogrifier(str, {{',', " "},
-//                      {'|', ")("},
-//                      {'<', "(spine "},
-//                      {'>', ")"}});
-
-class transmogrifier {
-    using sub_map = std::unordered_map<char, std::string>;
-    using iterator_type = std::string::const_iterator;
-    using difference_type = std::string::difference_type;
-    using iterator = transmogrifier;
-
-    iterator_type pos_;
-    iterator_type end_;
-    sub_map sub_map_;
-
-    const char* sub_pos_ = nullptr;
-
-    void set_state() {
-        sub_pos_ = nullptr;
-        char next = *pos_;
-        if (auto it=sub_map_.find(next); it!=sub_map_.end()) {
-            sub_pos_ = it->second.c_str();
-        }
-    }
-
-    public:
-
-    transmogrifier(const std::string& s, sub_map map={}):
-        pos_(s.cbegin()),
-        end_(s.cend()),
-        sub_map_(std::move(map))
-    {
-        if (pos_!=end_) {
-            set_state();
-        }
-    }
-
-    char operator*() const {
-        if (pos_==end_) {
-            return '\0';
-        }
-        if (sub_pos_) {
-            return *sub_pos_;
-        }
-        return *pos_;
-    }
-
-    iterator& operator++() {
-        // If already at the end don't advance.
-        if (pos_==end_) {
-            return *this;
-        }
-
-        // If currently substituting a string, advance by one and
-        // test whether we have reached the end of the string.
-        if (sub_pos_) {
-            ++sub_pos_;
-            if (*sub_pos_=='\0') { // test for end of string
-                sub_pos_ = nullptr;
-            }
-            else {
-                return *this;
-            }
-        }
-
-        ++pos_;
-
-        set_state();
-        return *this;
-    }
-
-    iterator operator++(int) {
-        iterator it = *this;
-
-        ++(*this);
-
-        return it;
-    }
-
-    iterator operator+(unsigned n) {
-        iterator it = *this;
-
-        while (n--) ++it;
-
-        return it;
-    }
-
-    char peek(unsigned i) {
-        return *(*this+i);
-    }
-
-    bool operator==(const transmogrifier& other) const {
-        return pos_==other.pos_ && sub_pos_==other.sub_pos_;
-    }
-
-    bool operator!=(const transmogrifier& other) {
-        return !(*this==other);
-    }
-
-    operator bool() const {
-        return pos_ != end_;
-    }
-
-    difference_type operator-(const transmogrifier& rhs) const {
-        return pos_ - rhs.pos_;
-    }
-};
-
 struct src_location {
     unsigned line = 0;
     unsigned column = 0;
@@ -349,8 +227,6 @@ std::size_t length(const s_expr& l);
 src_location location(const s_expr& l);
 
 s_expr parse_s_expr(const std::string& line);
-s_expr parse_s_expr(transmogrifier begin);
-std::vector<s_expr> parse_multi_s_expr(transmogrifier begin);
 
 } // namespace arb
 
diff --git a/test/unit/test_s_expr.cpp b/test/unit/test_s_expr.cpp
index eb050f55..a70169b5 100644
--- a/test/unit/test_s_expr.cpp
+++ b/test/unit/test_s_expr.cpp
@@ -13,30 +13,6 @@
 using namespace arb;
 using namespace std::string_literals;
 
-TEST(s_expr, transmogrify) {
-    using map_t = std::unordered_map<char, std::string>;
-    auto transform = [](std::string in, map_t map) {
-        auto t = transmogrifier(in, map);
-        std::string s;
-        while (t) s.push_back(*t++);
-        return s;
-    };
-    EXPECT_EQ(transform("(42,24)", {{',', " "}}), "(42 24)");
-    EXPECT_EQ(transform("(42,24)", {{',', "hello"}}), "(42hello24)");
-    EXPECT_EQ(transform("(42,24)", {{',', " "}}), "(42 24)");
-    EXPECT_EQ(transform("(42,,24)", {{',', " "}}), "(42  24)");
-    map_t asc_map = {{',', " "},
-                     {'|', ")("},
-                     {'<', "(spine "},
-                     {'>', ")"}};
-    EXPECT_EQ(transform("(RGB 128,128,128)", asc_map), "(RGB 128 128 128)");
-    EXPECT_EQ(transform("<color blue>", asc_map), "(spine color blue)");
-    EXPECT_EQ(transform("(1 2 3 | 4 5 6)", asc_map), "(1 2 3 )( 4 5 6)");
-    EXPECT_EQ(transform("", asc_map), "");
-    EXPECT_EQ(transform("<>", asc_map), "(spine )");
-    EXPECT_EQ(transform("<32|>", asc_map), "(spine 32)()");
-}
-
 TEST(s_expr, atoms) {
     auto get_atom = [](s_expr e) {
         return e.atom();
-- 
GitLab