From 7284fba8f53ec97615c7d8537d94264f30ca3141 Mon Sep 17 00:00:00 2001
From: Vasileios Karakasis <karakasis@cscs.ch>
Date: Wed, 20 Jan 2016 10:51:34 +0100
Subject: [PATCH] SWC file parser.

A very basic parser of SWC files. The role of the parser is to extract
cell records from an SWC file line-by-line, which will then be used to
construct the neuron cell models.

The parser currently does only a simple linewise processing of the cell
records. This practically means that the validity of the input is only
checked at the cell record level.

More advanced input processing, such as detection of duplicate cell ids,
holes in cell id numbering or multiple cell trees in a file are not
currently treated.
---
 main.cpp  | 164 ++++++++++++++++++++++++++++++++++
 swcio.hpp | 257 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 421 insertions(+)
 create mode 100644 swcio.hpp

diff --git a/main.cpp b/main.cpp
index 1f70eac1..88517ede 100644
--- a/main.cpp
+++ b/main.cpp
@@ -5,6 +5,7 @@
 #include "gtest/gtest.h"
 
 #include "cell_tree.hpp"
+#include "swcio.hpp"
 #include "json/src/json.hpp"
 
 using json = nlohmann::json;
@@ -250,6 +251,169 @@ TEST(cell_tree, json_load) {
     }
 }
 
+// SWC tests
+TEST(cell_record, construction)
+{
+    using namespace neuron::io;
+
+    {
+        // force an invalid type
+        cell_record::kind invalid_type = static_cast<cell_record::kind>(100);
+        EXPECT_THROW(cell_record cell(invalid_type, 7, 1., 1., 1., 1., 5),
+                     std::invalid_argument);
+    }
+
+    {
+        // invalid id
+        EXPECT_THROW(cell_record cell(
+                         cell_record::custom, -3, 1., 1., 1., 1., 5),
+                     std::invalid_argument);
+    }
+
+    {
+        // invalid parent id
+        EXPECT_THROW(cell_record cell(
+                         cell_record::custom, 0, 1., 1., 1., 1., -5),
+                     std::invalid_argument);
+    }
+
+    {
+        // invalid radius
+        EXPECT_THROW(cell_record cell(
+                         cell_record::custom, 0, 1., 1., 1., -1., -1),
+                     std::invalid_argument);
+    }
+
+    {
+        // parent_id > id
+        EXPECT_THROW(cell_record cell(
+                         cell_record::custom, 0, 1., 1., 1., 1., 2),
+                     std::invalid_argument);
+    }
+
+    {
+        // parent_id == id
+        EXPECT_THROW(cell_record cell(
+                         cell_record::custom, 0, 1., 1., 1., 1., 0),
+                     std::invalid_argument);
+    }
+
+    {
+        // check standard construction by value
+        cell_record cell(cell_record::custom, 0, 1., 1., 1., 1., -1);
+        EXPECT_EQ(cell.id(), 0);
+        EXPECT_EQ(cell.type(), cell_record::custom);
+        EXPECT_EQ(cell.x(), 1.);
+        EXPECT_EQ(cell.y(), 1.);
+        EXPECT_EQ(cell.z(), 1.);
+        EXPECT_EQ(cell.radius(), 1.);
+        EXPECT_EQ(cell.diameter(), 2*1.);
+        EXPECT_EQ(cell.parent_id(), -1);
+    }
+
+    {
+        // check copy constructor
+        cell_record proto_cell(cell_record::custom, 0, 1., 1., 1., 1., -1);
+        cell_record cell(proto_cell);
+        EXPECT_EQ(cell.id(), 0);
+        EXPECT_EQ(cell.type(), cell_record::custom);
+        EXPECT_EQ(cell.x(), 1.);
+        EXPECT_EQ(cell.y(), 1.);
+        EXPECT_EQ(cell.z(), 1.);
+        EXPECT_EQ(cell.radius(), 1.);
+        EXPECT_EQ(cell.diameter(), 2*1.);
+        EXPECT_EQ(cell.parent_id(), -1);
+    }
+}
+
+TEST(swc_parser, invalid_input)
+{
+    using namespace neuron::io;
+
+    {
+        // check empty file
+        cell_record cell;
+        std::istringstream is("");
+        EXPECT_THROW(is >> cell, std::runtime_error);
+    }
+
+    {
+        // check comment-only file
+        cell_record cell;
+        std::istringstream is("#comment\n#comment\n");
+        EXPECT_THROW(is >> cell, std::runtime_error);
+    }
+
+    {
+        // check incomplete lines; missing parent
+        std::istringstream is("1 1 14.566132 34.873772 7.857000 0.717830\n");
+        cell_record cell;
+        EXPECT_THROW(is >> cell, std::logic_error);
+    }
+
+    {
+        // check incomplete lines; missing newline
+        // FIXME: we should probably accept such files
+        std::istringstream is("1 1 14.566132 34.873772 7.857000 0.717830 -1");
+        cell_record cell;
+        EXPECT_THROW(is >> cell, std::runtime_error);
+    }
+
+    {
+        // Check long lines
+        std::istringstream is(std::string(256, 'a') + "\n");
+        cell_record cell;
+        EXPECT_THROW(is >> cell, std::runtime_error);
+    }
+
+    {
+        // Check non-parsable values
+        std::istringstream is("1a 1 14.566132 34.873772 7.857000 0.717830 -1\n");
+        cell_record cell;
+        EXPECT_THROW(is >> cell, std::logic_error);
+    }
+
+    {
+        // Check invalid cell value
+        std::istringstream is("1 10 14.566132 34.873772 7.857000 0.717830 -1\n");
+        cell_record cell;
+        EXPECT_THROW(is >> cell, std::invalid_argument);
+    }
+}
+
+
+TEST(swc_parser, valid_input)
+{
+    using namespace neuron::io;
+
+    {
+        // check valid input
+        std::istringstream is("\
+# this is a comment\n\
+# this is a comment\n\
+1 1 14.566132 34.873772 7.857000 0.717830 -1  # end-of-line comment\n\
+");
+        cell_record cell;
+        EXPECT_NO_THROW(is >> cell);
+        EXPECT_EQ(cell.id(), 0);    // zero-based indexing
+        EXPECT_EQ(cell.type(), cell_record::soma);
+        EXPECT_FLOAT_EQ(cell.x(), 14.566132);
+        EXPECT_FLOAT_EQ(cell.y(), 34.873772);
+        EXPECT_FLOAT_EQ(cell.z(),  7.857000);
+        EXPECT_FLOAT_EQ(cell.radius(), 0.717830);
+        EXPECT_FLOAT_EQ(cell.parent_id(), -1);
+    }
+
+    {
+        // Test multiple records
+    }
+
+    {
+        // Test input ending with comments
+    }
+
+}
+
 int main(int argc, char **argv) {
     ::testing::InitGoogleTest(&argc, argv);
     return RUN_ALL_TESTS();
diff --git a/swcio.hpp b/swcio.hpp
new file mode 100644
index 00000000..7e9725b7
--- /dev/null
+++ b/swcio.hpp
@@ -0,0 +1,257 @@
+#pragma once
+
+#include <exception>
+#include <iostream>
+#include <sstream>
+#include <type_traits>
+
+namespace neuron
+{
+
+namespace io
+{
+
+
+class cell_record 
+{
+public:
+
+    // FIXME: enum's are not completely type-safe, since they can accept anything
+    // that can be casted to their underlying type.
+    // 
+    // More on SWC files: http://research.mssm.edu/cnic/swc.html
+    enum kind {
+        undefined = 0,
+        soma,
+        axon,
+        dendrite,
+        apical_dendrite,
+        fork_point,
+        end_point,
+        custom
+    };
+
+    // cell records assume zero-based indexing; root's parent remains -1
+    cell_record(kind type, int id, 
+                float x, float y, float z, float r,
+                int parent_id)
+        : type_(type)
+        , id_(id)
+        , x_(x)
+        , y_(y)
+        , z_(z)
+        , r_(r)
+        , parent_id_(parent_id)
+    {
+        // Check cell type as well; enum's do not offer complete type safety,
+        // since you can cast anything that fits to its underlying type
+        if (type_ < 0 || type_ > custom)
+            throw std::invalid_argument("unknown cell type");
+
+        if (id_ < 0)
+            throw std::invalid_argument("negative ids not allowed");
+        
+        if (parent_id_ < -1)
+            throw std::invalid_argument("parent_id < -1 not allowed");
+
+        if (parent_id_ >= id_)
+            throw std::invalid_argument("parent_id >= id is not allowed");
+
+        if (r_ < 0)
+            throw std::invalid_argument("negative radii are not allowed");
+    }
+    
+    cell_record()
+        : type_(cell_record::undefined)
+        , id_(0)
+        , x_(0)
+        , y_(0)
+        , z_(0)
+        , r_(0)
+        , parent_id_(0)
+    { }
+
+    cell_record(const cell_record &other) = default;
+    cell_record &operator=(const cell_record &other) = default;
+
+    kind type()
+    {
+        return type_;
+    }
+
+    int id()
+    {
+        return id_;
+    }
+
+    int parent_id()
+    {
+        return parent_id_;
+    }
+
+    float x()
+    {
+        return x_;
+    }
+
+    float y()
+    {
+        return y_;
+    }
+
+    float z()
+    {
+        return z_;
+    }
+
+    float radius()
+    {
+        return r_;
+    }
+
+    float diameter()
+    {
+        return 2*r_;
+    }
+
+private:
+    kind type_;         // cell type
+    int id_;            // cell id
+    float x_, y_, z_;   // cell coordinates
+    float r_;           // cell radius
+    int parent_id_;     // cell parent's id
+};
+
+class swc_parser
+{
+public:
+    swc_parser(const std::string &delim,
+               char comment_prefix,
+               std::size_t max_fields,
+               std::size_t max_line)
+        : delim_(delim)
+        , comment_prefix_(comment_prefix)
+        , max_fields_(max_fields)
+        , max_line_(max_line)
+    {
+        init_linebuff();
+    }
+               
+
+    swc_parser()
+        : delim_(" ")
+        , comment_prefix_('#')
+        , max_fields_(7)
+        , max_line_(256)
+    {
+        init_linebuff();
+    }
+
+    ~swc_parser()
+    {
+        delete[] linebuff_;
+    }
+
+    cell_record parse_record(std::istream &is)
+    {
+        while (!is.eof() && !is.bad()) {
+            // consume empty and comment lines first
+            is.getline(linebuff_, max_line_);
+            if (linebuff_[0] && linebuff_[0] != comment_prefix_)
+                break;
+        }
+
+        if (is.eof())
+            throw std::runtime_error("unexpected eof found");
+
+        if (is.bad())
+            throw std::runtime_error("i/o error");
+
+        if (is.fail() && is.gcount() == max_line_ - 1)
+            throw std::runtime_error("too long line detected");
+
+        std::istringstream line(linebuff_);
+        return parse_record(line);
+    }
+
+private:
+    void init_linebuff()
+    {
+        linebuff_ = new char[max_line_];
+    }
+
+    void check_parse_status(const std::istream &is)
+    {
+        if (is.fail())
+            // If we try to read past the eof; fail bit will also be set
+            // FIXME: better throw a custom parse_error exception
+            throw std::logic_error("could not parse value");
+
+        if (is.bad())
+            throw std::runtime_error("i/o error");
+    }
+
+    template<typename T>
+    T parse_value_strict(std::istream &is)
+    {
+        T val;
+        is >> val;
+        check_parse_status(is);
+
+        // everything's fine
+        return val;
+    }
+
+    // Read the record from a string stream; will be treated like a single line
+    cell_record parse_record(std::istringstream &is);
+
+    std::string delim_;
+    char comment_prefix_;
+    std::size_t max_fields_;
+    std::size_t max_line_;
+    char *linebuff_;
+};
+
+
+// specialize parsing for cell types
+template<>
+cell_record::kind swc_parser::parse_value_strict(std::istream &is)
+{
+    int val;
+    is >> val;
+    check_parse_status(is);
+
+    // Let cell_record's constructor check for the type validity
+    return static_cast<cell_record::kind>(val);
+}
+
+cell_record swc_parser::parse_record(std::istringstream &is)
+{
+    auto id = parse_value_strict<int>(is);
+    auto type = parse_value_strict<cell_record::kind>(is);
+    auto x = parse_value_strict<float>(is);
+    auto y = parse_value_strict<float>(is);
+    auto z = parse_value_strict<float>(is);
+    auto r = parse_value_strict<float>(is);
+    auto parent_id = parse_value_strict<int>(is);
+
+    // Convert to zero-based, leaving parent_id as-is if -1
+    if (parent_id != -1)
+        parent_id--;
+
+    return cell_record(type, id-1, x, y, z, r, parent_id);
+}
+
+
+std::istream &operator>>(std::istream &is, cell_record &cell)
+{
+    swc_parser parser;
+    cell = parser.parse_record(is);
+    return is;
+}
+
+std::ostream &operator<<(std::ostream &os, const cell_record &cell);
+
+
+}   // end of neuron::io
+}   // end of neuron
-- 
GitLab