From e8bfa5b725cf8e56f79ba5e3e5110a221cac59c3 Mon Sep 17 00:00:00 2001
From: Vasileios Karakasis <karakasis@cscs.ch>
Date: Mon, 1 Feb 2016 14:03:22 +0100
Subject: [PATCH] Cell record cleaning.

Read in cell records and do a basic cleaning:
  1. Remove duplicates.
     Only the first occurance is considered.
  2. Sort the records if not sorted.
  3. Ignore subsequent trees if more than one are specified.
  4. Renumber records if numbering is not contiguous.
---
 src/swcio.hpp        | 119 ++++++++++++++++++++++++++++++++++---------
 tests/test_swcio.cpp |  79 +++++++++++++++++++++++++++-
 2 files changed, 174 insertions(+), 24 deletions(-)

diff --git a/src/swcio.hpp b/src/swcio.hpp
index 1687fa60..63043904 100644
--- a/src/swcio.hpp
+++ b/src/swcio.hpp
@@ -2,8 +2,11 @@
 
 #include <exception>
 #include <iostream>
+#include <map>
 #include <sstream>
 #include <type_traits>
+#include <unordered_set>
+#include <vector>
 
 namespace nestmc
 {
@@ -20,9 +23,10 @@ static bool starts_with(const std::string &str, const std::string &prefix)
 class cell_record 
 {
 public:
+    using id_type = int;
 
-    // FIXME: enum's are not completely type-safe, since they can accept anything
-    // that can be casted to their underlying type.
+    // FIXME: enum's are not completely type-safe, since they can accept
+    // anything that can be casted to their underlying type.
     // 
     // More on SWC files: http://research.mssm.edu/cnic/swc.html
     enum kind {
@@ -48,22 +52,7 @@ public:
         , r_(r)
         , parent_id_(parent_id)
     {
-        // Check cell type as well; enum's do not offer complete type safety,
-        // since you can cast anything that fits to its underlying type
-        if (type_ < 0 || type_ > custom)
-            throw std::invalid_argument("unknown cell type");
-
-        if (id_ < 0)
-            throw std::invalid_argument("negative ids not allowed");
-        
-        if (parent_id_ < -1)
-            throw std::invalid_argument("parent_id < -1 not allowed");
-
-        if (parent_id_ >= id_)
-            throw std::invalid_argument("parent_id >= id is not allowed");
-
-        if (r_ < 0)
-            throw std::invalid_argument("negative radii are not allowed");
+        check_consistency();
     }
     
     cell_record()
@@ -123,12 +112,12 @@ public:
         return type_;
     }
 
-    int id() const
+    id_type id() const
     {
         return id_;
     }
 
-    int parent() const
+    id_type parent() const
     {
         return parent_id_;
     }
@@ -158,12 +147,47 @@ public:
         return 2*r_;
     }
 
+    void renumber(id_type new_id, std::map<id_type, id_type> &idmap)
+    {
+        auto old_id = id_;
+        id_ = new_id;
+
+        // Obtain parent_id from the map
+        auto new_parent_id = idmap.find(parent_id_);
+        if (new_parent_id != idmap.end()) {
+            parent_id_ = new_parent_id->second;
+        }
+
+        check_consistency();
+        idmap.insert(std::make_pair(old_id, new_id));
+    }
+
 private:
+    void check_consistency() const
+    {
+        // Check cell type as well; enum's do not offer complete type safety,
+        // since you can cast anything that fits to its underlying type
+        if (type_ < 0 || type_ > custom)
+            throw std::invalid_argument("unknown cell type");
+
+        if (id_ < 0)
+            throw std::invalid_argument("negative ids not allowed");
+        
+        if (parent_id_ < -1)
+            throw std::invalid_argument("parent_id < -1 not allowed");
+
+        if (parent_id_ >= id_)
+            throw std::invalid_argument("parent_id >= id is not allowed");
+
+        if (r_ < 0)
+            throw std::invalid_argument("negative radii are not allowed");
+    }
+
     kind type_;         // cell type
-    int id_;            // cell id
+    id_type id_;        // cell id
     float x_, y_, z_;   // cell coordinates
     float r_;           // cell radius
-    int parent_id_;     // cell parent's id
+    id_type parent_id_; // cell parent's id
 };
 
 class swc_parse_error : public std::runtime_error
@@ -249,7 +273,7 @@ private:
 template<>
 cell_record::kind swc_parser::parse_value_strict(std::istream &is)
 {
-    int val;
+    cell_record::id_type val;
     check_parse_status(is >> val);
 
     // Let cell_record's constructor check for the type validity
@@ -296,5 +320,54 @@ std::ostream &operator<<(std::ostream &os, const cell_record &cell)
     return os;
 }
 
+//
+// Reads cells from an input stream until an eof is encountered and returns a
+// cleaned sequence of cell records.
+//
+// For more information check here:
+//   https://github.com/eth-cscs/cell_algorithms/wiki/SWC-file-parsing
+//
+std::vector<cell_record> swc_read_cells(std::istream &is)
+{
+    std::vector<cell_record> cells;
+    std::unordered_set<cell_record::id_type> ids;
+
+    std::size_t          num_trees = 0;
+    cell_record::id_type last_id   = -1;
+    bool                 needsort  = false;
+
+    cell_record curr_cell;
+    while ( !(is >> curr_cell).eof()) {
+        if (curr_cell.parent() == -1 && ++num_trees > 1)
+            // only a single tree is allowed
+            break;
+
+        auto inserted = ids.insert(curr_cell.id());
+        if (inserted.second) {
+            // not a duplicate; insert cell
+            cells.push_back(curr_cell);
+            if (!needsort && curr_cell.id() < last_id)
+                needsort = true;
+
+            last_id = curr_cell.id();
+        }
+    }
+
+    if (needsort)
+        std::sort(cells.begin(), cells.end());
+
+    // Renumber cells if necessary
+    std::map<cell_record::id_type, cell_record::id_type> idmap;
+    cell_record::id_type next_id = 0;
+    for (auto &c : cells) {
+        if (c.id() != next_id)
+            c.renumber(next_id, idmap);
+
+        ++next_id;
+    }
+
+    return std::move(cells);
+}
+
 }   // end of nestmc::io
 }   // end of nestmc
diff --git a/tests/test_swcio.cpp b/tests/test_swcio.cpp
index 923f39b4..ba62c90b 100644
--- a/tests/test_swcio.cpp
+++ b/tests/test_swcio.cpp
@@ -1,3 +1,4 @@
+#include <array>
 #include <iostream>
 #include <fstream>
 #include <numeric>
@@ -191,7 +192,7 @@ TEST(swc_parser, valid_input)
                 ++nr_records;
             }
         } catch (std::exception &e) {
-            ADD_FAILURE();
+            ADD_FAILURE() << "unexpected exception thrown\n";
         }
     }
 }
@@ -216,3 +217,79 @@ TEST(swc_parser, from_allen_db)
     // verify that the correct number of nodes was read
     EXPECT_EQ(nodes.size(), 1058u);
 }
+
+TEST(swc_parser, input_cleaning)
+{
+    using namespace nestmc::io;
+    
+    {
+        // Check duplicates
+        std::stringstream is;
+        is << "1 1 14.566132 34.873772 7.857000 0.717830 -1\n";
+        is << "2 1 14.566132 34.873772 7.857000 0.717830 1\n";
+        is << "2 1 14.566132 34.873772 7.857000 0.717830 1\n";
+        is << "2 1 14.566132 34.873772 7.857000 0.717830 1\n";
+
+        auto cells = swc_read_cells(is);
+        EXPECT_EQ(2, cells.size());
+    }
+
+    {
+        // Check multiple trees
+        std::stringstream is;
+        is << "1 1 14.566132 34.873772 7.857000 0.717830 -1\n";
+        is << "2 1 14.566132 34.873772 7.857000 0.717830 1\n";
+        is << "3 1 14.566132 34.873772 7.857000 0.717830 -1\n";
+        is << "4 1 14.566132 34.873772 7.857000 0.717830 1\n";
+
+        auto cells = swc_read_cells(is);
+        EXPECT_EQ(2, cells.size());
+    }
+
+    {
+        // Check unsorted input
+        std::stringstream is;
+        is << "3 1 14.566132 34.873772 7.857000 0.717830 1\n";
+        is << "2 1 14.566132 34.873772 7.857000 0.717830 1\n";
+        is << "4 1 14.566132 34.873772 7.857000 0.717830 1\n";
+        is << "1 1 14.566132 34.873772 7.857000 0.717830 -1\n";
+
+        std::array<cell_record::id_type, 4> expected_id_list = {{ 0, 1, 2, 3 }};
+        auto cells = swc_read_cells(is);
+        ASSERT_EQ(4, cells.size());
+        
+        auto expected_id = expected_id_list.cbegin();
+        for (const auto &c : cells) {
+            EXPECT_EQ(*expected_id, c.id());
+            ++expected_id;
+        }
+    }
+
+    {
+        // Check holes in numbering
+        std::stringstream is;
+        is << "1 1 14.566132 34.873772 7.857000 0.717830 -1\n";
+        is << "21 1 14.566132 34.873772 7.857000 0.717830 1\n";
+        is << "31 1 14.566132 34.873772 7.857000 0.717830 21\n";
+        is << "41 1 14.566132 34.873772 7.857000 0.717830 21\n";
+        is << "51 1 14.566132 34.873772 7.857000 0.717830 1\n";
+        is << "61 1 14.566132 34.873772 7.857000 0.717830 51\n";
+
+        auto cells = swc_read_cells(is);
+        std::array<cell_record::id_type, 6> expected_id_list =
+            {{ 0, 1, 2, 3, 4, 5 }};
+        std::array<cell_record::id_type, 6> expected_parent_list =
+            {{ -1, 0, 1, 1, 0, 4 }};
+        ASSERT_EQ(6, cells.size());
+
+        auto expected_id = expected_id_list.cbegin();
+        auto expected_parent = expected_parent_list.cbegin();
+        for (const auto &c : cells) {
+            EXPECT_EQ(*expected_id, c.id());
+            EXPECT_EQ(*expected_parent, c.parent());
+            ++expected_id;
+            ++expected_parent;
+        }
+        
+    }
+}
-- 
GitLab