From 3c453b640ade419432d761480fc77c7bf90de4f8 Mon Sep 17 00:00:00 2001 From: Sam Yates <halfflat@gmail.com> Date: Fri, 25 Sep 2020 15:54:20 +0200 Subject: [PATCH] Add NeuroML2 morphology support in new library. (#1148) * Add CMake infrastructure for new `arbornml` library comprising Arbor's NeuroML2 (C++) support. * Implement NeuroML2 parsing and interpretation, using libxml2 for XML parsing. * Add associated documentation, unit tests. * Replace `arb::util::optional` with `std::optional` in stitch morphology interface. * Add optional prefix to `arb::label_dict` import. * Update CI to test arbornml, with associated workarounds for OS X targets. * Remove glob functionality from `sup`, as it is no longer needed (it was used for lmorpho) and it triggers yet another OS X issue. Fixes #1088. --- .travis.yml | 48 +- CMakeLists.txt | 46 +- arbor/include/arbor/morph/label_dict.hpp | 2 +- arbor/include/arbor/morph/stitch.hpp | 4 +- arbor/morph/label_dict.cpp | 6 +- arbor/morph/stitch.cpp | 2 +- arbornml/CMakeLists.txt | 32 ++ arbornml/arbornml.cpp | 96 ++++ arbornml/include/arbornml/arbornml.hpp | 74 +++ arbornml/include/arbornml/nmlexcept.hpp | 67 +++ arbornml/include/arbornml/with_xml.hpp | 24 + arbornml/nmlexcept.cpp | 57 ++ arbornml/parse_morphology.cpp | 557 ++++++++++++++++++++ arbornml/parse_morphology.hpp | 10 + arbornml/with_xml.cpp | 22 + arbornml/xmlwrap.cpp | 128 +++++ arbornml/xmlwrap.hpp | 317 +++++++++++ cmake/CompilerOptions.cmake | 9 + cmake/arbor-config.cmake.in | 23 +- doc/cpp_neuroml.rst | 154 ++++++ doc/index.rst | 1 + doc/install.rst | 40 +- scripts/travis/build.sh | 7 +- sup/CMakeLists.txt | 8 - sup/glob_basic.cpp | 247 --------- sup/glob_basic_wrap.cpp | 13 - sup/glob_posix.cpp | 49 -- sup/include/sup/glob.hpp | 101 ---- test/unit/CMakeLists.txt | 12 +- test/unit/test_glob_basic.cpp | 225 -------- test/unit/test_nml_morphology.cpp | 641 +++++++++++++++++++++++ 31 files changed, 2330 insertions(+), 692 deletions(-) create mode 100644 arbornml/CMakeLists.txt create mode 100644 arbornml/arbornml.cpp create mode 100644 arbornml/include/arbornml/arbornml.hpp create mode 100644 arbornml/include/arbornml/nmlexcept.hpp create mode 100644 arbornml/include/arbornml/with_xml.hpp create mode 100644 arbornml/nmlexcept.cpp create mode 100644 arbornml/parse_morphology.cpp create mode 100644 arbornml/parse_morphology.hpp create mode 100644 arbornml/with_xml.cpp create mode 100644 arbornml/xmlwrap.cpp create mode 100644 arbornml/xmlwrap.hpp create mode 100644 doc/cpp_neuroml.rst delete mode 100644 sup/glob_basic.cpp delete mode 100644 sup/glob_basic_wrap.cpp delete mode 100644 sup/glob_posix.cpp delete mode 100644 sup/include/sup/glob.hpp delete mode 100644 test/unit/test_glob_basic.cpp create mode 100644 test/unit/test_nml_morphology.cpp diff --git a/.travis.yml b/.travis.yml index 87a734c5..66ddee2a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ ######## Testing minimal compiler requirements ######## # GCC 8.1.0 -# Clang 7.0 +# Clang 8.0 # Apple Clang 1100.0.33.16 ####################################################### @@ -16,45 +16,45 @@ matrix: ## test gcc8 - single node/rank with threading backend ## - name: "osx, gcc, serial, py" os: osx - osx_image: xcode11.3 + osx_image: xcode12 python: 3.6 env: - MATRIX_EVAL="brew install gcc@8 && brew link --force --overwrite gcc@8 && brew install cmake && CC=gcc-8 && CXX=g++-8" - BUILD_NAME=cthread-osx-gcc-py - - WITH_DISTRIBUTED=serial WITH_PYTHON=true PY=3 ARCH=native + - WITH_DISTRIBUTED=serial WITH_PYTHON=true PY=3 WITH_NEUROML=ON ARCH=native compiler: gcc-8 ## test gcc8 - mpi with threading backend ## - name: "osx, gcc, mpi, py" os: osx - osx_image: xcode11.3 + osx_image: xcode12 python: 3.6 env: - MATRIX_EVAL="brew install gcc@8 && brew link --force --overwrite gcc@8 && brew install cmake && CC=gcc-8 && CXX=g++-8" - BUILD_NAME=mpi-osx-gcc-py - - WITH_DISTRIBUTED=mpi WITH_PYTHON=true PY=3 ARCH=native + - WITH_DISTRIBUTED=mpi WITH_PYTHON=true PY=3 WITH_NEUROML=ON ARCH=native compiler: gcc-8 ## test clang9 - single node/rank with threading backend ## - name: "osx, apple clang, serial, py" os: osx - osx_image: xcode11.3 + osx_image: xcode12 python: 3.6 env: - MATRIX_EVAL="CC=clang && CXX=clang++" - BUILD_NAME=cthread-osx-clang-py - - WITH_DISTRIBUTED=serial WITH_PYTHON=true PY=3 ARCH=native + - WITH_DISTRIBUTED=serial WITH_PYTHON=true PY=3 WITH_NEUROML=ON ARCH=native compiler: clang ## test clang9 - mpi with threading backend ## - name: "osx, apple clang, mpi, py" os: osx - osx_image: xcode11.3 + osx_image: xcode12 python: 3.6 env: - MATRIX_EVAL="CC=clang && CXX=clang++" - BUILD_NAME=mpi-osx-clang - - WITH_DISTRIBUTED=mpi WITH_PYTHON=true PY=3 ARCH=native + - WITH_DISTRIBUTED=mpi WITH_PYTHON=true PY=3 WITH_NEUROML=ON ARCH=native compiler: clang ######################### LINUX ######################### @@ -69,10 +69,11 @@ matrix: - g++-8 - openmpi-bin - libopenmpi-dev + - libxml2 env: - MATRIX_EVAL="CC=gcc-8 && CXX=g++-8" - BUILD_NAME=cthread-linux-gcc-py - - WITH_DISTRIBUTED=serial WITH_PYTHON=true PY=3 ARCH=haswell + - WITH_DISTRIBUTED=serial WITH_PYTHON=true PY=3 WITH_NEUROML=ON ARCH=haswell compiler: gcc-8 ## test gcc8 - mpi with threading backend ## @@ -86,13 +87,15 @@ matrix: - g++-8 - openmpi-bin - libopenmpi-dev + - libxml2 env: - MATRIX_EVAL="CC=gcc-8 && CXX=g++-8" - BUILD_NAME=mpi-linux-gcc-py - - WITH_DISTRIBUTED=mpi WITH_PYTHON=true PY=3 ARCH=haswell + - WITH_DISTRIBUTED=mpi WITH_PYTHON=true PY=3 WITH_NEUROML=ON ARCH=haswell compiler: gcc-8 -## test clang7 - single node/rank with threading backend ## +## test clang8 - single node/rank with threading backend ## +# Note: need g++8 for C++17 stdlib. - name: "linux, clang, serial, py" os: linux dist: bionic @@ -100,15 +103,19 @@ matrix: apt: sources: packages: + - g++-8 + - clang-8 - openmpi-bin - libopenmpi-dev + - libxml2 env: - - MATRIX_EVAL="CC=clang && CXX=clang++" + - MATRIX_EVAL="CC=clang-8 && CXX=clang++-8" - BUILD_NAME=cthread-linux-clang-py - - WITH_DISTRIBUTED=serial WITH_PYTHON=true PY=3 ARCH=native - compiler: clang-7.0 + - WITH_DISTRIBUTED=serial WITH_PYTHON=true PY=3 WITH_NEUROML=ON ARCH=native + compiler: clang-8.0 -## test clang7 - mpi with threading backend ## +## test clang8 - mpi with threading backend ## +# Note: need g++8 for C++17 stdlib. - name: "linux, clang, mpi, py" os: linux dist: bionic @@ -116,13 +123,16 @@ matrix: apt: sources: packages: + - g++-8 + - clang-8 - openmpi-bin - libopenmpi-dev + - libxml2 env: - - MATRIX_EVAL="CC=clang && CXX=clang++" + - MATRIX_EVAL="CC=clang-8 && CXX=clang++-8" - BUILD_NAME=mpi-linux-clang-py - - WITH_DISTRIBUTED=mpi WITH_PYTHON=true PY=3 ARCH=native - compiler: clang-7.0 + - WITH_DISTRIBUTED=mpi WITH_PYTHON=true PY=3 WITH_NEUROML=ON ARCH=native + compiler: clang-8.0 before_install: - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export HOMEBREW_NO_AUTO_UPDATE=1; brew cask uninstall --force oclint; fi diff --git a/CMakeLists.txt b/CMakeLists.txt index 25b81d6d..9bedee3d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,11 +28,6 @@ option(ARB_VECTORIZE "use explicit SIMD code in generated mechanisms" OFF) set(ARB_MODCC "" CACHE STRING "path to external modcc NMODL compiler") -# For sup::glop, just wrap POSIX glob(3)? -# Turn off for platforms without glob(3) in libc, e.g. Android Bionic. - -option(ARB_USE_POSIX_GLOB "wrap POSIX glob(3) for glob functionality" ON) - # Use libunwind to generate stack traces on errors? option(ARB_UNWIND "Use libunwind for stack trace printing if available" OFF) @@ -52,6 +47,12 @@ option(ARB_WITH_PROFILING "use built-in profiling" OFF) option(ARB_WITH_ASSERTIONS "enable arb_assert() assertions in code" OFF) +#---------------------------------------------------------- +# NeuroML support library: +#---------------------------------------------------------- + +option(ARB_WITH_NEUROML "build NeuroML support library" OFF) + #---------------------------------------------------------- # Python front end for Arbor: #---------------------------------------------------------- @@ -153,8 +154,8 @@ set(CMAKE_CXX_EXTENSIONS OFF) # in the same CMakeLists.txt in which the target is defined. # Interface library `arbor-config-defs` collects configure-time defines -# for arbor and arborenv, of the form ARB_HAVE_XXX. These defines should -# _not_ be used in any installed public headers. +# for arbor, arborenv and arbornml, of the form ARB_HAVE_XXX. These +# defines should _not_ be used in any installed public headers. add_library(arbor-config-defs INTERFACE) install(TARGETS arbor-config-defs EXPORT arbor-targets) @@ -173,6 +174,13 @@ add_library(arborenv-private-deps INTERFACE) target_link_libraries(arborenv-private-deps INTERFACE arbor-config-defs) install(TARGETS arborenv-private-deps EXPORT arbor-targets) +# Interface library `arbornml-private-deps` collects dependencies, options etc. +# for the arbornml library. + +add_library(arbornml-private-deps INTERFACE) +target_link_libraries(arbornml-private-deps INTERFACE arbor-config-defs) +install(TARGETS arbornml-private-deps EXPORT arbor-targets) + # Interface library `arbor-public-deps` collects requirements for the # users of the arbor library (e.g. mpi) that will become part # of arbor's PUBLIC interface. @@ -180,6 +188,13 @@ install(TARGETS arborenv-private-deps EXPORT arbor-targets) add_library(arbor-public-deps INTERFACE) install(TARGETS arbor-public-deps EXPORT arbor-targets) +# Interface library `arbornml-public-deps` collects requirements for the +# users of the arbornml library (e.g. xml libs) that will become part +# of arbornml's PUBLIC interface. + +add_library(arbornml-public-deps INTERFACE) +install(TARGETS arbornml-public-deps EXPORT arbornml-targets) + # External libraries in `ext` sub-directory: json, tinyopt and randon123. # Creates interface libraries `ext-json`, `ext-tinyopt` and `ext-random123` @@ -191,7 +206,8 @@ add_subdirectory(ext) set(arbor_export_dependencies) # Keep track of which 'components' of arbor are included (this is -# currently just captures MPI support) +# currently just 'MPI' support and 'neuroml' for NeuroML support in +# libarbornml.) set(arbor_supported_components) @@ -388,6 +404,11 @@ add_subdirectory(arbor) # arborenv, arborenv-public-headers: add_subdirectory(arborenv) +# arbornml, arbornml-public-headers: +if(ARB_WITH_NEUROML) + add_subdirectory(arbornml) +endif() + # unit, unit-mpi, unit-local, unit-modcc add_subdirectory(test) @@ -398,9 +419,9 @@ add_subdirectory(example) add_subdirectory(doc) # python interface: -if (ARB_WITH_PYTHON) +if(ARB_WITH_PYTHON) add_subdirectory(python) -endif () +endif() #---------------------------------------------------------- # Generate CMake config/version files for install. @@ -437,6 +458,7 @@ endif() set(arbor_override_import_lang) set(arbor_add_import_libs) set(arborenv_add_import_libs) +set(arbornml_add_import_libs) if(ARB_WITH_GPU) set(arbor_override_import_lang CXX) @@ -444,8 +466,10 @@ if(ARB_WITH_GPU) set(arborenv_add_import_libs ${CUDA_LIBRARIES}) endif() +# (We remove old generated one so that the generation happens every time we run cmake.) +file(REMOVE "${CMAKE_CURRENT_BINARY_DIR}/arbor-config.cmake") configure_file( - cmake/arbor-config.cmake.in + "${CMAKE_CURRENT_SOURCE_DIR}/cmake/arbor-config.cmake.in" "${CMAKE_CURRENT_BINARY_DIR}/arbor-config.cmake" @ONLY) diff --git a/arbor/include/arbor/morph/label_dict.hpp b/arbor/include/arbor/morph/label_dict.hpp index f15630d6..750b7871 100644 --- a/arbor/include/arbor/morph/label_dict.hpp +++ b/arbor/include/arbor/morph/label_dict.hpp @@ -16,7 +16,7 @@ class label_dict { reg_map regions_; public: - void import(const label_dict& other); + void import(const label_dict& other, const std::string& prefix = ""); void set(const std::string& name, locset ls); void set(const std::string& name, region reg); diff --git a/arbor/include/arbor/morph/stitch.hpp b/arbor/include/arbor/morph/stitch.hpp index 1d0287f9..b90e56df 100644 --- a/arbor/include/arbor/morph/stitch.hpp +++ b/arbor/include/arbor/morph/stitch.hpp @@ -1,5 +1,6 @@ #pragma once +#include <optional> #include <memory> #include <string> #include <vector> @@ -8,7 +9,6 @@ #include <arbor/morph/primitives.hpp> #include <arbor/morph/label_dict.hpp> #include <arbor/morph/region.hpp> -#include <arbor/util/optional.hpp> namespace arb { @@ -25,7 +25,7 @@ namespace arb { struct mstitch { std::string id; - util::optional<mpoint> prox; + std::optional<mpoint> prox; mpoint dist; int tag; diff --git a/arbor/morph/label_dict.cpp b/arbor/morph/label_dict.cpp index b62fa563..0d15b567 100644 --- a/arbor/morph/label_dict.cpp +++ b/arbor/morph/label_dict.cpp @@ -25,12 +25,12 @@ void label_dict::set(const std::string& name, arb::region reg) { regions_[name] = std::move(reg); } -void label_dict::import(const label_dict& other) { +void label_dict::import(const label_dict& other, const std::string& prefix) { for (const auto& entry: other.locsets()) { - set(entry.first, entry.second); + set(prefix+entry.first, entry.second); } for (const auto& entry: other.regions()) { - set(entry.first, entry.second); + set(prefix+entry.first, entry.second); } } diff --git a/arbor/morph/stitch.cpp b/arbor/morph/stitch.cpp index fafd8460..341ee714 100644 --- a/arbor/morph/stitch.cpp +++ b/arbor/morph/stitch.cpp @@ -1,4 +1,5 @@ #include <memory> +#include <optional> #include <unordered_map> #include <vector> @@ -6,7 +7,6 @@ #include <arbor/morph/morphexcept.hpp> #include <arbor/morph/locset.hpp> #include <arbor/morph/region.hpp> -#include <arbor/util/optional.hpp> #include "util/ordered_forest.hpp" #include "util/maputil.hpp" diff --git a/arbornml/CMakeLists.txt b/arbornml/CMakeLists.txt new file mode 100644 index 00000000..a4d94498 --- /dev/null +++ b/arbornml/CMakeLists.txt @@ -0,0 +1,32 @@ +set(arbornml-sources + arbornml.cpp + nmlexcept.cpp + parse_morphology.cpp + with_xml.cpp + xmlwrap.cpp +) + +find_package(LibXml2 REQUIRED) + +add_library(arbornml ${arbornml-sources}) + +add_library(arbornml-public-headers INTERFACE) +target_include_directories(arbornml-public-headers INTERFACE + $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> + $<INSTALL_INTERFACE:include> +) + +target_link_libraries(arbornml PUBLIC arbor arbornml-public-headers LibXml2::LibXml2) +target_link_libraries(arbornml PRIVATE arbor-config-defs arbornml-private-deps) + +install(DIRECTORY include/arbornml + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + FILES_MATCHING PATTERN "*.hpp") + +install(TARGETS arbornml-public-headers EXPORT arbor-targets) +install(TARGETS arbornml EXPORT arbor-targets ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +list(APPEND arbor_export_dependencies "LibXml2") +set(arbor_export_dependencies "${arbor_export_dependencies}" PARENT_SCOPE) +list(APPEND arbor_supported_components "neuroml") +set(arbor_supported_components "${arbor_supported_components}" PARENT_SCOPE) diff --git a/arbornml/arbornml.cpp b/arbornml/arbornml.cpp new file mode 100644 index 00000000..15c5f555 --- /dev/null +++ b/arbornml/arbornml.cpp @@ -0,0 +1,96 @@ +#include <memory> +#include <optional> +#include <string> +#include <vector> + +#include <arbornml/arbornml.hpp> +#include <arbornml/nmlexcept.hpp> + +#include "parse_morphology.hpp" +#include "xmlwrap.hpp" + +using std::optional; +using std::nullopt; + +namespace arbnml { + +struct neuroml_impl { + xml_doc doc; + + neuroml_impl() {} + + explicit neuroml_impl(std::string text) { + xml_error_scope err; + doc = xml_doc(text); + } + + xml_xpathctx make_context() const { + if (!doc) throw no_document{}; + + auto ctx = xpath_context(doc); + ctx.register_ns("nml", "http://www.neuroml.org/schema/neuroml2"); + return ctx; + } +}; + +neuroml::neuroml(): impl_(new neuroml_impl) {} +neuroml::neuroml(std::string nml_document): impl_(new neuroml_impl{nml_document}) {} + +neuroml::neuroml(neuroml&&) = default; +neuroml& neuroml::operator=(neuroml&&) = default; + +neuroml::~neuroml() = default; + +std::vector<std::string> neuroml::cell_ids() const { + xml_error_scope err; + std::vector<std::string> result; + + auto ctx = impl_->make_context(); + auto matches = ctx.query("//nml:neuroml/nml:cell/@id"); + + result.reserve(matches.size()); + for (auto node: matches) { + result.push_back(std::string(node.content())); + } + + return result; +} + +std::vector<std::string> neuroml::morphology_ids() const { + xml_error_scope err; + std::vector<std::string> result; + + auto ctx = impl_->make_context(); + auto matches = ctx.query("//nml:neuroml/nml:morphology/@id"); + + result.reserve(matches.size()); + for (auto node: matches) { + result.push_back(std::string(node.content())); + } + + return result; +} + +optional<morphology_data> neuroml::morphology(const std::string& morph_id) const { + xml_error_scope err; + auto ctx = impl_->make_context(); + auto matches = ctx.query("//nml:neuroml/nml:morphology[@id="+xpath_escape(morph_id)+"]"); + + return matches.empty()? nullopt: optional(parse_morphology_element(ctx, matches[0])); +} + +optional<morphology_data> neuroml::cell_morphology(const std::string& cell_id) const { + xml_error_scope err; + auto ctx = impl_->make_context(); + auto matches = ctx.query( + "( //nml:neuroml/nml:morphology[@id=string((//nml:neuroml/nml:cell[@id="+xpath_escape(cell_id)+"]/@morphology)[1])] | " + " //nml:neuroml/nml:cell[@id="+xpath_escape(cell_id)+"]/nml:morphology )[1]"); + + if (matches.empty()) return nullopt; + + morphology_data M = parse_morphology_element(ctx, matches[0]); + M.cell_id = cell_id; + return M; +} + +} // namespace arbnml diff --git a/arbornml/include/arbornml/arbornml.hpp b/arbornml/include/arbornml/arbornml.hpp new file mode 100644 index 00000000..b925aaed --- /dev/null +++ b/arbornml/include/arbornml/arbornml.hpp @@ -0,0 +1,74 @@ +#pragma once + +#include <optional> +#include <memory> +#include <string> +#include <unordered_map> +#include <vector> + +#include <arbor/morph/label_dict.hpp> +#include <arbor/morph/morphology.hpp> + +namespace arbnml { + +// Collect and parse morphology elements from XML. +// No validation is performed against the NeuroML v2 schema. + +// Note: segment id values are interpreted as unsigned long long values; +// parsing larger segment ids will throw an exception. + +struct morphology_data { + // Cell id, or empty if morphology was taken from a top-level <morphology> element. + std::optional<std::string> cell_id; + + // Morphology id. + std::string id; + + // Morphology constructed from a signle NeuroML <morphology> element. + arb::morphology morphology; + + // One region expression for each segment id. + arb::label_dict segments; + + // One region expression for each name applied to one or more segments. + arb::label_dict named_segments; + + // One region expression for each segmentGroup id. + arb::label_dict groups; + + // Map from segmentGroup ids to their corresponding segment ids. + std::unordered_map<std::string, std::vector<unsigned long long>> group_segments; +}; + +// Represent NeuroML data determined by provided string. + +struct neuroml_impl; + +struct neuroml { + neuroml(); + explicit neuroml(std::string nml_document); + + neuroml(neuroml&&); + neuroml(const neuroml&) = delete; + + neuroml& operator=(neuroml&&); + neuroml& operator=(const neuroml&) = delete; + + // Query top-level cells and (standalone) morphologies. + + std::vector<std::string> cell_ids() const; + std::vector<std::string> morphology_ids() const; + + // Parse and retrieve top-level morphology or morphology associated with a cell. + // Return nullopt if not found. + + std::optional<morphology_data> morphology(const std::string& morph_id) const; + std::optional<morphology_data> cell_morphology(const std::string& cell_id) const; + + ~neuroml(); + +private: + std::unique_ptr<neuroml_impl> impl_; +}; + +} // namespace arbnml diff --git a/arbornml/include/arbornml/nmlexcept.hpp b/arbornml/include/arbornml/nmlexcept.hpp new file mode 100644 index 00000000..bba15a80 --- /dev/null +++ b/arbornml/include/arbornml/nmlexcept.hpp @@ -0,0 +1,67 @@ +#pragma once + +#include <cstddef> +#include <stdexcept> +#include <string> + +namespace arbnml { + +// Common base-class for arbnml run-time errors. + +struct neuroml_exception: std::runtime_error { + neuroml_exception(const std::string& what_arg): + std::runtime_error(what_arg) + {} +}; + +// Generic XML error (as reported by libxml2). + +struct xml_error: neuroml_exception { + xml_error(const std::string& xml_error_msg, unsigned line = 0); + std::string xml_error_msg; + unsigned line; +}; + +// Can't parse NeuroML if we don't have a document. + +struct no_document: neuroml_exception { + no_document(); +}; + +// Generic error parsing NeuroML data. + +struct parse_error: neuroml_exception { + parse_error(const std::string& error_msg, unsigned line = 0); + std::string error_msg; + unsigned line; +}; + +// NeuroML morphology error: improper segment data, e.g. bad id specification, +// segment parent does not exist, fractionAlong is out of bounds, missing +// required <proximal> data. + +struct bad_segment: neuroml_exception { + bad_segment(unsigned long long segment_id, unsigned line = 0); + unsigned long long segment_id; + unsigned line; +}; + +// NeuroML morphology error: improper segmentGroup data, e.g. malformed +// element data, missing referenced segments or groups, etc. + +struct bad_segment_group: neuroml_exception { + bad_segment_group(const std::string& group_id, unsigned line = 0); + std::string group_id; + unsigned line; +}; + +// A segment or segmentGroup ultimately refers to itself via `parent` +// or `include` elements respectively. + +struct cyclic_dependency: neuroml_exception { + cyclic_dependency(const std::string& id, unsigned line = 0); + std::string id; + unsigned line; +}; + +} // namespace arbnml diff --git a/arbornml/include/arbornml/with_xml.hpp b/arbornml/include/arbornml/with_xml.hpp new file mode 100644 index 00000000..c308451f --- /dev/null +++ b/arbornml/include/arbornml/with_xml.hpp @@ -0,0 +1,24 @@ +#pragma once + +// Wrap initialization and cleanup of libxml2 library. +// +// Use of `with_xml` is only necessary if arbornml is being +// used in a multithreaded context and the client code is +// not managing libxml2 initialization and cleanup. + +namespace arbnml { + +struct with_xml { + with_xml(); + ~with_xml(); + + with_xml(with_xml&&); + with_xml(const with_xml&) = delete; + + with_xml& operator=(const with_xml&) = delete; + with_xml& operator=(with_xml&&) = delete; + + bool run_cleanup_; +}; + +} // namespace arbnml diff --git a/arbornml/nmlexcept.cpp b/arbornml/nmlexcept.cpp new file mode 100644 index 00000000..7ad1c8ca --- /dev/null +++ b/arbornml/nmlexcept.cpp @@ -0,0 +1,57 @@ +#include <string> + +#include <arbornml/nmlexcept.hpp> + +namespace arbnml { + +static std::string fmt_error(const char* prefix, const std::string& err, unsigned line) { + return prefix + (line==0? err: "line " + std::to_string(line) + ": " + err); +} + +xml_error::xml_error(const std::string& xml_error_msg, unsigned line): + neuroml_exception(fmt_error("xml error: ", xml_error_msg, line)), + xml_error_msg(xml_error_msg), + line(line) +{} + +no_document::no_document(): + neuroml_exception("no NeuroML document to parse") +{} + +parse_error::parse_error(const std::string& error_msg, unsigned line): + neuroml_exception(fmt_error("parse error: ", error_msg, line)), + error_msg(error_msg), + line(line) +{} + +bad_segment::bad_segment(unsigned long long segment_id, unsigned line): + neuroml_exception( + fmt_error( + "bad morphology segment: ", + "segment "+(segment_id+1==0? "unknown": "\""+std::to_string(segment_id)+"\""), + line)), + segment_id(segment_id), + line(line) +{} + +bad_segment_group::bad_segment_group(const std::string& group_id, unsigned line): + neuroml_exception( + fmt_error( + "bad morphology segmentGroup: ", + "segmentGroup id "+(group_id.empty()? "unknown": "\""+group_id+"\""), + line)), + group_id(group_id), + line(line) +{} + +cyclic_dependency::cyclic_dependency(const std::string& id, unsigned line): + neuroml_exception( + fmt_error( + "cyclic dependency: ", + "element id \""+id+"\"", + line)), + id(id), + line(line) +{} + +} // namespace arbnml diff --git a/arbornml/parse_morphology.cpp b/arbornml/parse_morphology.cpp new file mode 100644 index 00000000..3f3cbf55 --- /dev/null +++ b/arbornml/parse_morphology.cpp @@ -0,0 +1,557 @@ +#include <algorithm> +#include <optional> +#include <numeric> +#include <stack> +#include <string> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +#include <arbor/assert.hpp> +#include <arbor/morph/locset.hpp> +#include <arbor/morph/primitives.hpp> +#include <arbor/morph/region.hpp> +#include <arbor/morph/stitch.hpp> +#include <arbor/util/expected.hpp> + +#include <arbornml/arbornml.hpp> +#include <arbornml/nmlexcept.hpp> + +#include "parse_morphology.hpp" +#include "xmlwrap.hpp" + +using std::optional; +using arb::region; +using arb::util::expected; +using arb::util::unexpected; + +namespace arbnml { + +// Box is a container of size 0 or 1. + +template <typename X> +struct box { + optional<X> x; + + X* begin() { return x? &(*x): nullptr; } + X* end() { return x? &(*x)+1: nullptr; } + + const X* begin() const { return x? &(*x): nullptr; } + const X* end() const { return x? &(*x)+1: nullptr; } + + box() = default; + box(const X& x): x(x) {} + box(X&& x): x(std::move(x)) {} + + std::size_t size() const { return !empty(); } + bool empty() const { return !x; } +}; + +// Merge two sorted sequences, discarding duplicate values. + +template <typename Input1, typename Input2, typename Output> +void unique_merge(Input1 i1, Input1 end1, Input2 i2, Input2 end2, Output out) { + while (i1!=end1 || i2!=end2) { + if (i1!=end1) { + while (i2!=end2 && *i2==*i1) ++i2; + } + + if (i2==end2 || (i1!=end1 && !(*i2<*i1))) { + const auto& v = *i1++; + *out++ = v; + while (i1!=end1 && *i1==v) ++i1; + } + else { + const auto& v = *i2++; + *out++ = v; + while (i2!=end2 && *i2==v) ++i2; + } + } +} + +// Return vector of depths; sorting object collection by depth will +// give a topological order. +// +// The functional Inset takes an index into the collection of objects, +// and returns a range or collection of indices to that object's precedessors. +// +// If a cycle is encountered, return detected_cycle{i} where i +// is the index of an item in the cycle. + +struct cycle_detected { std::size_t index; }; + +template <typename Inset> +expected<std::vector<std::size_t>, cycle_detected> topological_sort(std::size_t n, Inset inset) { + using std::begin; + using std::end; + + constexpr std::size_t unknown = -1; + constexpr std::size_t cycle = -2; + + std::vector<std::size_t> depth(n, unknown); + std::stack<std::size_t> stack; + + for (std::size_t i = 0; i<n; ++i) { + if (depth[i]!=unknown) continue; + + depth[i] = cycle; + stack.push(i); + + while (!stack.empty()) { + std::size_t j = stack.top(); + std::size_t d = 0; + bool resolve = true; + + auto in = inset(j); + for (auto k = begin(in); k!=end(in); ++k) { + switch (depth[*k]) { + case cycle: + return unexpected(cycle_detected{*k}); + case unknown: + depth[*k] = cycle; + stack.push(*k); + resolve = false; + break; + default: + d = std::max(d, 1+depth[*k]); + } + } + + if (resolve) { + depth[j] = d; + stack.pop(); + } + } + } + + return depth; +} + +// Internal representations of NeuroML segment and segmentGroup data: + +struct neuroml_segment { + // Morhpological data: + non_negative id; + std::string name; + optional<arb::mpoint> proximal; + arb::mpoint distal; + optional<non_negative> parent_id; + double along = 1; + + // Data for error reporting: + unsigned line = 0; + + // Topological depth: + std::size_t tdepth = 0; +}; + +struct neuroml_segment_group_subtree { + // Interval determined by segment ids. + // Represents both `<path>` and `<subTree>` elements. + optional<non_negative> from, to; + + // Data for error reporting: + unsigned line = 0; +}; + +struct neuroml_segment_group_info { + std::string id; + std::vector<non_negative> segments; + std::vector<std::string> includes; + std::vector<neuroml_segment_group_subtree> subtrees; + + // Data for error reporting: + unsigned line = 0; +}; + +// Processing of parsed segment/segmentGroup data: + +struct neuroml_segment_tree { + // Segments in topological order: + auto begin() const { return segments_.begin(); } + auto end() const { return segments_.end(); } + + // How many segments? + std::size_t size() const { return segments_.size(); } + bool empty() const { return segments_.empty(); } + + // Segment by id: + const neuroml_segment operator[](non_negative id) const { + return segments_.at(index_.at(id)); + } + + // Children of segment with id. + const std::vector<non_negative>& children(non_negative id) const { + static std::vector<non_negative> none{}; + auto iter = children_.find(id); + return iter!=children_.end()? iter->second: none; + } + + // Does segment id exist? + bool contains(non_negative id) const { + return index_.count(id); + } + + // Construct from vector of segments. Will happily throw if something doesn't add up. + explicit neuroml_segment_tree(std::vector<neuroml_segment> segs): + segments_(std::move(segs)) + { + if (segments_.empty()) return; + const std::size_t n_seg = segments_.size(); + + // Build index, throw on duplicate id. + for (std::size_t i = 0; i<n_seg; ++i) { + if (!index_.insert({segments_[i].id, i}).second) { + throw bad_segment(segments_[i].id, segments_[i].line); + } + } + + // Check parent relationship is sound. + for (const auto& s: segments_) { + if (s.parent_id && !index_.count(*s.parent_id)) { + throw bad_segment(s.id, s.line); // No such parent id. + } + } + + // Perform topological sort. + auto inset = [this](std::size_t i) { + auto& s = segments_[i]; + return s.parent_id? box{index_.at(*s.parent_id)}: box<std::size_t>{}; + }; + if (auto depths = topological_sort(n_seg, inset)) { + const auto& d = depths.value(); + for (std::size_t i = 0; i<n_seg; ++i) { + segments_[i].tdepth = d[i]; + } + } + else { + const auto& seg = segments_[depths.error().index]; + throw cyclic_dependency(nl_to_string(seg.id), seg.line); + } + std::sort(segments_.begin(), segments_.end(), [](auto& a, auto& b) { return a.tdepth<b.tdepth; }); + + // Check for multiple roots: + if (n_seg>1 && segments_[1].tdepth==0) throw bad_segment(segments_[1].id, segments_[1].line); + + // Update index: + for (std::size_t i = 0; i<n_seg; ++i) { + index_.at(segments_[i].id) = i; + } + + // Build child tree: + for (const auto& seg: segments_) { + if (seg.parent_id) { + children_[*seg.parent_id].push_back(seg.id); + } + } + } + +private: + std::vector<neuroml_segment> segments_; + std::unordered_map<non_negative, std::size_t> index_; + std::unordered_map<non_negative, std::vector<non_negative>> children_; +}; + +std::unordered_map<std::string, std::vector<non_negative>> evaluate_segment_groups( + std::vector<neuroml_segment_group_info> groups, + const neuroml_segment_tree& segtree) +{ + const std::size_t n_group = groups.size(); + + // Expand subTree/path specifications: + for (auto& g: groups) { + unsigned line = g.line; + try { + for (auto& subtree: g.subtrees) { + line = subtree.line; + + if (!subtree.from && !subtree.to) { + // Matches all segments: + for (auto& seg: segtree) { + g.segments.push_back(seg.id); + } + } + else if (!subtree.to) { + // Add 'from' and all of its descendents. + std::stack<non_negative> pending; + pending.push(*subtree.from); + + while (!pending.empty()) { + auto top = pending.top(); + pending.pop(); + + g.segments.push_back(top); + for (auto id: segtree.children(top)) { + pending.push(id); + } + } + } + else { + // Note: if from is not an ancestor of to, the path is regarded as empty. + std::vector<non_negative> path; + auto opt_id = subtree.to; + for (; opt_id && opt_id!=subtree.from; opt_id = segtree[*opt_id].parent_id) { + path.push_back(*opt_id); + } + if (opt_id==subtree.from) { + if (subtree.from) g.segments.push_back(*subtree.from); + g.segments.insert(g.segments.end(), path.begin(), path.end()); + } + } + } + } + catch (...) { + throw bad_segment_group(g.id, line); + } + } + + // Build index, throw on duplicate id. + std::unordered_map<std::string, std::size_t> index; + for (std::size_t i = 0; i<n_group; ++i) { + if (!index.insert({groups[i].id, i}).second) { + throw bad_segment_group(groups[i].id, groups[i].line); + } + } + + // Build group index -> indices of included groups map. + std::vector<std::vector<std::size_t>> index_to_included_indices(n_group); + for (std::size_t i = 0; i<n_group; ++i) { + const auto& includes = groups[i].includes; + index_to_included_indices[i].reserve(includes.size()); + for (auto& id: includes) { + if (!index.count(id)) throw bad_segment_group(groups[i].id, groups[i].line); + index_to_included_indices[i].push_back(index.at(id)); + } + } + + // Get topological order wrt include relationship. + std::vector<std::size_t> topo_order(n_group); + if (auto depths = topological_sort(n_group, [&](auto i) { return index_to_included_indices[i]; })) { + const auto& d = depths.value(); + std::iota(topo_order.begin(), topo_order.end(), std::size_t(0)); + std::sort(topo_order.begin(), topo_order.end(), [&d](auto& a, auto& b) { return d[a]<d[b]; }); + } + else { + const auto& group = groups[depths.error().index]; + throw cyclic_dependency(group.id, group.line); + } + + // Accumulate included group segments, following topological order. + for (auto i: topo_order) { + auto& g = groups[i]; + std::sort(g.segments.begin(), g.segments.end()); + + if (index_to_included_indices[i].empty()) { + g.segments.erase(std::unique(g.segments.begin(), g.segments.end()), g.segments.end()); + } + else { + std::vector<non_negative> merged; + for (auto j: index_to_included_indices[i]) { + merged.clear(); + unique_merge(g.segments.begin(), g.segments.end(), + groups[j].segments.begin(), groups[j].segments.end(), std::back_inserter(merged)); + std::swap(g.segments, merged); + } + } + } + + // Move final segment lists into map. + std::unordered_map<std::string, std::vector<non_negative>> group_seg_map; + for (auto& g: groups) { + group_seg_map[g.id] = std::move(g.segments); + } + + return group_seg_map; +} + +arb::stitched_morphology construct_morphology(const neuroml_segment_tree& segtree) { + arb::stitch_builder builder; + if (segtree.empty()) return arb::stitched_morphology{builder}; + + // Construct result from topologically sorted segments. + + for (auto& s: segtree) { + arb::mstitch stitch(nl_to_string(s.id), s.distal); + stitch.prox = s.proximal; + + if (s.parent_id) { + builder.add(stitch, nl_to_string(s.parent_id.value()), s.along); + } + else { + builder.add(stitch); + } + } + + return arb::stitched_morphology(std::move(builder)); +} + +morphology_data parse_morphology_element(xml_xpathctx ctx, xml_node morph) { + morphology_data M; + M.id = morph.prop<std::string>("id", std::string{}); + + std::vector<neuroml_segment> segments; + + // TODO: precompile xpath queries for nml:distal, nml:proximal, nml:parent. + const char* q_parent = "./nml:parent"; + const char* q_proximal = "./nml:proximal"; + const char* q_distal = "./nml:distal"; + + for (auto n: ctx.query(morph, "./nml:segment")) { + neuroml_segment seg; + int line = n.line(); // for error context! + + try { + seg.id = -1; + seg.id = n.prop<non_negative>("id"); + std::string name = n.prop<std::string>("name", std::string{}); + + auto result = ctx.query(n, q_parent); + if (!result.empty()) { + line = result[0].line(); + seg.parent_id = result[0].prop<non_negative>("segment"); + seg.along = result[0].prop<double>("fractionAlong", 1.0); + } + + result = ctx.query(n, q_proximal); + if (!result.empty()) { + line = result[0].line(); + double x = result[0].prop<double>("x"); + double y = result[0].prop<double>("y"); + double z = result[0].prop<double>("z"); + double diameter = result[0].prop<double>("diameter"); + if (diameter<0) throw bad_segment(seg.id, n.line()); + + seg.proximal = arb::mpoint{x, y, z, diameter/2}; + } + + if (!seg.parent_id && !seg.proximal) throw bad_segment(seg.id, n.line()); + + result = ctx.query(n, q_distal); + if (!result.empty()) { + line = result[0].line(); + double x = result[0].prop<double>("x"); + double y = result[0].prop<double>("y"); + double z = result[0].prop<double>("z"); + double diameter = result[0].prop<double>("diameter"); + if (diameter<0) throw bad_segment(seg.id, n.line()); + + seg.distal = arb::mpoint{x, y, z, diameter/2}; + } + else { + throw bad_segment(seg.id, n.line()); + } + } + catch (parse_error& e) { + throw bad_segment(seg.id, line); + } + + seg.line = n.line(); + segments.push_back(std::move(seg)); + } + + if (segments.empty()) return M; + + // Compute tree now to save further parsing if something goes wrong. + neuroml_segment_tree segtree(std::move(segments)); + + // TODO: precompile xpath queries for following: + const char* q_member = "./nml:member"; + const char* q_include = "./nml:include"; + const char* q_path = "./nml:path"; + const char* q_from = "./nml:from"; + const char* q_to = "./nml:to"; + const char* q_subtree = "./nml:subTree"; + + std::vector<neuroml_segment_group_info> groups; + + for (auto n: ctx.query(morph, "./nml:segmentGroup")) { + neuroml_segment_group_info group; + int line = n.line(); // for error context! + + try { + group.id = n.prop<std::string>("id"); + for (auto elem: ctx.query(n, q_member)) { + line = elem.line(); + auto seg_id = elem.prop<non_negative>("segment"); + if (!segtree.contains(seg_id)) throw bad_segment_group(group.id, line); + group.segments.push_back(elem.prop<non_negative>("segment")); + } + for (auto elem: ctx.query(n, q_include)) { + line = elem.line(); + group.includes.push_back(elem.prop<std::string>("segmentGroup")); + } + + // Treat `<path>` and `<subTree>` identically: + auto parse_subtree_elem = [&](auto& elem) { + line = elem.line(); + auto froms = ctx.query(elem, q_from); + auto tos = ctx.query(elem, q_to); + + neuroml_segment_group_subtree sub; + sub.line = line; + if (!froms.empty()) { + line = froms[0].line(); + sub.from = froms[0].template prop<non_negative>("segment"); + } + if (!tos.empty()) { + line = tos[0].line(); + sub.to = tos[0].template prop<non_negative>("segment"); + } + + return sub; + }; + + for (auto elem: ctx.query(n, q_path)) { + group.subtrees.push_back(parse_subtree_elem(elem)); + } + for (auto elem: ctx.query(n, q_subtree)) { + group.subtrees.push_back(parse_subtree_elem(elem)); + } + } + catch (parse_error& e) { + throw bad_segment_group(group.id, line); + } + + group.line = n.line(); + groups.push_back(std::move(group)); + } + + M.group_segments = evaluate_segment_groups(std::move(groups), segtree); + + // Build morphology and label dictionaries: + + arb::stitched_morphology stitched = construct_morphology(segtree); + M.morphology = stitched.morphology(); + M.segments = stitched.labels(); + + std::unordered_multimap<std::string, non_negative> name_to_ids; + std::unordered_set<std::string> names; + + for (auto& s: segments) { + if (!s.name.empty()) { + name_to_ids.insert({s.name, s.id}); + names.insert(s.name); + } + } + + for (const auto& name: names) { + arb::region r; + auto ids = name_to_ids.equal_range(name); + for (auto i = ids.first; i!=ids.second; ++i) { + r = join(std::move(r), M.segments.regions().at(nl_to_string(i->second))); + } + M.named_segments.set(name, std::move(r)); + } + + for (const auto& [group_id, segment_ids]: M.group_segments) { + arb::region r; + for (auto id: segment_ids) { + r = join(std::move(r), M.segments.regions().at(nl_to_string(id))); + } + M.groups.set(group_id, std::move(r)); + } + + return M; +} + +} // namespace arbnml diff --git a/arbornml/parse_morphology.hpp b/arbornml/parse_morphology.hpp new file mode 100644 index 00000000..305dcb6b --- /dev/null +++ b/arbornml/parse_morphology.hpp @@ -0,0 +1,10 @@ +#pragma once + +#include <arbornml/arbornml.hpp> +#include "xmlwrap.hpp" + +namespace arbnml { + +morphology_data parse_morphology_element(xml_xpathctx ctx, xml_node morph); + +} // namespace arbnml diff --git a/arbornml/with_xml.cpp b/arbornml/with_xml.cpp new file mode 100644 index 00000000..c7ae41d0 --- /dev/null +++ b/arbornml/with_xml.cpp @@ -0,0 +1,22 @@ +#include <arbornml/with_xml.hpp> + +#include <libxml/parser.h> + +namespace arbnml { + +with_xml::with_xml(): run_cleanup_(true) { + // Initialize before any multithreaded access by library or client code. + xmlInitParser(); +} + +with_xml::with_xml(with_xml&& other): run_cleanup_(other.run_cleanup_) { + other.run_cleanup_ = false; +} + +with_xml::~with_xml() { + if (run_cleanup_) { + xmlCleanupParser(); + } +} + +} // namespace arbnml diff --git a/arbornml/xmlwrap.cpp b/arbornml/xmlwrap.cpp new file mode 100644 index 00000000..c46b1292 --- /dev/null +++ b/arbornml/xmlwrap.cpp @@ -0,0 +1,128 @@ +#include <cerrno> +#include <charconv> +#include <cstdarg> +#include <cstddef> +#include <cstdlib> +#include <cstring> +#include <limits> +#include <locale> +#include <sstream> +#include <string> +#include <vector> + +#include <libxml/xmlerror.h> + +#include <arbornml/nmlexcept.hpp> + +#include "xmlwrap.hpp" + +namespace arbnml { + +namespace detail { + +// Note: widely missing library support for floating point std::from_chars. + +template <typename V> +bool from_cstr_(V& out, const char* s) { + auto [p, ec] = std::from_chars(s, s+std::strlen(s), out); + return ec==std::errc{} && !*p; +} + +template <typename V> +std::string nl_to_string_(const V& v, unsigned digits_estimate) { + std::vector<char> digits(digits_estimate); + for (;;) { + if (auto [p, ec] = std::to_chars(digits.data(), digits.data()+digits.size(), v); ec == std::errc{}) { + return std::string(digits.data(), p); + } + + digits_estimate *= 2; + digits.resize(digits_estimate); + } +} + +} // namespace detail + + +bool nl_from_cstr(std::string& out, const char* content) { + out = content; + return true; +} + +bool nl_from_cstr(long long& out, const char* content) { + return detail::from_cstr_(out, content); +} + +bool nl_from_cstr(non_negative& out, const char* content) { + return detail::from_cstr_(out, content); +} + +bool nl_from_cstr(double& out, const char* content) { + // Note: library support is widely missing for floating point std::from_chars, + // so can't just do: + // return detail::from_cstr_(out, content); + // + // std::strtod() will use the current C locale, so that's out: anticipating the + // decimal point character is a race condition. + + std::istringstream is{std::string(content)}; + is.imbue(std::locale::classic()); + + double x; + is >> x; + if (!is || !is.eof()) return false; + out = x; + return true; +} + +std::string nl_to_string(non_negative n) { + return detail::nl_to_string_(n, std::numeric_limits<non_negative>::digits10); +} + +std::string nl_to_string(long long n) { + return detail::nl_to_string_(n, 1+std::numeric_limits<long long>::digits10); +} + +void throw_on_xml_generic_error(void *, const char* msg, ...) { + va_list va, vb; + va_start(va, msg); + va_copy(vb, va); + + int r = vsnprintf(nullptr, 0, msg, va); + va_end(va); + + std::string err(r+1, '\0'); + vsnprintf(&err[0], err.size(), msg, vb); + va_end(vb); + + throw ::arbnml::xml_error(err); +} + +void throw_on_xml_structured_error(void *ctx, xmlErrorPtr errp) { + if (errp->level!=1) { // ignore warnings! + std::string msg(errp->message); + if (!msg.empty() && msg.back()=='\n') msg.pop_back(); + throw ::arbnml::xml_error(msg, errp->line); + } +} + +xml_error_scope::xml_error_scope() { + generic_handler_ = xmlGenericError; + generic_context_ = xmlGenericErrorContext; + + structured_handler_ = xmlStructuredError; + structured_context_ = xmlStructuredErrorContext; + + xmlSetGenericErrorFunc(nullptr, &throw_on_xml_generic_error); + xmlSetStructuredErrorFunc((void*)this, &throw_on_xml_structured_error); +} + +xml_error_scope::~xml_error_scope() { + xmlGenericError = generic_handler_; + xmlGenericErrorContext = generic_context_; + + xmlStructuredError = structured_handler_; + xmlStructuredErrorContext = structured_context_; +} + +} // namespace arbnml diff --git a/arbornml/xmlwrap.hpp b/arbornml/xmlwrap.hpp new file mode 100644 index 00000000..d914bc69 --- /dev/null +++ b/arbornml/xmlwrap.hpp @@ -0,0 +1,317 @@ +#pragma once + +// RAII and iterator wrappers for some libxml2 objects. + +#include <any> +#include <cstddef> +#include <cstdlib> +#include <memory> +#include <optional> +#include <string> +#include <utility> + +#include <libxml/parser.h> +#include <libxml/xpath.h> +#include <libxml/xpathInternals.h> + +#include <arbornml/nmlexcept.hpp> + +namespace arbnml { + +// `non_negative` represents the corresponding constraint in the schema, which +// can mean any arbitrarily large non-negtative integer value. +// +// A faithful representation would use an arbitrary-size 'big' integer or +// a string, but for ease of implementation (and a bit more speed) we restrict +// it to whatever we can fit in an unsigned long long. + +using non_negative = unsigned long long; + +// String wrappers around `to_chars` for attribute types we care about. +// (`nl` is meant to stand for "no locale".) + +std::string nl_to_string(non_negative); +std::string nl_to_string(long long); + +// Parse attribute content as the representation of a specific type. +// Return true if successful. + +bool nl_from_cstr(std::string& out, const char* content); +bool nl_from_cstr(non_negative& out, const char* content); +bool nl_from_cstr(long long& out, const char* content); +bool nl_from_cstr(double& out, const char* content); + +// Wrap xmlChar* NUL-terminated string that requires deallocation. + +struct xml_string { + explicit xml_string(const xmlChar* p): p_(p, xml_string::deleter) {} + + operator const char*() const { + return reinterpret_cast<const char*>(p_.get()); + } + +private: + std::shared_ptr<const xmlChar> p_; + static void deleter(const xmlChar* x) { xmlFree((void*)x); } +}; + +// Wrappers below are generally constructed with two arguments, +// a pointer corresponding to the libxml2 object, and a dependency +// object (typically shared_ptr<X> for some X) that guards the +// lifetime of another object upon which this depends. + +template <typename XmlType> +void trivial_dealloc(XmlType*) {} + +template <typename XmlType, void (*xml_dealloc)(XmlType *) = &trivial_dealloc<XmlType>> +struct xml_base { + xml_base(XmlType* p, std::any depends = {}): + p_(p, xml_dealloc), + depends_(std::move(depends)) + {} + +protected: + // Access to raw wrapped pointer type. + XmlType* get() const { return p_.get(); } + + // Copy of shared_ptr<> governing lifetime of referenced object. + auto self() const { return p_; } + + // Copy of dependency object. + std::any depends() const { return depends_; } + +private: + std::shared_ptr<XmlType> p_; + std::any depends_; +}; + +// xmlNode RAII wrapper (non-owning). + +struct xml_node: protected xml_base<xmlNode> { + using base = xml_base<xmlNode>; + explicit xml_node(xmlNode* p, std::any depends): + base(p, std::move(depends)) + {} + + bool is_element() const { return get()->type==XML_ELEMENT_NODE; } + bool is_attr() const { return get()->type==XML_ATTRIBUTE_NODE; } + xml_string content() const { return xml_string(xmlNodeGetContent(get())); } + unsigned line() const { return get()->line; } + + bool has_prop(const char* name) const { return xmlHasProp(get(), (const xmlChar*)name); } + + template <typename T> + T prop(const char* name, std::optional<T> default_value = std::nullopt) const { + xmlChar* c = xmlGetProp(get(), (const xmlChar*)(name)); + if (!c) { + return default_value? default_value.value(): throw parse_error("missing required attribute", get()->line); + } + + T v; + return nl_from_cstr(v, reinterpret_cast<const char*>(c))? v: throw parse_error("attribute type error", get()->line); + } + + using base::get; // (unsafe access) +}; + +// xmlNodeSet RAII wrapper; resource lifetime is governed by an xmlXPathObject. + +struct xml_nodeset: protected xml_base<xmlNodeSet> { + using base = xml_base<xmlNodeSet>; + + xml_nodeset(): xml_nodeset(nullptr, std::any{}) {} + + xml_nodeset(xmlNodeSet* p, std::any depends): + base(p, std::move(depends)) + {} + + struct iterator { + using value_type = xml_node; + using difference_type = std::ptrdiff_t; + using reference = value_type; // yeah, not a real random access iterator + using pointer = value_type*; + using iterator_category = std::random_access_iterator_tag; + + explicit iterator(xmlNodePtr* p, const xml_nodeset* ns_ptr): p_(p), ns_ptr_(ns_ptr) {} + + bool operator==(iterator i) const { return p_==i.p_; } + bool operator!=(iterator i) const { return p_!=i.p_; } + bool operator<(iterator i) const { return p_<i.p_; } + bool operator<=(iterator i) const { return p_<=i.p_; } + bool operator>(iterator i) const { return p_>i.p_; } + bool operator>=(iterator i) const { return p_>=i.p_; } + + reference operator*() const { return ns_ptr_->mk_xml_node(*p_); } + + struct ptr_proxy { + xml_node inner_; + const xml_node* operator->() const { return &inner_; } + }; + ptr_proxy operator->() const { return ptr_proxy{ns_ptr_->mk_xml_node(*p_)}; } + + iterator& operator++() { return ++p_, *this; } + iterator operator++(int) { + iterator x(*this); + return ++p_, x; + } + + iterator& operator--() { return --p_, *this; } + iterator operator--(int) { + iterator x(*this); + return --p_, x; + } + + iterator& operator+=(ptrdiff_t n) { return p_ += n, *this; } + iterator& operator-=(ptrdiff_t n) { return p_ -= n, *this; } + reference operator[](ptrdiff_t n) { return *(*this+n); } + + iterator operator+(ptrdiff_t n) { + iterator i(*this); + return i += n; + } + friend iterator operator+(ptrdiff_t n, iterator i) { return i+n; } + + iterator operator-(ptrdiff_t n) { + iterator i(*this); + return i -= n; + } + friend iterator operator-(ptrdiff_t n, iterator i) { return i-n; } + + ptrdiff_t operator=(iterator i) { return p_-i.p_; } + + private: + xmlNode** p_; + const xml_nodeset* ns_ptr_; + }; + + iterator begin() const { return iterator{get()? get()->nodeTab: nullptr, this}; } + iterator end() const { return iterator{get()? get()->nodeTab+get()->nodeNr: nullptr, this}; } + + iterator::reference operator[](int i) const { return begin()[i]; } + std::size_t size() const { return get()? get()->nodeNr: 0u; } + bool empty() const { return size()==0u; } + +private: + // Construct xml_node wrapper with the same lifetime dependency as this node set. + xml_node mk_xml_node(xmlNode* p) const { + return xml_node{p, depends()}; + } +}; + +// xmlPathObj RAII wrapper; lifetime of xmlPathObj governs lifetime of node set. + +struct xml_xpathobj: protected xml_base<xmlXPathObject, xmlXPathFreeObject> { + using base = xml_base<xmlXPathObject, xmlXPathFreeObject>; + + explicit xml_xpathobj(xmlXPathObject* p, std::any depends): + base(p, std::move(depends)) + {} + + xml_nodeset nodes() { + return get()->type==XPATH_NODESET? xml_nodeset{get()->nodesetval, self()}: xml_nodeset{}; + } +}; + +// xmlXPathContext RAII wrapper. + +struct xml_xpathctx: protected xml_base<xmlXPathContext, xmlXPathFreeContext> { + using base = xml_base<xmlXPathContext, xmlXPathFreeContext>; + + explicit xml_xpathctx(xmlXPathContext* p, std::any depends): + base(p, std::move(depends)) + {} + + void register_ns(const char* ns, const char* uri) { + xmlXPathRegisterNs(get(), (const xmlChar*)ns, (const xmlChar*)uri); + } + + xml_nodeset query(const char* q) { + return xml_xpathobj{xmlXPathEvalExpression((xmlChar*)q, get()), self()}.nodes(); + } + xml_nodeset query(const std::string& q) { return query(q.c_str()); } + + xml_nodeset query(xml_node context, const char* q) { + return xml_xpathobj{xmlXPathNodeEval(context.get(), (xmlChar*)q, get()), self()}.nodes(); + } + xml_nodeset query(xml_node context, const std::string& q) { return query(std::move(context), q.c_str()); } +}; + +// xmlDoc RAII wrapper. + +struct xml_doc: protected xml_base<xmlDoc, xmlFreeDoc> { + using base = xml_base<xmlDoc, xmlFreeDoc>; + + xml_doc(): xml_doc(nullptr) {} + + explicit xml_doc(std::string the_doc): + // 'Pretty sure' we don't need to keep the string after the tree is built. Pretty sure. + xml_doc(xmlReadMemory(the_doc.c_str(), the_doc.length(), "", nullptr, xml_options)) + {} + + // TODO: (... add other ctors ...) + + friend xml_xpathctx xpath_context(const xml_doc& doc) { + return xml_xpathctx{xmlXPathNewContext(doc.get()), doc.self()}; + } + + explicit operator bool() const { return get(); } + +private: + explicit xml_doc(xmlDoc* p): base(p) {} + static constexpr int xml_options = XML_PARSE_NOENT | XML_PARSE_NONET; +}; + +// Escape a string for use as string expression within an XPath expression. + +inline std::string xpath_escape(const std::string& x) { + auto npos = std::string::npos; + if (x.find_first_of("'")==npos) { + return "'"+x+"'"; + } + else if (x.find_first_of("\"")==npos) { + return "\""+x+"\""; + } + else { + std::string r = "concat("; + std::string::size_type i = 0; + for (;;) { + auto j = x.find_first_of("'", i); + r += "'"; + r.append(x, i, j==npos? j: j-i); + r += "'"; + if (j==npos) break; + r += ",\""; + i = j+1; + j = x.find_first_not_of("'",i); + r.append(x, i, j==npos? j: j-i); + r += "\""; + if (j==npos) break; + r += ","; + i = j+1; + } + r += ")"; + return r; + } +} + +// Error management: +// +// Use xml_error_scope to catch libxml2 warnings and errors. The +// xml_error_scope object will restore the original error handling +// behaviour on destruction. +// +// Errors are turned into arbnml::xml_error exceptions and thrown, +// while warnings are ignored (libxml2 warnings are highly innocuous). + +struct xml_error_scope { + xml_error_scope(); + ~xml_error_scope(); + + xmlGenericErrorFunc generic_handler_; + void* generic_context_; + + xmlStructuredErrorFunc structured_handler_; + void* structured_context_; +}; + +} // namespace arbnml diff --git a/cmake/CompilerOptions.cmake b/cmake/CompilerOptions.cmake index 6734beaf..9780d534 100644 --- a/cmake/CompilerOptions.cmake +++ b/cmake/CompilerOptions.cmake @@ -40,6 +40,15 @@ set(CXXOPT_WALL $<IF:$<CXX_COMPILER_ID:Clang>,-Wno-potentially-evaluated-expression,> $<IF:$<CXX_COMPILER_ID:AppleClang>,-Wno-potentially-evaluated-expression,> + # Clang (Apple): + # + # * Disable 'range-loop-analysis' warning: disabled by default in + # clang, but enabled in Apple clang, this will flag loops of the form + # `for (auto& x: y)` where iterators for `y` dereference to proxy objects. + # Such code is correct, and the warning is spurious. + + $<IF:$<CXX_COMPILER_ID:AppleClang>,-Wno-range-loop-analysis,> + # Clang: # # * Disable 'unused-function' warning: this will flag the unused diff --git a/cmake/arbor-config.cmake.in b/cmake/arbor-config.cmake.in index 04a3532a..246d92d5 100644 --- a/cmake/arbor-config.cmake.in +++ b/cmake/arbor-config.cmake.in @@ -21,8 +21,10 @@ endforeach() set(_override_lang @arbor_override_import_lang@) if(_override_lang) - foreach(target arbor::arbor arbor::arborenv) - set_target_properties(${target} PROPERTIES IMPORTED_LINK_INTERFACE_LANGUAGES_@arbor_build_config@ "${_override_lang}") + foreach(target arbor::arbor arbor::arborenv arbor::arbornml) + if(TARGET ${target}) + set_target_properties(${target} PROPERTIES IMPORTED_LINK_INTERFACE_LANGUAGES_@arbor_build_config@ "${_override_lang}") + endif() endforeach() endif() @@ -30,16 +32,19 @@ endif() # (See though arbor-sim/arbor issue #678). function(_append_property target property) - set(p_append ${ARGN}) - get_target_property(p ${target} ${property}) - if(p) - list(APPEND p ${p_append}) - else() - set(interface_libs ${p_append}) + if (TARGET ${target}) + set(p_append ${ARGN}) + get_target_property(p ${target} ${property}) + if(p) + list(APPEND p ${p_append}) + else() + set(interface_libs ${p_append}) + endif() + set_target_properties(${target} PROPERTIES ${property} "${p}") endif() - set_target_properties(${target} PROPERTIES ${property} "${p}") endfunction() _append_property(arbor::arbor INTERFACE_LINK_LIBRARIES @arbor_add_import_libs@) _append_property(arbor::arborenv INTERFACE_LINK_LIBRARIES @arborenv_add_import_libs@) +_append_property(arbor::arbornml INTERFACE_LINK_LIBRARIES @arbornml_add_import_libs@) diff --git a/doc/cpp_neuroml.rst b/doc/cpp_neuroml.rst new file mode 100644 index 00000000..d495b661 --- /dev/null +++ b/doc/cpp_neuroml.rst @@ -0,0 +1,154 @@ +.. _cppneuroml: + +NeuroML support +=============== + +Arbor offers limited support for models described in +`NeuroML version 2 <https://neuroml.org/neuromlv2>`_. +This is not built by default, but can be enabled by +providing the `-DARB_NEUROML=ON` argument to CMake at +configuration time (see :ref:`install-neuroml`). This will +build the ``arbornml`` libray and defines the corresponding +``arbor::arbornml`` CMake target. + +The ``arbornml`` library uses `libxml2 <http://xmlsoft.org/>`_ +for XML parsing. Applications using ``arbornml`` will need to +link against ``libxml2`` in addition, though this is performed +implicitly within CMake projects that add ``arbor::arbornml`` +as a link library. + +All classes and functions provided by the ``arbornml`` library +are provided in the ``arbnml`` namespace. + + +Libxml2 interface +----------------- + +Libxml2 offers threadsafe XML parsing, but not by default. If +the application uses ``arbornml`` in an unthreaded context, or +has already explicitly initialized ``libxml2``, nothing more +needs to be done. Otherwise, the ``libxml2`` function ``xmlInitParser()`` +must be called explicitly. + +``arbornml`` provides a helper guard object for this purpose, defined +in ``arbornml/with_xml.hpp``: + +.. cpp:namespace:: arbnml + +.. cpp:class:: with_xml + + An RAII guard object that calls ``xmlInitParser()`` upon construction, and + ``xmlCleanupParser()`` upon destruction. The constructor takes no parameters. + + +NeuroML 2 morphology support +---------------------------- + +NeuroML documents are represented by the ``arbnml::neuroml`` class, +which in turn provides methods for the identification and translation +of morphology data. ``neuroml`` objects are moveable and move-assignable, but not copyable. + +An implementation limitation restrictes valid segment id values to +those which can be represented by an ``unsigned long long`` value. + +.. cpp:class:: neuroml + + .. cpp:function:: neuroml(std::string) + + Build a NeuroML document representation from the supplied string. + + .. cpp:function:: std::vector<std::string> cell_ids() const + + Return the id of each ``<cell>`` element defined in the NeuroML document. + + .. cpp:function:: std::vector<std::string> morphology_ids() const + + Return the id of each top-level ``<morphology>`` element defined in the NeuroML document. + + .. cpp:function:: std::optional<morphology_data> morphology(const std::string&) const + + Return a representation of the top-level morphology with the supplied identifier, or + ``std::nullopt`` if no such morphology could be found. Parse errors or an inconsistent + representation will raise an exception derived from ``neuroml_exception``. + + .. cpp:function:: std::optional<morphology_data> cell_morphology(const std::string&) const + + Return a representation of the morphology associated with the cell with the supplied identifier, + or ``std::nullopt`` if the cell or its morphology could not be found. Parse errors or an + inconsistent representation will raise an exception derived from ``neuroml_exception``. + +The morphology representation contains the corresponding Arbor ``arb::morphology`` object, +label dictionaries for regions corresponding to its segments and segment groups by name +and id, and a map providing the explicit list of segments contained within each defined +segment group. + +.. cpp:class:: morphology_data + + .. cpp:member:: std::optional<std::string> cell_id + + The id attribute of the cell that was used to find the morphology in the NeuroML document, if any. + + .. cpp:member:: std::string id + + The id attribute of the morphology. + + .. cpp:member:: arb::morphology morphology + + The corresponding Arbor morphology. + + .. cpp:member:: arb::label_dict segments + + A label dictionary with a region entry for each segment, keyed by the segment id (as a string). + + .. cpp:member:: arb::label_dict named_segments + + A label dictionary with a region entry for each name attribute given to one or more segments. + The region corresponds to the union of all segments sharing the same name attribute. + + .. cpp:member:: arb::label_dict groups + + A label dictionary with a region entry for each defined segment group + + .. cpp:member:: std::unordered_map<std::string, std::vector<unsigned long long>> group_segments + + A map from taking each segment group id to its corresponding collection of segments. + + +Exceptions +---------- + +All NeuroML-specific exceptions are defined in ``arbornml/nmlexcept.hpp``, and are +derived from ``arbnml::neuroml_exception`` which in turn is derived from ``std::runtime_error``. +With the exception of the ``no_document`` exception, all contain an unsigned member ``line`` +which is intended to identify the problematic construct within the document. + +.. cpp:class:: xml_error: neuroml_exception + + A generic XML error generated by the ``libxml2`` library. + +.. cpp:class:: no_document: neuroml_exception + + A request was made on an :cpp:class:`neuroml` document without any content. + +.. cpp:class:: parse_error: neuroml_exception + + Failure parsing an element or attribute in the NeuroML document. These + can be generated if the document does not confirm to the NeuroML2 schema, + for example. + +.. cpp:class:: bad_segment: neuroml_exception + + A ``<segment>`` element has an improper ``id`` attribue, refers to a non-existent + parent, is missing a required parent or proximal element, or otherwise is missing + a mandatory child element or has a malformed child element. + +.. cpp:class:: bad_segment_group: neuroml_exception + + A ``<segmentGroup>`` element has a malformed child element or references + a non-existent segment group or segment. + +.. cpp:class:: cyclic_dependency: neuroml_exception + + A segment or segment group ultimately refers to itself via ``parent`` + or ``include`` elements respectively. + diff --git a/doc/index.rst b/doc/index.rst index c1319eed..e83b29dd 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -83,6 +83,7 @@ Alternative citation formats for the paper can be `downloaded here <https://ieee cpp_domdec cpp_simulation cpp_cable_cell + cpp_neuroml .. toctree:: :caption: Developers: diff --git a/doc/install.rst b/doc/install.rst index 97f22e04..46e731b6 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -48,7 +48,7 @@ We recommend using GCC or Clang, for which Arbor has been tested and optimised. Compiler Min version Notes =========== ============ ============================================ GCC 8.4.0 - Clang 7.0 Needs GCC 8 or later for standard library. + Clang 8.0 Needs GCC 8 or later for standard library. Apple Clang 9 Apple LLVM version 9.0.0 (clang-900.0.39.2) Hip Clang Rocm 3.6 HIP support is currently experimental. =========== ============ ============================================ @@ -126,6 +126,14 @@ In order to use MPI in combination with the python frontend the `mpi4py <https://mpi4py.readthedocs.io/en/stable/install.html#>`_ Python package is recommended. See :ref:`install-python` for more information. +NeuroML +~~~~~~~ + +Arbor supports reading cell morphologies defined in NeuroML version 2 through +an additional NeuroML support library ``arbornml``. This library requires +`libxml2 <http://xmlsoft.org>`_ for the parsing of NeuroML2 XML. See :ref:`install-neuroml` for +more information. + Documentation ~~~~~~~~~~~~~~ @@ -334,6 +342,7 @@ with AVX, AVX2 or AVX512 ISA extensions, and for ARM architectures with support GPU Backend ----------- + Compiling for the GPU backend is controlled by the ``ARB_GPU`` CMake option which is used to select between NVIDIA and AMD GPUs as well as specify the chosen GPU compiler. @@ -456,6 +465,35 @@ variable before configuring and building Arbor: cmake -DARB_WITH_PYTHON=ON -DARB_WITH_MPI=ON +.. _install-neuroml: + +NeuroML support +--------------- + +Arbor has limited support for NeuroML version 2 through an additional library +``arbornml``. This library will be built if the option ``-DARB_WITH_NEUROML=ON`` +is passed to CMake at configuration time. ``arbornml`` depends upon the +the ``libxml2`` library for XML parsing. + +With NeuroML support enabled, Arbor will additionally install the static library +``libarbornml.a``. Applications using this functionality will need to link +against this library in addition to the main Arbor library and ``libxml2``. +For example: + +.. code-block:: bash + + g++ -std=c++17 -pthread mycode.cpp -larbornml -larbor -lxml2 + +For projects using CMake, Arbor NeuroML support can be required with the +component ``neuroml``. The corresponding CMake library target is ``arbor::arbornml``. + +.. code-block:: cmake + + find_package(arbor COMPONENTS neuroml) + # ... + target_link_libraries(myapp arbor::arbornml) + + .. _install: Installation diff --git a/scripts/travis/build.sh b/scripts/travis/build.sh index 28010aa9..b81f3ae6 100755 --- a/scripts/travis/build.sh +++ b/scripts/travis/build.sh @@ -76,7 +76,12 @@ cd $build_path # progress "Configuring with cmake" -cmake_flags="-DARB_WITH_ASSERTIONS=ON -DARB_WITH_MPI=${WITH_MPI} -DARB_WITH_PYTHON=${ARB_WITH_PYTHON} -DARB_ARCH=${ARCH} ${CXX_FLAGS} ${PY_FLAGS}" +# Fix CMake/Homebrew/XCode mess. See: https://github.com/apple/swift/pull/32436 +if which xcrun >/dev/null; then + typeset -x CMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}":$(xcrun --sdk macosx --show-sdk-path)/usr +fi + +cmake_flags="-DARB_WITH_ASSERTIONS=ON -DARB_WITH_NEUROML=${WITH_NEUROML} -DARB_WITH_MPI=${WITH_MPI} -DARB_WITH_PYTHON=${ARB_WITH_PYTHON} -DARB_ARCH=${ARCH} ${CXX_FLAGS} ${PY_FLAGS}" echo "cmake flags: ${cmake_flags}" cmake .. ${cmake_flags} || error "unable to configure cmake" diff --git a/sup/CMakeLists.txt b/sup/CMakeLists.txt index f81a24e0..0a9e7306 100644 --- a/sup/CMakeLists.txt +++ b/sup/CMakeLists.txt @@ -1,17 +1,9 @@ set(sup-sources - glob_basic.cpp ioutil.cpp json_meter.cpp path.cpp ) -if(ARB_USE_POSIX_GLOB) - list(APPEND sup-sources glob_posix.cpp) -else() - list(APPEND sup-sources glob_basic_wrap.cpp) -endif() - - add_library(arbor-sup ${sup-sources}) # Compile sup library with the same optimization flags as libarbor. diff --git a/sup/glob_basic.cpp b/sup/glob_basic.cpp deleted file mode 100644 index 1be55b0e..00000000 --- a/sup/glob_basic.cpp +++ /dev/null @@ -1,247 +0,0 @@ -#include <list> -#include <string> -#include <vector> - -#include <sup/glob.hpp> -#include <sup/path.hpp> - -namespace sup { - -struct glob_sup_fs_provider { - using action_type = std::function<void (const sup::path&)>; - - bool is_directory(const sup::path& p) const { - return sup::is_directory(p); - }; - - bool exists(const sup::path& p) const { - return sup::exists(p); - } - - void for_each_directory(const sup::path& p, action_type action) const { - std::error_code ec; - for (const auto& e: get_iterator(p)) { - if (sup::is_directory(e.path(), ec)) action(e.path()); - } - } - - void for_each_entry(const sup::path& p, action_type action) const { - for (const auto& e: get_iterator(p)) { - action(e.path()); - } - } - -private: - static directory_iterator get_iterator(const sup::path& p) { - return directory_iterator(p.empty()? ".": p, - directory_options::skip_permission_denied); - } -}; - -glob_fs_provider glob_native_provider{glob_sup_fs_provider{}}; - -static bool match_char_class(const char*& p, char c) { - // Special cases for first character: - // ! => negate test defined from following char. - // - => treat '-' as literal. - // ] => treat ']' as literal. - - if (*p!='[') return false; - ++p; - - bool negate = false; - bool match = false; - - if (*p=='!') { - negate = true; - ++p; - } - - bool first = true; - char lrange = 0; - for (; !match && *p && (first || *p!=']'); ++p) { - - bool last = *p && p[1]==']'; - if (*p=='-' && lrange && !first && !last) { - match = c>=lrange && c<=*++p; - lrange = 0; - continue; - } - - lrange = *p; - match = c==*p; - first = false; - } - - while (*p && *p!=']') ++p; - if (!*p) return false; - - return match^negate; -} - -// Special exception for filename globbing: an initial period '.' can only be matched -// by an intial '.' in the pattern. - -bool glob_basic_match(const char* p, const char* t) { - // NFA state represented by pointers into directly into pattern. - std::list<const char*> state = {p}; - - char c; - bool initial_dot = *t=='.'; - do { - c = *t++; - for (auto i = state.begin(); i!=state.end();) { - switch (**i) { - case '*': - if (initial_dot) goto fail; - if (i==state.begin() || *std::prev(i)!=*i) { - state.insert(i, *i); - } - while (**i=='*') ++*i; - continue; - case '?': - if (initial_dot) goto fail; - if (c) goto advance; - else goto fail; - case '[': - if (initial_dot) goto fail; - if (c && match_char_class(*i, c)) goto advance; - else goto fail; - case '\\': - ++*i; // fall-through - default: - if (**i==c) goto advance; - else goto fail; - } - - fail: - i = state.erase(i); - continue; - - advance: - *i += !!c; - ++i; - continue; - } - initial_dot = false; - } while (c && !state.empty()); - - return !state.empty() && !*state.back(); -} - -// Return first component, overwriting delimitter with NUL. -// Set pattern to beginning of next path component, skipping delimiters. - -struct pattern_component { - const char* pattern = nullptr; - bool literal = false; - bool directory = false; -}; - -static pattern_component tokenize(char*& pattern) { - if (!*pattern) return {pattern, true, false}; - - char* s = nullptr; - char* p = pattern; - bool meta = false; - - do { - while (*p=='/') ++p; - - bool in_charclass = false; - bool escape = false; - for (;*p && *p!='/'; ++p) { - switch (*p) { - case '[': - if (!escape) { - in_charclass = true; - meta = true; - } - break; - case '*': - if (!escape) meta = true; - break; - case '?': - if (!escape) meta = true; - break; - case '\\': - if (!escape && !in_charclass) escape = true; - break; - case ']': - if (in_charclass) in_charclass = false; - break; - default: ; - } - } - if (!meta) s = p; - } while (!meta && *p); - - pattern_component k = { pattern }; - k.literal = (bool)s; - - if (!s) s = p; - k.directory = !!*s; - - pattern = s; - while (*pattern=='/') ++pattern; - - *s = 0; - return k; -} - -// Return matching paths, unsorted, based on supplied pattern. -// Performs breadth-first search of the directory tree. - -std::vector<path> glob_basic(const std::string& pattern, const glob_fs_provider& fs) { - if (pattern.empty()) return {}; - - // Make a mutable copy for tokenization. - std::vector<char> pcopy(pattern.begin(), pattern.end()); - pcopy.push_back(0); - - char* c = pcopy.data(); - if (!*c) return {}; - - std::vector<sup::path> paths, new_paths; - paths.push_back(""); - - if (*c=='/') { - while (*c=='/') ++c; - paths[0] = "/"; - } - - do { - pattern_component component = tokenize(c); - - if (component.literal) { - for (auto p: paths) { - p /= component.pattern; - - if (component.directory) { - if (fs.is_directory(p)) new_paths.push_back(std::move(p)); - } - else { - if (fs.exists(p)) new_paths.push_back(std::move(p)); - } - } - } - else { - auto push_if_match = [&new_paths, pattern = component.pattern](const sup::path& p) { - if (glob_basic_match(pattern, p.filename().c_str())) new_paths.push_back(p); - }; - - for (auto p: paths) { - if (component.directory) fs.for_each_directory(p.c_str(), push_if_match); - else fs.for_each_entry(p.c_str(), push_if_match); - } - } - - std::swap(paths, new_paths); - new_paths.clear(); - } while (*c); - - return paths; -} - -} // namespace sup - diff --git a/sup/glob_basic_wrap.cpp b/sup/glob_basic_wrap.cpp deleted file mode 100644 index 1765dd80..00000000 --- a/sup/glob_basic_wrap.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include <string> -#include <vector> - -#include <sup/glob.hpp> - -namespace sup { - -std::vector<path> glob(const std::string& pattern) { - return glob_basic(pattern); -} - -} // namespace sup - diff --git a/sup/glob_posix.cpp b/sup/glob_posix.cpp deleted file mode 100644 index ecec061e..00000000 --- a/sup/glob_posix.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// POSIX headers -extern "C" { -#define _POSIX_C_SOURCE 200809L -#include <glob.h> -} - -// GLOB_TILDE and GLOB_BRACE are non-standard but convenient and common -// flags for glob(). - -#ifndef GLOB_TILDE -#define GLOB_TILDE 0 -#endif -#ifndef GLOB_BRACE -#define GLOB_BRACE 0 -#endif - -#include <cerrno> - -#include <arbor/util/scope_exit.hpp> -#include <sup/path.hpp> - -using arb::util::on_scope_exit; - -namespace sup { - -std::vector<path> glob(const std::string& pattern) { - std::vector<path> paths; - glob_t matches; - - int flags = GLOB_MARK | GLOB_NOCHECK | GLOB_TILDE | GLOB_BRACE; - auto r = ::glob(pattern.c_str(), flags, nullptr, &matches); - auto glob_guard = on_scope_exit([&]() { ::globfree(&matches); }); - - if (r==GLOB_NOSPACE) { - throw std::bad_alloc{}; - } - else if (r==0) { - // success - paths.reserve(matches.gl_pathc); - for (auto pathp = matches.gl_pathv; *pathp; ++pathp) { - paths.push_back(*pathp); - } - } - - return paths; -} - -} // namespace sup - diff --git a/sup/include/sup/glob.hpp b/sup/include/sup/glob.hpp deleted file mode 100644 index 1f734f52..00000000 --- a/sup/include/sup/glob.hpp +++ /dev/null @@ -1,101 +0,0 @@ -#pragma once - -// Glob implementation via glob (3) wrapper or fallback implementation. - -#include <functional> -#include <string> -#include <vector> - -#include <sup/path.hpp> - -namespace sup { - -// Wrapper (provided by either glob_posix.cpp or -// glob_basic_wrapper.cpp based on configuration.) - -std::vector<path> glob(const std::string& pattern); - -// Basic globber. -// -// Uses `glob_fs_provider` to provide required filesystem -// operations, defaults to implementation based around -// sup-provided directory iterators. - -struct glob_fs_provider { - using action_type = std::function<void (const sup::path&)>; - - template <typename Impl> - glob_fs_provider(Impl impl): inner_(new wrap<Impl>(std::move(impl))) {} - - glob_fs_provider(const glob_fs_provider& x): inner_(x.inner_->clone()) {} - - bool is_directory(const sup::path& p) const { - return inner_->is_directory(p); - } - - bool exists(const sup::path& p) const { - return inner_->exists(p); - } - - void for_each_directory(const sup::path& p, action_type action) const { - inner_->for_each_directory(p, action); - } - - void for_each_entry(const sup::path& p, action_type action) const { - inner_->for_each_entry(p, action); - } - -private: - struct base { - virtual bool is_directory(const sup::path&) const = 0; - virtual bool exists(const sup::path&) const = 0; - virtual void for_each_directory(const sup::path&, action_type action) const = 0; - virtual void for_each_entry(const sup::path&, action_type action) const = 0; - virtual base* clone() const = 0; - virtual ~base() {} - }; - - template <typename Impl> - struct wrap: base { - wrap(Impl impl): impl_(std::move(impl)) {} - - bool is_directory(const sup::path& p) const override { - return impl_.is_directory(p); - } - - bool exists(const sup::path& p) const override { - return impl_.exists(p); - } - - void for_each_directory(const sup::path& p, action_type action) const override { - impl_.for_each_directory(p, action); - } - - void for_each_entry(const sup::path& p, action_type action) const override { - impl_.for_each_entry(p, action); - } - - base* clone() const override { - return new wrap(impl_); - } - - Impl impl_; - }; - - std::unique_ptr<base> inner_; -}; - -extern glob_fs_provider glob_native_provider; - -std::vector<path> glob_basic(const std::string& pattern, const glob_fs_provider& = glob_native_provider); - -// Expose glob filename expression matcher for unit testing. -// -// Follows glob(7) description except for: -// * No character class support, e.g. [:alpha:]. -// * Ignores LC_COLLATE for character ranges, and does not accommodate multibyte encodings. - -bool glob_basic_match(const char* p, const char* t); - -} // namespace sup - diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index d51ba314..7bf826b1 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -104,7 +104,6 @@ set(unit_sources test_filter.cpp test_fvm_layout.cpp test_fvm_lowered.cpp - test_glob_basic.cpp test_kinetic_linear.cpp test_lexcmp.cpp test_lif_cell_group.cpp @@ -189,6 +188,13 @@ if(ARB_WITH_GPU) ) endif() +if(ARB_WITH_NEUROML) + list(APPEND unit_sources + + test_nml_morphology.cpp + ) +endif() + if(ARB_WITH_CUDA_CLANG OR ARB_WITH_HIP_CLANG) set_source_files_properties(${unit_sources} PROPERTIES LANGUAGE CXX) set_source_files_properties(${test_mech_sources} PROPERTIES LANGUAGE CXX) @@ -216,3 +222,7 @@ target_compile_options(unit PRIVATE ${ARB_CXXOPT_ARCH}) target_compile_definitions(unit PRIVATE "-DDATADIR=\"${CMAKE_CURRENT_SOURCE_DIR}/swc\"") target_include_directories(unit PRIVATE "${CMAKE_CURRENT_BINARY_DIR}") target_link_libraries(unit PRIVATE gtest arbor arborenv arbor-private-headers arbor-sup) + +if(ARB_WITH_NEUROML) + target_link_libraries(unit PRIVATE arbornml) +endif() diff --git a/test/unit/test_glob_basic.cpp b/test/unit/test_glob_basic.cpp deleted file mode 100644 index 114ee002..00000000 --- a/test/unit/test_glob_basic.cpp +++ /dev/null @@ -1,225 +0,0 @@ -#include "../gtest.h" - -#include <sup/glob.hpp> -#include <sup/path.hpp> - -using namespace sup; - -#include <iterator> -#include <string> -#include <unordered_map> - -TEST(glob, pattern) { - EXPECT_TRUE( glob_basic_match( "literal", "literal")); - EXPECT_FALSE(glob_basic_match("literal", "Literal")); - - EXPECT_TRUE( glob_basic_match("[a-z][A-Z]", "aA")); - EXPECT_TRUE( glob_basic_match("[a-z][A-Z]", "zZ")); - EXPECT_TRUE( glob_basic_match("[a-z][A-Z]", "bQ")); - EXPECT_FALSE(glob_basic_match("[a-z][A-Z]", "AA")); - EXPECT_FALSE(glob_basic_match("[a-z][A-Z]", "A@")); - - EXPECT_TRUE (glob_basic_match("[!0-9a]", "A")); - EXPECT_FALSE(glob_basic_match("[!0-9a]", "0")); - EXPECT_FALSE(glob_basic_match("[!0-9a]", "5")); - EXPECT_FALSE(glob_basic_match("[!0-9a]", "9")); - EXPECT_FALSE(glob_basic_match("[!0-9a]", "a")); - - EXPECT_TRUE (glob_basic_match("[-q]", "-")); - EXPECT_TRUE (glob_basic_match("[-q]", "q")); - EXPECT_FALSE(glob_basic_match("[-q]", "p")); - - EXPECT_TRUE (glob_basic_match("[q-]", "-")); - EXPECT_TRUE (glob_basic_match("[q-]", "q")); - EXPECT_FALSE(glob_basic_match("[-q]", "p")); - - EXPECT_TRUE (glob_basic_match("[!a-]", "b")); - EXPECT_FALSE(glob_basic_match("[!a-]", "a")); - EXPECT_FALSE(glob_basic_match("[!a-]", "-")); - - EXPECT_TRUE (glob_basic_match("[]-]z", "-z")); - EXPECT_TRUE (glob_basic_match("[]-]z", "]z")); - EXPECT_FALSE(glob_basic_match("[]-]z", "[z")); - - EXPECT_TRUE( glob_basic_match("?", "a")); - EXPECT_TRUE( glob_basic_match("?", " ")); - EXPECT_FALSE(glob_basic_match("?", " a")); - EXPECT_FALSE(glob_basic_match("?", "")); - - EXPECT_TRUE( glob_basic_match("a*b", "ab")); - EXPECT_TRUE( glob_basic_match("a*b", "abb")); - EXPECT_TRUE( glob_basic_match("a*b", "a01234b")); - EXPECT_FALSE(glob_basic_match("a*b", "ac")); - EXPECT_FALSE(glob_basic_match("a*b", "cb")); - - EXPECT_TRUE( glob_basic_match("a****b", "ab")); - EXPECT_TRUE( glob_basic_match("a****b", "a01b")); - EXPECT_FALSE(glob_basic_match("a****b", "a01")); - - EXPECT_TRUE( glob_basic_match("\\*", "*")); - EXPECT_FALSE(glob_basic_match("\\*", "z")); - - EXPECT_TRUE( glob_basic_match("\\?", "?")); - EXPECT_FALSE(glob_basic_match("\\?", "z")); - - EXPECT_TRUE( glob_basic_match("\\[p-q]", "[p-q]")); - EXPECT_FALSE(glob_basic_match("\\[p-q]", "\\p")); - EXPECT_TRUE( glob_basic_match("\\\\[p-q]", "\\p")); - - // Check for dodgy exponential behaviour... - EXPECT_FALSE( glob_basic_match( - "*x*x*x*x*x*x*x*x*x*x*x*x*x*x_", - "xxxxxxxxxxxxxxxxxxxxxxxxxxxx")); - - // Check special-case handling for initial period: - - EXPECT_FALSE(glob_basic_match("*", ".foo")); - EXPECT_TRUE( glob_basic_match(".*", ".foo")); - - EXPECT_FALSE(glob_basic_match("??", ".f")); - EXPECT_TRUE( glob_basic_match(".?", ".f")); - - EXPECT_FALSE(glob_basic_match("[.a][.a][.a]", "..a")); - EXPECT_TRUE( glob_basic_match(".[.a][.a]", "..a")); - - EXPECT_TRUE( glob_basic_match("\\.*", ".foo")); -} - -struct mock_fs_provider { - using action_type = glob_fs_provider::action_type; - - std::unordered_multimap<path, path> tree; - - mock_fs_provider() = default; - - template <typename... Tail> - mock_fs_provider(const char* name, Tail... tail) { - add_path(name, tail...); - } - - void add_path() const {} - - template <typename... Tail> - void add_path(const char* name, Tail... tail) { - if (!*name) return; - - const char* p = *name=='/'? name+1: name; - - for (const char* c = p; *p; p = c++) { - while (*c && *c!='/') ++c; - - std::pair<path, path> entry{path{name, p}, path{name, c}}; - if (tree.find(entry.second)==tree.end()) { - tree.insert(entry); - tree.insert({entry.second, path{}}); - } - } - - add_path(tail...); - } - - static path canonical_key(const path& p) { - return p.has_filename()? p: p.parent_path(); - } - - bool is_directory(const path& p) const { - auto r = tree.equal_range(canonical_key(p)); - return r.first!=r.second && std::next(r.first)!=r.second; - } - - bool exists(const path& p) const { - return tree.find(canonical_key(p))!=tree.end(); - } - - void for_each_directory(const path& p, action_type action) const { - auto r = tree.equal_range(canonical_key(p)); - for (auto i = r.first; i!=r.second; ++i) { - auto entry = i->second; - if (entry.empty()) continue; - - auto s = tree.equal_range(entry); - if (s.first!=s.second && std::next(s.first)!=s.second) action(entry); - } - } - - void for_each_entry(const path& p, action_type action) const { - auto r = tree.equal_range(canonical_key(p)); - for (auto i = r.first; i!=r.second; ++i) { - auto entry = i->second; - if (!entry.empty()) action(entry); - } - } -}; - -std::vector<path> sort_glob(const char* pattern, const glob_fs_provider& fs) { - auto results = glob_basic(pattern, fs); - std::sort(results.begin(), results.end()); - return results; -} - -TEST(glob, simple_patterns) { - glob_fs_provider fs = mock_fs_provider{"fish", "fop", "barf", "barry", "tip"}; - - using pvector = std::vector<path>; - - EXPECT_EQ(pvector({"fish", "fop"}), sort_glob("f*", fs)); - EXPECT_EQ(pvector({"fop", "tip"}), sort_glob("??p", fs)); - EXPECT_EQ(pvector(), sort_glob("x*", fs)); -} - -TEST(glob, literals) { - glob_fs_provider fs = mock_fs_provider{ - "/abc/def/ghi", - "/abc/de", - "/xyz", - "pqrs/tuv/w", - "pqrs/tuv/wxy" - }; - - using pvector = std::vector<path>; - - EXPECT_EQ(pvector({"/abc/def/ghi"}), sort_glob("/abc/def/ghi", fs)); - EXPECT_EQ(pvector({"/abc/def/ghi"}), sort_glob("/*/def/ghi", fs)); - EXPECT_EQ(pvector({"/abc/def/ghi"}), sort_glob("/*/*/ghi", fs)); - EXPECT_EQ(pvector({"/abc/def/ghi"}), sort_glob("/abc/def/*", fs)); - EXPECT_EQ(pvector({"/abc/def/ghi"}), sort_glob("/abc/*/*", fs)); - EXPECT_EQ(pvector({"pqrs/tuv/w", "pqrs/tuv/wxy"}), sort_glob("pqrs/tuv/w*", fs)); - EXPECT_EQ(pvector({"pqrs/tuv/w", "pqrs/tuv/wxy"}), sort_glob("*/tuv/w*", fs)); - EXPECT_EQ(pvector({"pqrs/tuv/w", "pqrs/tuv/wxy"}), sort_glob("pqrs/t*/w*", fs)); -} - -TEST(glob, multidir) { - glob_fs_provider fs = mock_fs_provider{ - "abc/fab/x", - "abc/fab/yz", - "abc/flib/x", - "abc/flib/yz", - "abc/rib/x", - "def/rib/yz", - "def/fab/x", - "def/fab/yz", - "def/rib/x", - "def/rib/yz" - }; - - using pvector = std::vector<path>; - - EXPECT_EQ(pvector({"abc/fab/x", "abc/flib/x"}), sort_glob("*c/f*b/?", fs)); -} - -TEST(glob, dots) { - glob_fs_provider fs = mock_fs_provider{ - "f.oo/b.ar", "f.oo/.bar", - ".foo/b.ar", ".foo/.bar" - }; - - using pvector = std::vector<path>; - - EXPECT_EQ(pvector({"f.oo/b.ar"}), sort_glob("*/*", fs)); - EXPECT_EQ(pvector({".foo/b.ar"}), sort_glob(".*/*", fs)); - EXPECT_EQ(pvector({"f.oo/b.ar"}), sort_glob("f[.z]oo/*", fs)); - EXPECT_EQ(pvector({"f.oo/b.ar"}), sort_glob("f?oo/*", fs)); - EXPECT_EQ(pvector(), sort_glob("[.z]foo/*", fs)); - EXPECT_EQ(pvector(), sort_glob("?foo/*", fs)); -} - diff --git a/test/unit/test_nml_morphology.cpp b/test/unit/test_nml_morphology.cpp new file mode 100644 index 00000000..3c685a08 --- /dev/null +++ b/test/unit/test_nml_morphology.cpp @@ -0,0 +1,641 @@ +#include <optional> +#include <string> +#include <vector> + +#include <arbor/morph/mprovider.hpp> +#include <arbor/morph/place_pwlin.hpp> +#include <arbor/morph/primitives.hpp> + +#include <arbornml/arbornml.hpp> +#include <arbornml/nmlexcept.hpp> +#include <arbornml/with_xml.hpp> + +#include "../test/gtest.h" +#include "morph_pred.hpp" + +using testing::region_eq; + +TEST(neuroml, with_xml) { + // This (hopefully) will not blow up. + { + arbnml::with_xml scope; + } + { + arbnml::with_xml scope; + } +} + +// Tests for basic morphology scanning and collection from XML. + +TEST(neuroml, morph_badxml) { + std::string illformed = "<wha?"; + + EXPECT_THROW(arbnml::neuroml{illformed}, arbnml::xml_error); +} + +TEST(neuroml, morph_none) { + // No NeuroML doc, with and without declaration: + { + std::string empty1 = R"~(<?xml version="1.0" encoding="UTF-8"?><foo/>)~"; + + arbnml::neuroml N1(empty1); + EXPECT_TRUE(N1.cell_ids().empty()); + EXPECT_TRUE(N1.morphology_ids().empty()); + + std::string empty2 = "<foo/>"; + + arbnml::neuroml N2(empty2); + EXPECT_TRUE(N2.cell_ids().empty()); + EXPECT_TRUE(N2.morphology_ids().empty()); + } + + // Empty NeuroML doc: + { + std::string empty3 = +R"~(<?xml version="1.0" encoding="UTF-8"?> +<neuroml xmlns="http://www.neuroml.org/schema/neuroml2"> +</neuroml>)~"; + + arbnml::neuroml N3(empty3); + EXPECT_TRUE(N3.cell_ids().empty()); + EXPECT_TRUE(N3.morphology_ids().empty()); + } +} + +TEST(neuroml, morph_ids) { + // Two top-level morphologies (m1 and m2); + // cell c3 uses top-level morphology m1; + // cell c4 uses internally defined morphology m4. + std::string doc = +R"~( +<neuroml xmlns="http://www.neuroml.org/schema/neuroml2"> +<morphology id="m1"/> +<morphology id="m2"/> +<cell id="c3" morphology="m1"/> +<cell id="c4"> + <morphology id="m4"/> +</cell> +</neuroml> +)~"; + + using svector = std::vector<std::string>; + + arbnml::neuroml N(doc); + + svector m_ids = N.morphology_ids(); // only top-level! + std::sort(m_ids.begin(), m_ids.end()); + EXPECT_EQ((svector{"m1", "m2"}), m_ids); + + svector c_ids = N.cell_ids(); + std::sort(c_ids.begin(), c_ids.end()); + EXPECT_EQ((svector{"c3", "c4"}), c_ids); + + arbnml::morphology_data mdata; + + mdata = N.cell_morphology("c4").value(); + EXPECT_EQ("c4", mdata.cell_id); + EXPECT_EQ("m4", mdata.id); + + mdata = N.cell_morphology("c3").value(); + EXPECT_EQ("c3", mdata.cell_id); + EXPECT_EQ("m1", mdata.id); + + EXPECT_THROW(N.cell_morphology("mr. bobbins").value(), std::bad_optional_access); +} + +TEST(neuroml, simple_morphologies) { + using namespace arb; + + // Points used in morphology definitions below. + + mpoint p0{1, -2, 3.5, 4}; + mpoint p1{3, -3.5, 4, 4.25}; + mpoint p2{3, -4, 4, 2.25}; + mpoint p3{4.5, -5, 5, 0.25}; + + std::string doc = +R"~( +<neuroml xmlns="http://www.neuroml.org/schema/neuroml2"> +<morphology id="m1"> + <!-- Just one segment between p0 and p1. --> + <segment id="0"> + <proximal x="1" y="-2" z="3.5" diameter="8"/> + <distal x="3" y="-3.5" z="4" diameter="8.5"/> + </segment> +</morphology> +<morphology id="m2"> + <!-- Two segments, implicit proximal, [p0 p1] [p1 p3]. --> + <segment id="0"> + <proximal x="1" y="-2" z="3.5" diameter="8"/> + <distal x="3" y="-3.5" z="4" diameter="8.5"/> + </segment> + <segment id="1"> + <parent segment="0"/> + <distal x="4.5" y="-5" z="5" diameter="0.5"/> + </segment> +</morphology> +<morphology id="m3"> + <!-- Two segments, explicit proximal (with gap) + [p0 p1] [p2 p3]. --> + <segment id="0" name="soma"> + <proximal x="1" y="-2" z="3.5" diameter="8"/> + <distal x="3" y="-3.5" z="4" diameter="8.5"/> + </segment> + <segment id="1"> + <parent segment="0"/> + <proximal x="3" y="-4" z="4" diameter="4.5"/> + <distal x="4.5" y="-5" z="5" diameter="0.5"/> + </segment> +</morphology> +<morphology id="m4"> + <!-- Two segments, meeting at root point p0, + [p0 p1] and [p0 p3]. --> + <segment id="0"> + <proximal x="1" y="-2" z="3.5" diameter="8"/> + <distal x="3" y="-3.5" z="4" diameter="8.5"/> + </segment> + <segment id="1"> + <parent segment="0" fractionAlong="0.0"/> + <distal x="4.5" y="-5" z="5" diameter="0.5"/> + </segment> +</morphology> +<morphology id="m5"> + <!-- Two segments, meeting at root point p0, + [p0 p1] and [p0 p3], but in reverse order. --> + <segment id="1"> + <parent segment="0" fractionAlong="0.0"/> + <distal x="4.5" y="-5" z="5" diameter="0.5"/> + </segment> + <segment id="0"> + <proximal x="1" y="-2" z="3.5" diameter="8"/> + <distal x="3" y="-3.5" z="4" diameter="8.5"/> + </segment> +</morphology> +</neuroml> +)~"; + + arbnml::neuroml N(doc); + + { + arbnml::morphology_data m1 = N.morphology("m1").value(); + label_dict labels; + labels.import(m1.segments, "seg:"); + mprovider P(m1.morphology, labels); + + EXPECT_TRUE(region_eq(P, reg::named("seg:0"), reg::all())); + + place_pwlin G(P.morphology()); + EXPECT_EQ(p0, G.at(mlocation{0, 0})); + EXPECT_EQ(p1, G.at(mlocation{0, 1})); + } + + { + arbnml::morphology_data m2 = N.morphology("m2").value(); + label_dict labels; + labels.import(m2.segments, "seg:"); + mprovider P(m2.morphology, labels); + + mextent seg0_extent = thingify(reg::named("seg:0"), P); + ASSERT_EQ(1u, seg0_extent.size()); + mcable seg0 = seg0_extent.cables()[0]; + + mextent seg1_extent = thingify(reg::named("seg:1"), P); + ASSERT_EQ(1u, seg1_extent.size()); + mcable seg1 = seg1_extent.cables()[0]; + + EXPECT_EQ(0u, seg0.branch); + EXPECT_EQ(0.0, seg0.prox_pos); + + EXPECT_EQ(0u, seg1.branch); + EXPECT_EQ(seg0.dist_pos, seg1.prox_pos); + EXPECT_EQ(1.0, seg1.dist_pos); + + place_pwlin G(P.morphology()); + EXPECT_EQ(p0, G.at(prox_loc(seg0))); + EXPECT_EQ(p1, G.at(dist_loc(seg0))); + EXPECT_EQ(p1, G.at(prox_loc(seg1))); + EXPECT_EQ(p3, G.at(dist_loc(seg1))); + } + + { + arbnml::morphology_data m3 = N.morphology("m3").value(); + label_dict labels; + labels.import(m3.segments, "seg:"); + mprovider P(m3.morphology, labels); + + mextent seg0_extent = thingify(reg::named("seg:0"), P); + ASSERT_EQ(1u, seg0_extent.size()); + mcable seg0 = seg0_extent.cables()[0]; + + mextent seg1_extent = thingify(reg::named("seg:1"), P); + ASSERT_EQ(1u, seg1_extent.size()); + mcable seg1 = seg1_extent.cables()[0]; + + EXPECT_EQ(0u, seg0.branch); + EXPECT_EQ(0.0, seg0.prox_pos); + + EXPECT_EQ(0u, seg1.branch); + EXPECT_EQ(seg0.dist_pos, seg1.prox_pos); + EXPECT_EQ(1.0, seg1.dist_pos); + + place_pwlin G(P.morphology()); + auto seg0_segments = G.segments(seg0_extent); + auto seg1_segments = G.segments(seg1_extent); + + ASSERT_EQ(1u, seg0_segments.size()); + EXPECT_EQ(p0, seg0_segments[0].prox); + EXPECT_EQ(p1, seg0_segments[0].dist); + + ASSERT_EQ(1u, seg1_segments.size()); + EXPECT_EQ(p2, seg1_segments[0].prox); + EXPECT_EQ(p3, seg1_segments[0].dist); + } + { + for (const char* m_name: {"m4", "m5"}) { + arbnml::morphology_data m4_or_5 = N.morphology(m_name).value(); + label_dict labels; + labels.import(m4_or_5.segments, "seg:"); + mprovider P(m4_or_5.morphology, labels); + + mextent seg0_extent = thingify(reg::named("seg:0"), P); + ASSERT_EQ(1u, seg0_extent.size()); + + mextent seg1_extent = thingify(reg::named("seg:1"), P); + ASSERT_EQ(1u, seg1_extent.size()); + + place_pwlin G(P.morphology()); + auto seg0_segments = G.segments(seg0_extent); + auto seg1_segments = G.segments(seg1_extent); + + ASSERT_EQ(1u, seg0_segments.size()); + EXPECT_EQ(p0, seg0_segments[0].prox); + EXPECT_EQ(p1, seg0_segments[0].dist); + + ASSERT_EQ(1u, seg1_segments.size()); + EXPECT_EQ(p0, seg1_segments[0].prox); + EXPECT_EQ(p3, seg1_segments[0].dist); + } + } +} + +TEST(neuroml, segment_errors) { + using namespace arb; + + // Points used in morphology definitions below. + + std::string doc = +R"~( +<neuroml xmlns="http://www.neuroml.org/schema/neuroml2"> +<morphology id="no-proximal"> + <!-- No proximal point for root segment --> + <segment id="0"> + <distal x="3" y="-3.5" z="4" diameter="8.5"/> + </segment> +</morphology> +<morphology id="no-such-parent"> + <!-- Parent of segment 1 does not exist --> + <segment id="0"> + <proximal x="1" y="-2" z="3.5" diameter="8"/> + <distal x="3" y="-3.5" z="4" diameter="8.5"/> + </segment> + <segment id="1"> + <parent segment="2"/> + <distal x="4.5" y="-5" z="5" diameter="0.5"/> + </segment> +</morphology> +<morphology id="cyclic-dependency"> + <!-- Segments 1, 2 3 form a cycle --> + <segment id="0" name="soma"> + <proximal x="1" y="-2" z="3.5" diameter="8"/> + <distal x="3" y="-3.5" z="4" diameter="8.5"/> + </segment> + <segment id="1"> + <parent segment="3"/> + <distal x="4.5" y="-5" z="5" diameter="0.5"/> + </segment> + <segment id="2"> + <parent segment="1"/> + <distal x="5.5" y="-5" z="5" diameter="0.5"/> + </segment> + <segment id="3"> + <parent segment="2"/> + <distal x="6.5" y="-5" z="5" diameter="0.5"/> + </segment> +</morphology> +<morphology id="duplicate-id"> + <!-- Two segments with the same id --> + <segment id="0"> + <proximal x="1" y="-2" z="3.5" diameter="8"/> + <distal x="3" y="-3.5" z="4" diameter="8.5"/> + </segment> + <segment id="1"> + <parent segment="0" fractionAlong="0.0"/> + <distal x="4.5" y="-5" z="5" diameter="0.5"/> + </segment> + <segment id="1"> + <parent segment="0" fractionAlong="0.0"/> + <distal x="7.5" y="-5" z="5" diameter="0.5"/> + </segment> +</morphology> +<morphology id="bad-segment-id"> + <!-- Segment id is a negative number --> + <segment id="-1"> + <proximal x="1" y="-2" z="3.5" diameter="8"/> + <distal x="3" y="-3.5" z="4" diameter="8.5"/> + </segment> +</morphology> +<morphology id="another-bad-segment-id"> + <!-- Segment id is not a whole number --> + <segment id="1.6"> + <proximal x="1" y="-2" z="3.5" diameter="8"/> + <distal x="3" y="-3.5" z="4" diameter="8.5"/> + </segment> +</morphology> +</neuroml> +)~"; + + arbnml::neuroml N(doc); + + EXPECT_THROW(N.morphology("no-proximal").value(), arbnml::bad_segment); + EXPECT_THROW(N.morphology("no-such-parent").value(), arbnml::bad_segment); + EXPECT_THROW(N.morphology("cyclic-dependency").value(), arbnml::cyclic_dependency); + EXPECT_THROW(N.morphology("duplicate-id").value(), arbnml::bad_segment); + EXPECT_THROW(N.morphology("bad-segment-id").value(), arbnml::bad_segment); + EXPECT_THROW(N.morphology("another-bad-segment-id").value(), arbnml::bad_segment); +} + +TEST(neuroml, simple_groups) { + using namespace arb; + + std::string doc = +R"~( +<neuroml xmlns="http://www.neuroml.org/schema/neuroml2"> +<morphology id="m1"> + <segment id="0"> + <proximal x="1" y="1" z="1" diameter="1"/> + <distal x="2" y="2" z="2" diameter="2"/> + </segment> + <segment id="1"> + <parent segment="0"/> + <proximal x="1" y="1" z="1" diameter="1"/> + <distal x="2" y="2" z="2" diameter="2"/> + </segment> + <segment id="2"> + <parent segment="1"/> + <proximal x="1" y="1" z="1" diameter="1"/> + <distal x="2" y="2" z="2" diameter="2"/> + </segment> + <segmentGroup id="group-a"> + <member segment="0"/> + </segmentGroup> + <segmentGroup id="group-b"> + <member segment="2"/> + </segmentGroup> + <segmentGroup id="group-c"> + <member segment="2"/> + <member segment="1"/> + </segmentGroup> +</morphology> +<morphology id="m2"> + <segment id="0"> + <proximal x="1" y="1" z="1" diameter="1"/> + <distal x="2" y="2" z="2" diameter="2"/> + </segment> + <segment id="1"> + <parent segment="0"/> + <proximal x="1" y="1" z="1" diameter="1"/> + <distal x="2" y="2" z="2" diameter="2"/> + </segment> + <segment id="2"> + <parent segment="1"/> + <proximal x="1" y="1" z="1" diameter="1"/> + <distal x="2" y="2" z="2" diameter="2"/> + </segment> + <segment id="3"> + <parent segment="2"/> + <proximal x="1" y="1" z="1" diameter="1"/> + <distal x="2" y="2" z="2" diameter="2"/> + </segment> + <segmentGroup id="group-a"> + <!-- segments 0 and 2 --> + <member segment="0"/> + <include segmentGroup="group-b"/> + </segmentGroup> + <segmentGroup id="group-b"> + <member segment="2"/> + </segmentGroup> + <segmentGroup id="group-c"> + <!-- segments 0, 1 and 2 --> + <member segment="1"/> + <include segmentGroup="group-a"/> + </segmentGroup> + <segmentGroup id="group-d"> + <!-- segments 0, 2 and 3 --> + <include segmentGroup="group-e"/> + <include segmentGroup="group-a"/> + </segmentGroup> + <segmentGroup id="group-e"> + <member segment="3"/> + </segmentGroup> +</morphology> +</neuroml> +)~"; + + arbnml::neuroml N(doc); + using reg::named; + + { + arbnml::morphology_data m1 = N.morphology("m1").value(); + label_dict labels; + labels.import(m1.segments); + labels.import(m1.groups); + mprovider P(m1.morphology, labels); + + EXPECT_TRUE(region_eq(P, named("group-a"), named("0"))); + EXPECT_TRUE(region_eq(P, named("group-b"), named("2"))); + EXPECT_TRUE(region_eq(P, named("group-c"), join(named("2"), named("1")))); + } + { + arbnml::morphology_data m2 = N.morphology("m2").value(); + label_dict labels; + labels.import(m2.segments); + labels.import(m2.groups); + mprovider P(m2.morphology, labels); + + EXPECT_TRUE(region_eq(P, named("group-a"), join(named("0"), named("2")))); + EXPECT_TRUE(region_eq(P, named("group-c"), join(named("0"), named("1"), named("2")))); + EXPECT_TRUE(region_eq(P, named("group-d"), join(named("0"), named("2"), named("3")))); + } +} + +TEST(neuroml, group_errors) { + using namespace arb; + + std::string doc = +R"~( +<neuroml xmlns="http://www.neuroml.org/schema/neuroml2"> +<morphology id="no-such-segment"> + <segment id="0"> + <proximal x="1" y="1" z="1" diameter="1"/> + <distal x="2" y="2" z="2" diameter="2"/> + </segment> + <segmentGroup id="group"> + <member segment="1"/> + </segmentGroup> +</morphology> +<morphology id="no-such-group"> + <segment id="0"> + <proximal x="1" y="1" z="1" diameter="1"/> + <distal x="2" y="2" z="2" diameter="2"/> + </segment> + <segmentGroup id="group"> + <member segment="0"/> + <include segmentGroup="other-group"/> + </segmentGroup> +</morphology> +<morphology id="cyclic-dependency"> + <segment id="0"> + <proximal x="1" y="1" z="1" diameter="1"/> + <distal x="2" y="2" z="2" diameter="2"/> + </segment> + <segmentGroup id="group"> + <include segmentGroup="other-group"/> + </segmentGroup> + <segmentGroup id="other-group"> + <include segmentGroup="group"/> + </segmentGroup> +</morphology> +</neuroml> +)~"; + + arbnml::neuroml N(doc); + + EXPECT_THROW(N.morphology("no-such-segment").value(), arbnml::bad_segment_group); + EXPECT_THROW(N.morphology("no-such-group").value(), arbnml::bad_segment_group); + EXPECT_THROW(N.morphology("cyclic-dependency").value(), arbnml::cyclic_dependency); +} + + +TEST(neuroml, group_paths_subtrees) { + using namespace arb; + + std::string doc = +R"~( +<neuroml xmlns="http://www.neuroml.org/schema/neuroml2"> +<morphology id="m1"> + <segment id="0"> + <proximal x="0" y="0" z="0" diameter="1"/> + <distal x="1" y="0" z="0" diameter="2"/> + </segment> + <segment id="1"> + <parent segment="0" fractionAlong="0.5"/> + <proximal x="0.5" y="0" z="0" diameter="1"/> + <distal x="0.5" y="1" z="0" diameter="2"/> + </segment> + <segment id="2"> + <parent segment="1"/> + <proximal x="0.5" y="1" z="0" diameter="1"/> + <distal x="0.5" y="2" z="0" diameter="2"/> + </segment> + <segment id="3"> + <parent segment="1" fractionAlong="0"/> + <distal x="0.5" y="0" z="3" diameter="2"/> + </segment> + <!-- paths and subTrees are essentially equivalent --> + <segmentGroup id="path01"> + <path> + <from segment="0"/> + <to segment="1"/> + </path> + </segmentGroup> + <segmentGroup id="path12"> + <path> + <from segment="1"/> + <to segment="2"/> + </path> + </segmentGroup> + <segmentGroup id="path10"> + <path> + <from segment="1"/> + <to segment="0"/> + </path> + </segmentGroup> + <segmentGroup id="path0-"> + <path> + <from segment="0"/> + </path> + </segmentGroup> + <segmentGroup id="path1-"> + <path> + <from segment="1"/> + </path> + </segmentGroup> + <segmentGroup id="path-3"> + <path> + <to segment="3"/> + </path> + </segmentGroup> + <segmentGroup id="subTree01"> + <subTree> + <from segment="0"/> + <to segment="1"/> + </subTree> + </segmentGroup> + <segmentGroup id="subTree12"> + <subTree> + <from segment="1"/> + <to segment="2"/> + </subTree> + </segmentGroup> + <segmentGroup id="subTree10"> + <subTree> + <from segment="1"/> + <to segment="0"/> + </subTree> + </segmentGroup> + <segmentGroup id="subTree0-"> + <subTree> + <from segment="0"/> + </subTree> + </segmentGroup> + <segmentGroup id="subTree1-"> + <subTree> + <from segment="1"/> + </subTree> + </segmentGroup> + <segmentGroup id="subTree-3"> + <subTree> + <to segment="3"/> + </subTree> + </segmentGroup> +</morphology> +</neuroml> +)~"; + + arbnml::neuroml N(doc); + + arbnml::morphology_data m1 = N.morphology("m1").value(); + label_dict labels; + labels.import(m1.segments); + labels.import(m1.groups); + mprovider P(m1.morphology, labels); + + // Note: paths/subTrees respect segment parent–child relationships, + // not morphological distality. + + using reg::named; + + EXPECT_TRUE(region_eq(P, named("path01"), join(named("0"), named("1")))); + EXPECT_TRUE(region_eq(P, named("path12"), join(named("1"), named("2")))); + EXPECT_TRUE(region_eq(P, named("path10"), reg::nil())); + EXPECT_TRUE(region_eq(P, named("path0-"), reg::all())); + EXPECT_TRUE(region_eq(P, named("path1-"), join(named("1"), named("2"), named("3")))); + EXPECT_TRUE(region_eq(P, named("path-3"), join(named("0"), named("1"), named("3")))); + + EXPECT_TRUE(region_eq(P, named("subTree01"), join(named("0"), named("1")))); + EXPECT_TRUE(region_eq(P, named("subTree12"), join(named("1"), named("2")))); + EXPECT_TRUE(region_eq(P, named("subTree10"), reg::nil())); + EXPECT_TRUE(region_eq(P, named("subTree0-"), reg::all())); + EXPECT_TRUE(region_eq(P, named("subTree1-"), join(named("1"), named("2"), named("3")))); + EXPECT_TRUE(region_eq(P, named("subTree-3"), join(named("0"), named("1"), named("3")))); +} -- GitLab