diff --git a/.github/workflows/test-everything.yml b/.github/workflows/test-everything.yml index 143d04c1f812e3099d342fea4aae333704a3cca7..43f84aae12de5f495999a5f6e9711c327fd2388d 100644 --- a/.github/workflows/test-everything.yml +++ b/.github/workflows/test-everything.yml @@ -97,11 +97,11 @@ jobs: if: ${{ startsWith(matrix.config.os, 'ubuntu') }} run: | sudo apt-get update - sudo apt-get install -y libxml2-dev libopenmpi-dev ninja-build ccache ${{ matrix.config.cc }} + sudo apt-get install -y libopenmpi-dev ninja-build ccache ${{ matrix.config.cc }} - name: "MacOS: get build dependencies" if: ${{ startsWith(matrix.config.os, 'macos') }} run: | - brew install libxml2 openmpi ninja ccache + brew install openmpi ninja ccache - name: Set up cmake uses: jwlawson/actions-setup-cmake@v1.13 with: @@ -151,7 +151,7 @@ jobs: export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" mkdir build cd build - cmake .. -GNinja -DCMAKE_CXX_COMPILER=$CXX -DCMAKE_C_COMPILER=$CC -DARB_WITH_PYTHON=ON -DARB_VECTORIZE=${{ matrix.config.simd }} -DPython3_EXECUTABLE=`which python` -DARB_WITH_MPI=${{ matrix.config.mpi }} -DARB_USE_BUNDLED_LIBS=ON -DARB_WITH_NEUROML=ON -DARB_WITH_ASSERTIONS=ON + cmake .. -GNinja -DCMAKE_CXX_COMPILER=$CXX -DCMAKE_C_COMPILER=$CC -DARB_WITH_PYTHON=ON -DARB_VECTORIZE=${{ matrix.config.simd }} -DPython3_EXECUTABLE=`which python` -DARB_WITH_MPI=${{ matrix.config.mpi }} -DARB_USE_BUNDLED_LIBS=ON -DARB_WITH_ASSERTIONS=ON ninja -j4 tests examples pyarb html cd - - if: ${{ matrix.variant == 'shared' }} @@ -160,7 +160,7 @@ jobs: export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" mkdir build cd build - cmake .. -GNinja -DCMAKE_CXX_COMPILER=$CXX -DCMAKE_C_COMPILER=$CC -DARB_WITH_PYTHON=ON -DARB_VECTORIZE=${{ matrix.config.simd }} -DPython3_EXECUTABLE=`which python` -DARB_WITH_MPI=${{ matrix.config.mpi }} -DARB_USE_BUNDLED_LIBS=ON -DARB_WITH_NEUROML=ON -DARB_WITH_ASSERTIONS=ON -DBUILD_SHARED_LIBS=ON + cmake .. 
-GNinja -DCMAKE_CXX_COMPILER=$CXX -DCMAKE_C_COMPILER=$CC -DARB_WITH_PYTHON=ON -DARB_VECTORIZE=${{ matrix.config.simd }} -DPython3_EXECUTABLE=`which python` -DARB_WITH_MPI=${{ matrix.config.mpi }} -DARB_USE_BUNDLED_LIBS=ON -DARB_WITH_ASSERTIONS=ON -DBUILD_SHARED_LIBS=ON ninja -j4 tests examples pyarb html cd - - name: Install arbor diff --git a/.gitmodules b/.gitmodules index ca46a262d5db14196120ca1bdb374881f5d0c320..cd515e55423d824434cdb01dc86558a8a87ecd7d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -22,4 +22,7 @@ path = ext/googletest url = https://github.com/google/googletest.git branch = main - +[submodule "ext/pugixml"] + path = ext/pugixml + url = https://github.com/zeux/pugixml.git + branch = master diff --git a/ATTRIBUTIONS.md b/ATTRIBUTIONS.md index 1f7b99e1df39d5ac5dd87b762f91bd561ebfab10..6229eeba5f7f680af11d4409929f4ae926f51c90 100644 --- a/ATTRIBUTIONS.md +++ b/ATTRIBUTIONS.md @@ -58,6 +58,13 @@ BSD License. http://llvm.org/ +# PugiXML + +A simple and fast C++ XML processing library. +MIT License. 
+ +https://github.com/zeux/pugixml + ## Transcendentals intrinsics The numerical algorithms for the transcendentals intrinsics are based on the diff --git a/CMakeLists.txt b/CMakeLists.txt index bb84ea9d2ec7de54adba103572ba210ee321ffbc..8fa2643dd685e12f4b7140727fdf60cd370db7b8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,12 +82,6 @@ option(ARB_WITH_PROFILING "use built-in profiling" OFF) option(ARB_WITH_ASSERTIONS "enable arb_assert() assertions in code" OFF) -#---------------------------------------------------------- -# NeuroML support library: -#---------------------------------------------------------- - -option(ARB_WITH_NEUROML "build NeuroML support library" OFF) - #---------------------------------------------------------- # Python front end for Arbor: #---------------------------------------------------------- @@ -227,6 +221,7 @@ install(FILES mechanisms/BuildModules.cmake DESTINATION ${ARB_INSTALL_DATADIR}) # Creates interface libraries `ext-json`, `ext-tinyopt` and `ext-random123` cmake_dependent_option(ARB_USE_BUNDLED_FMT "Use bundled FMT lib." ON "ARB_USE_BUNDLED_LIBS" OFF) +cmake_dependent_option(ARB_USE_BUNDLED_PUGIXML "Use bundled XML lib." ON "ARB_USE_BUNDLED_LIBS" OFF) cmake_dependent_option(ARB_USE_BUNDLED_JSON "Use bundled Niels Lohmann's json library." ON "ARB_USE_BUNDLED_LIBS" OFF) if(NOT ARB_USE_BUNDLED_JSON) diff --git a/arbor/include/CMakeLists.txt b/arbor/include/CMakeLists.txt index 461b7d5428a26628f6bfc71881f05a750bdc6ed9..bf912d3c1c4f203c7b3ec3f301e0a4be4a8c7059 100644 --- a/arbor/include/CMakeLists.txt +++ b/arbor/include/CMakeLists.txt @@ -38,10 +38,8 @@ if(ARB_WITH_GPU) # define ARB_GPU_ENABLED in version.hpp list(APPEND arb_features GPU) endif() -if(ARB_WITH_NEUROML) - # define ARB_NEUROML_ENABLED in version.hpp - list(APPEND arb_features NEUROML) -endif() +# This is provided for legacy reasons, now always on. 
+list(APPEND arb_features NEUROML) if(ARB_WITH_PROFILING) # define ARB_PROFILE_ENABLED in version.hpp list(APPEND arb_features PROFILE) diff --git a/arborio/CMakeLists.txt b/arborio/CMakeLists.txt index 6db6b46fc27f2a8930dd95387a67b2854c9b5482..1e40f43922f68684569c9f8d5851493e74b30218 100644 --- a/arborio/CMakeLists.txt +++ b/arborio/CMakeLists.txt @@ -5,19 +5,19 @@ set(arborio-sources cableio.cpp cv_policy_parse.cpp label_parse.cpp -) -if(ARB_WITH_NEUROML) - list(APPEND arborio-sources - neuroml.cpp - nml_parse_morphology.cpp - xml.cpp - xmlwrap.cpp) - find_package(LibXml2 REQUIRED) -endif() - + neuroml.cpp + nml_parse_morphology.cpp) add_library(arborio ${arborio-sources}) +if (ARB_USE_BUNDLED_PUGIXML) + target_include_directories(arborio PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../ext/pugixml/src>) + target_compile_definitions(arborio PRIVATE PUGIXML_HEADER_ONLY) +else() + find_package(pugixml REQUIRED) + target_link_libraries(arborio PUBLIC pugixml::pugixml) +endif() + add_library(arborio-public-headers INTERFACE) add_library(arborio-private-headers INTERFACE) @@ -29,15 +29,9 @@ target_include_directories(arborio-public-headers INTERFACE target_include_directories(arborio-private-headers INTERFACE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>") -if(ARB_WITH_NEUROML) - target_link_libraries(arborio PUBLIC arbor arborio-public-headers LibXml2::LibXml2) - list(APPEND arbor_export_dependencies "LibXml2") - set(arbor_export_dependencies "${arbor_export_dependencies}" PARENT_SCOPE) - list(APPEND arbor_supported_components "neuroml") - set(arbor_supported_components "${arbor_supported_components}" PARENT_SCOPE) -else () - target_link_libraries(arborio PUBLIC arbor arborio-public-headers) -endif() +target_link_libraries(arborio PUBLIC arbor arborio-public-headers) +list(APPEND arbor_supported_components "neuroml") +set(arbor_supported_components "${arbor_supported_components}" PARENT_SCOPE) target_link_libraries(arborio PRIVATE arbor-config-defs 
arborio-private-deps) diff --git a/arborio/include/arborio/neuroml.hpp b/arborio/include/arborio/neuroml.hpp index 3646548e6f773b106a3414994434041f406b4b90..7d889581b4c948fd25fe141699a84e9790f97301 100644 --- a/arborio/include/arborio/neuroml.hpp +++ b/arborio/include/arborio/neuroml.hpp @@ -14,6 +14,15 @@ namespace arborio { +// `non_negative` represents the corresponding constraint in the schema, which +// can mean any arbitrarily large non-negative integer value. +// +// A faithful representation would use an arbitrary-size 'big' integer or a +// string, but for ease of implementation (and a bit more speed) we restrict it +// to whatever we can fit in an unsigned long long. + +using non_negative = unsigned long long; + // Common base-class for neuroml run-time errors. struct ARB_SYMBOL_VISIBLE neuroml_exception: std::runtime_error { neuroml_exception(const std::string& what_arg): @@ -28,34 +37,30 @@ struct ARB_SYMBOL_VISIBLE nml_no_document: neuroml_exception { // Generic error parsing NeuroML data. struct ARB_SYMBOL_VISIBLE nml_parse_error: neuroml_exception { - nml_parse_error(const std::string& error_msg, unsigned line = 0); + nml_parse_error(const std::string& error_msg); std::string error_msg; - unsigned line; }; // NeuroML morphology error: improper segment data, e.g. bad id specification, // segment parent does not exist, fractionAlong is out of bounds, missing // required <proximal> data. struct ARB_SYMBOL_VISIBLE nml_bad_segment: neuroml_exception { - nml_bad_segment(unsigned long long segment_id, unsigned line = 0); + nml_bad_segment(unsigned long long segment_id); unsigned long long segment_id; - unsigned line; }; // NeuroML morphology error: improper segmentGroup data, e.g. malformed // element data, missing referenced segments or groups, etc. 
struct ARB_SYMBOL_VISIBLE nml_bad_segment_group: neuroml_exception { - nml_bad_segment_group(const std::string& group_id, unsigned line = 0); + nml_bad_segment_group(const std::string& group_id); std::string group_id; - unsigned line; }; // A segment or segmentGroup ultimately refers to itself via `parent` // or `include` elements respectively. struct ARB_SYMBOL_VISIBLE nml_cyclic_dependency: neuroml_exception { - nml_cyclic_dependency(const std::string& id, unsigned line = 0); + nml_cyclic_dependency(const std::string& id); std::string id; - unsigned line; }; // Collect and parse morphology elements from XML. diff --git a/arborio/include/arborio/xml.hpp b/arborio/include/arborio/xml.hpp deleted file mode 100644 index bedc54fdf2ee5ca8f084f38fcf578a4eaeed1566..0000000000000000000000000000000000000000 --- a/arborio/include/arborio/xml.hpp +++ /dev/null @@ -1,38 +0,0 @@ -#pragma once - -#include <stdexcept> -#include <string> - -#include <arborio/export.hpp> - -// XML related interfaces deriving from the underlying XML implementation library. - -namespace arborio { - -// Generic XML error (as reported by libxml2). -struct ARB_SYMBOL_VISIBLE xml_error: std::runtime_error { - xml_error(const std::string& xml_error_msg, unsigned line = 0); - std::string xml_error_msg; - unsigned line; -}; - -// Wrap initialization and cleanup of libxml2 library. -// -// Use of `with_xml` is only necessary if arborio is being -// used in a multithreaded context and the client code is -// not managing libxml2 initialization and cleanup. 
- -struct ARB_ARBORIO_API with_xml { - with_xml(); - ~with_xml(); - - with_xml(with_xml&&); - with_xml(const with_xml&) = delete; - - with_xml& operator=(const with_xml&) = delete; - with_xml& operator=(with_xml&&) = delete; - - bool run_cleanup_; -}; - -} // namespace arborio diff --git a/arborio/neuroml.cpp b/arborio/neuroml.cpp index 31dcc80777c40ffaa1ae42dffe5aeb96493f32ea..554b7a6db494c5f489554f28f6a8a13175bd49a0 100644 --- a/arborio/neuroml.cpp +++ b/arborio/neuroml.cpp @@ -6,75 +6,47 @@ #include <arborio/neuroml.hpp> #include "nml_parse_morphology.hpp" -#include "xmlwrap.hpp" +#include "xml.hpp" using std::optional; using std::nullopt; -using namespace arborio::xmlwrap; - namespace arborio { -static std::string fmt_error(const char* prefix, const std::string& err, unsigned line) { - return prefix + (line==0? err: "line " + std::to_string(line) + ": " + err); -} - nml_no_document::nml_no_document(): neuroml_exception("no NeuroML document to parse") {} -nml_parse_error::nml_parse_error(const std::string& error_msg, unsigned line): - neuroml_exception(fmt_error("parse error: ", error_msg, line)), - error_msg(error_msg), - line(line) +nml_parse_error::nml_parse_error(const std::string& error_msg): + neuroml_exception("parse error: " + error_msg), + error_msg(error_msg) {} -nml_bad_segment::nml_bad_segment(unsigned long long segment_id, unsigned line): - neuroml_exception( - fmt_error( - "bad morphology segment: ", - "segment "+(segment_id+1==0? "unknown": "\""+std::to_string(segment_id)+"\""), - line)), - segment_id(segment_id), - line(line) +nml_bad_segment::nml_bad_segment(unsigned long long segment_id): + neuroml_exception("bad morphology segment: id="+(segment_id+1==0? 
"unknown": "\""+std::to_string(segment_id)+"\"")), + segment_id(segment_id) {} -nml_bad_segment_group::nml_bad_segment_group(const std::string& group_id, unsigned line): +nml_bad_segment_group::nml_bad_segment_group(const std::string& group_id): neuroml_exception( - fmt_error( - "bad morphology segmentGroup: ", - "segmentGroup id "+(group_id.empty()? "unknown": "\""+group_id+"\""), - line)), - group_id(group_id), - line(line) + "bad morphology segmentGroup: id=" + (group_id.empty()? "unknown": "\""+group_id+"\"")), + group_id(group_id) {} -nml_cyclic_dependency::nml_cyclic_dependency(const std::string& id, unsigned line): - neuroml_exception( - fmt_error( - "cyclic dependency: ", - "element id \""+id+"\"", - line)), - id(id), - line(line) +nml_cyclic_dependency::nml_cyclic_dependency(const std::string& id): + neuroml_exception("cyclic dependency: id \""+id+"\""), + id(id) {} struct ARB_ARBORIO_API neuroml_impl { xml_doc doc; + std::string raw; neuroml_impl() {} - explicit neuroml_impl(std::string text) { - xml_error_scope err; - doc = xml_doc(text); - } - - xml_xpathctx make_context() const { - if (!doc) throw nml_no_document{}; - - auto ctx = xpath_context(doc); - ctx.register_ns("nml", "http://www.neuroml.org/schema/neuroml2"); - return ctx; + explicit neuroml_impl(std::string text): raw{text} { + auto res = doc.load_buffer_inplace(raw.data(), raw.size()+1); + if (res.status) throw nml_parse_error{res.description()}; } }; @@ -87,53 +59,39 @@ neuroml& neuroml::operator=(neuroml&&) = default; neuroml::~neuroml() = default; std::vector<std::string> neuroml::cell_ids() const { - xml_error_scope err; + auto matches = impl_->doc.select_nodes("//neuroml/cell/@id"); std::vector<std::string> result; - - auto ctx = impl_->make_context(); - auto matches = ctx.query("//nml:neuroml/nml:cell/@id"); - result.reserve(matches.size()); - for (auto node: matches) { - result.push_back(std::string(node.content())); + for (const auto& it: matches) { + 
result.push_back(it.attribute().as_string()); } - return result; } std::vector<std::string> neuroml::morphology_ids() const { - xml_error_scope err; + auto matches = impl_->doc.select_nodes("//neuroml/morphology/@id"); std::vector<std::string> result; - - auto ctx = impl_->make_context(); - auto matches = ctx.query("//nml:neuroml/nml:morphology/@id"); - result.reserve(matches.size()); - for (auto node: matches) { - result.push_back(std::string(node.content())); + for (const auto& it: matches) { + result.push_back(it.attribute().as_string()); } - return result; } optional<nml_morphology_data> neuroml::morphology(const std::string& morph_id, enum neuroml_options::values options) const { - xml_error_scope err; - auto ctx = impl_->make_context(); - auto matches = ctx.query("//nml:neuroml/nml:morphology[@id="+xpath_escape(morph_id)+"]"); - - return matches.empty()? nullopt: optional(nml_parse_morphology_element(ctx, matches[0], options)); + auto id = xpath_escape(morph_id); + auto query = "//neuroml/morphology[@id=" + id + "]"; + auto match = impl_->doc.select_node(query.data()).node(); + if (match.empty()) return {}; + return nml_parse_morphology_element(match, options); } optional<nml_morphology_data> neuroml::cell_morphology(const std::string& cell_id, enum neuroml_options::values options) const { - xml_error_scope err; - auto ctx = impl_->make_context(); - auto matches = ctx.query( - "( //nml:neuroml/nml:morphology[@id=string((//nml:neuroml/nml:cell[@id="+xpath_escape(cell_id)+"]/@morphology)[1])] | " - " //nml:neuroml/nml:cell[@id="+xpath_escape(cell_id)+"]/nml:morphology )[1]"); - - if (matches.empty()) return nullopt; - - nml_morphology_data M = nml_parse_morphology_element(ctx, matches[0], options); + auto id = "//neuroml/cell[@id=" + xpath_escape(cell_id) + "]"; + auto query = "(//neuroml/morphology[@id=string((" + id + "/@morphology)[1])] | " + id + "/morphology)[1]"; + auto match = impl_->doc.select_node(query.data()).node(); + if (match.empty()) return 
nullopt; + nml_morphology_data M = nml_parse_morphology_element(match, options); M.cell_id = cell_id; return M; } diff --git a/arborio/nml_parse_morphology.cpp b/arborio/nml_parse_morphology.cpp index 24203ea18451fdcdb7ef3004d00bce8139429503..84201e49d5d29af033343489ccb72a51bf38663e 100644 --- a/arborio/nml_parse_morphology.cpp +++ b/arborio/nml_parse_morphology.cpp @@ -17,7 +17,7 @@ #include <arborio/neuroml.hpp> #include "nml_parse_morphology.hpp" -#include "xmlwrap.hpp" +#include "xml.hpp" using std::optional; using arb::region; @@ -25,7 +25,6 @@ using arb::util::expected; using arb::util::unexpected; using namespace std::literals; -using namespace arborio::xmlwrap; namespace arborio { @@ -132,22 +131,6 @@ expected<std::vector<std::size_t>, cycle_detected> topological_sort(std::size_t return depth; } -template <typename T> -struct propx { - explicit propx(xml_node n, const char* attr, optional<T> dflt = std::nullopt) { - if (auto x = n.prop<T>(attr, dflt)) { - result_ = std::move(x.value()); - } - else { - throw nml_parse_error(x.error().error, x.error().line); - } - } - - operator T() && { return std::move(result_); } - operator T() const& { return result_; } - T result_; -}; - } // namespace @@ -162,10 +145,6 @@ struct neuroml_segment { optional<non_negative> parent_id; double along = 1; bool spherical = false; - - // Data for error reporting: - unsigned line = 0; - // Topological depth: std::size_t tdepth = 0; }; @@ -174,9 +153,6 @@ struct neuroml_segment_group_subtree { // Interval determined by segment ids. // Represents both `<path>` and `<subTree>` elements. 
optional<non_negative> from, to; - - // Data for error reporting: - unsigned line = 0; }; struct neuroml_segment_group_info { @@ -184,9 +160,6 @@ struct neuroml_segment_group_info { std::vector<non_negative> segments; std::vector<std::string> includes; std::vector<neuroml_segment_group_subtree> subtrees; - - // Data for error reporting: - unsigned line = 0; }; // Processing of parsed segment/segmentGroup data: @@ -227,14 +200,14 @@ struct neuroml_segment_tree { // Build index, throw on duplicate id. for (std::size_t i = 0; i<n_seg; ++i) { if (!index_.insert({segments_[i].id, i}).second) { - throw nml_bad_segment(segments_[i].id, segments_[i].line); + throw nml_bad_segment(segments_[i].id); } } // Check parent relationship is sound. for (const auto& s: segments_) { if (s.parent_id && !index_.count(*s.parent_id)) { - throw nml_bad_segment(s.id, s.line); // No such parent id. + throw nml_bad_segment(s.id); // No such parent id. } } @@ -251,12 +224,12 @@ struct neuroml_segment_tree { } else { const auto& seg = segments_[depths.error().index]; - throw nml_cyclic_dependency(nl_to_string(seg.id), seg.line); + throw nml_cyclic_dependency(std::to_string(seg.id)); } std::sort(segments_.begin(), segments_.end(), [](auto& a, auto& b) { return a.tdepth<b.tdepth; }); // Check for multiple roots: - if (n_seg>1 && segments_[1].tdepth==0) throw nml_bad_segment(segments_[1].id, segments_[1].line); + if (n_seg>1 && segments_[1].tdepth==0) throw nml_bad_segment(segments_[1].id); // Update index: for (std::size_t i = 0; i<n_seg; ++i) { @@ -285,11 +258,8 @@ static std::unordered_map<std::string, std::vector<non_negative>> evaluate_segme // Expand subTree/path specifications: for (auto& g: groups) { - unsigned line = g.line; try { for (auto& subtree: g.subtrees) { - line = subtree.line; - if (!subtree.from && !subtree.to) { // Matches all segments: for (auto& seg: segtree) { @@ -326,7 +296,7 @@ static std::unordered_map<std::string, std::vector<non_negative>> evaluate_segme } } catch 
(...) { - throw nml_bad_segment_group(g.id, line); + throw nml_bad_segment_group(g.id); } } @@ -334,7 +304,7 @@ static std::unordered_map<std::string, std::vector<non_negative>> evaluate_segme std::unordered_map<std::string, std::size_t> index; for (std::size_t i = 0; i<n_group; ++i) { if (!index.insert({groups[i].id, i}).second) { - throw nml_bad_segment_group(groups[i].id, groups[i].line); + throw nml_bad_segment_group(groups[i].id); } } @@ -344,7 +314,7 @@ static std::unordered_map<std::string, std::vector<non_negative>> evaluate_segme const auto& includes = groups[i].includes; index_to_included_indices[i].reserve(includes.size()); for (auto& id: includes) { - if (!index.count(id)) throw nml_bad_segment_group(groups[i].id, groups[i].line); + if (!index.count(id)) throw nml_bad_segment_group(groups[i].id); index_to_included_indices[i].push_back(index.at(id)); } } @@ -358,7 +328,7 @@ static std::unordered_map<std::string, std::vector<non_negative>> evaluate_segme } else { const auto& group = groups[depths.error().index]; - throw nml_cyclic_dependency(group.id, group.line); + throw nml_cyclic_dependency(group.id); } // Accumulate included group segments, following topological order. @@ -396,7 +366,7 @@ static arb::stitched_morphology construct_morphology(const neuroml_segment_tree& // Construct result from topologically sorted segments. 
for (const auto& s: segtree) { - arb::mstitch stitch(nl_to_string(s.id), s.distal); + arb::mstitch stitch(std::to_string(s.id), s.distal); double along = s.along; if (s.spherical) { @@ -418,7 +388,7 @@ static arb::stitched_morphology construct_morphology(const neuroml_segment_tree& } if (s.parent_id) { - builder.add(stitch, nl_to_string(s.parent_id.value()), along); + builder.add(stitch, std::to_string(s.parent_id.value()), along); } else { builder.add(stitch); @@ -428,57 +398,49 @@ static arb::stitched_morphology construct_morphology(const neuroml_segment_tree& return arb::stitched_morphology(std::move(builder)); } -nml_morphology_data nml_parse_morphology_element(xml_xpathctx ctx, xml_node morph, enum neuroml_options::values options) { +nml_morphology_data nml_parse_morphology_element(const xml_node& morph, + enum neuroml_options::values options) { using namespace neuroml_options; nml_morphology_data M; - M.id = propx<std::string>(morph, "id", ""s); + M.id = get_attr<std::string>(morph, "id"); std::vector<neuroml_segment> segments; - // TODO: precompile xpath queries for nml:distal, nml:proximal, nml:parent. - const char* q_parent = "./nml:parent"; - const char* q_proximal = "./nml:proximal"; - const char* q_distal = "./nml:distal"; + const char* q_parent = "./parent"; + const char* q_proximal = "./proximal"; + const char* q_distal = "./distal"; - for (auto n: ctx.query(morph, "./nml:segment")) { + for (auto xn: morph.select_nodes("./segment")) { + auto n = xn.node(); neuroml_segment seg; - int line = n.line(); // for error context! 
- try { - seg.id = -1; - seg.id = propx<non_negative>(n, "id"); - std::string name = propx<std::string>(n, "name", ""s); - - auto result = ctx.query(n, q_parent); - if (!result.empty()) { - line = result[0].line(); - seg.parent_id = propx<non_negative>(result[0], "segment"); - seg.along = propx<double>(result[0], "fractionAlong", 1.0); + seg.id = get_attr<unsigned>(n, "id"); + auto name = get_attr<std::string>(n, "name", ""); + auto parent = n.select_node(q_parent).node(); + if (!parent.empty()) { + seg.parent_id = get_attr<unsigned>(parent, "segment"); + seg.along = get_attr<double>(parent, "fractionAlong", 1.0); } - result = ctx.query(n, q_proximal); - if (!result.empty()) { - line = result[0].line(); - double x = propx<double>(result[0], "x"); - double y = propx<double>(result[0], "y"); - double z = propx<double>(result[0], "z"); - double diameter = propx<double>(result[0], "diameter"); - if (diameter<0) throw nml_bad_segment(seg.id, n.line()); - + auto prox = n.select_node(q_proximal).node(); + if (!prox.empty()) { + double x = get_attr<double>(prox, "x"); + double y = get_attr<double>(prox, "y"); + double z = get_attr<double>(prox, "z"); + double diameter = get_attr<double>(prox, "diameter"); + if (diameter<0) throw nml_bad_segment(seg.id); seg.proximal = arb::mpoint{x, y, z, diameter/2}; } - if (!seg.parent_id && !seg.proximal) throw nml_bad_segment(seg.id, n.line()); - - result = ctx.query(n, q_distal); - if (!result.empty()) { - line = result[0].line(); - double x = propx<double>(result[0], "x"); - double y = propx<double>(result[0], "y"); - double z = propx<double>(result[0], "z"); - double diameter = propx<double>(result[0], "diameter"); - if (diameter<0) throw nml_bad_segment(seg.id, n.line()); + if (!seg.parent_id && !seg.proximal) throw nml_bad_segment(seg.id); + auto dist = n.select_node(q_distal).node(); + if (!dist.empty()) { + double x = get_attr<double>(dist, "x"); + double y = get_attr<double>(dist, "y"); + double z = get_attr<double>(dist, "z"); 
+ double diameter = get_attr<double>(dist, "diameter"); + if (diameter<0) throw nml_bad_segment(seg.id); seg.distal = arb::mpoint{x, y, z, diameter/2}; // Set spherical flag if we have no parent, options has allow_spherical_root flag, @@ -486,14 +448,12 @@ nml_morphology_data nml_parse_morphology_element(xml_xpathctx ctx, xml_node morp seg.spherical = (options & allow_spherical_root) && !seg.parent_id && seg.proximal && seg.proximal.value()==seg.distal; } else { - throw nml_bad_segment(seg.id, n.line()); + throw nml_bad_segment(seg.id); } } catch (nml_parse_error& e) { - throw nml_bad_segment(seg.id, line); + throw nml_bad_segment(seg.id); } - - seg.line = n.line(); segments.push_back(std::move(seg)); } @@ -502,65 +462,53 @@ nml_morphology_data nml_parse_morphology_element(xml_xpathctx ctx, xml_node morp // Compute tree now to save further parsing if something goes wrong. neuroml_segment_tree segtree(std::move(segments)); - // TODO: precompile xpath queries for following: - const char* q_member = "./nml:member"; - const char* q_include = "./nml:include"; - const char* q_path = "./nml:path"; - const char* q_from = "./nml:from"; - const char* q_to = "./nml:to"; - const char* q_subtree = "./nml:subTree"; + const char* q_member = "./member"; + const char* q_include = "./include"; + const char* q_path = "./path"; + const char* q_from = "./from"; + const char* q_to = "./to"; + const char* q_subtree = "./subTree"; std::vector<neuroml_segment_group_info> groups; - for (auto n: ctx.query(morph, "./nml:segmentGroup")) { + for (auto xn: morph.select_nodes("./segmentGroup")) { + auto n = xn.node(); neuroml_segment_group_info group; - int line = n.line(); // for error context! 
- try { - group.id = propx<std::string>(n, "id"); - for (auto elem: ctx.query(n, q_member)) { - line = elem.line(); - auto seg_id = propx<non_negative>(elem, "segment"); - if (!segtree.contains(seg_id)) throw nml_bad_segment_group(group.id, line); - group.segments.push_back(propx<non_negative>(elem, "segment")); + group.id = get_attr<std::string>(n, "id"); + for (auto xelem: n.select_nodes(q_member)) { + auto elem = xelem.node(); + auto seg_id = get_attr<unsigned>(elem, "segment");; + if (!segtree.contains(seg_id)) throw nml_bad_segment_group(group.id); + group.segments.push_back(get_attr<unsigned>(elem, "segment")); } - for (auto elem: ctx.query(n, q_include)) { - line = elem.line(); - group.includes.push_back(propx<std::string>(elem, "segmentGroup")); + for (auto xelem: n.select_nodes(q_include)) { + auto elem = xelem.node(); + group.includes.push_back(get_attr<std::string>(elem, "segmentGroup")); } // Treat `<path>` and `<subTree>` identically: - auto parse_subtree_elem = [&](auto& elem) { - line = elem.line(); - auto froms = ctx.query(elem, q_from); - auto tos = ctx.query(elem, q_to); + auto parse_subtree_elem = [&](const auto& elem) { + auto froms = elem.select_node(q_from).node(); + auto tos = elem.select_node(q_to).node(); neuroml_segment_group_subtree sub; - sub.line = line; - if (!froms.empty()) { - line = froms[0].line(); - sub.from = propx<non_negative>(froms[0], "segment"); - } - if (!tos.empty()) { - line = tos[0].line(); - sub.to = propx<non_negative>(tos[0], "segment"); - } - + if (!froms.empty()) sub.from = get_attr<unsigned>(froms, "segment"); + if (!tos.empty()) sub.to = get_attr<unsigned>(tos, "segment"); return sub; }; - for (auto elem: ctx.query(n, q_path)) { - group.subtrees.push_back(parse_subtree_elem(elem)); + for (auto elem: n.select_nodes(q_path)) { + group.subtrees.push_back(parse_subtree_elem(elem.node())); } - for (auto elem: ctx.query(n, q_subtree)) { - group.subtrees.push_back(parse_subtree_elem(elem)); + for (auto elem: 
n.select_nodes(q_subtree)) { + group.subtrees.push_back(parse_subtree_elem(elem.node())); } } catch (nml_parse_error& e) { - throw nml_bad_segment_group(group.id, line); + throw nml_bad_segment_group(group.id); } - group.line = n.line(); groups.push_back(std::move(group)); } @@ -586,7 +534,7 @@ nml_morphology_data nml_parse_morphology_element(xml_xpathctx ctx, xml_node morp arb::region r; auto ids = name_to_ids.equal_range(name); for (auto i = ids.first; i!=ids.second; ++i) { - r = join(std::move(r), M.segments.regions().at(nl_to_string(i->second))); + r = join(std::move(r), M.segments.regions().at(std::to_string(i->second))); } M.named_segments.set(name, std::move(r)); } @@ -594,7 +542,7 @@ nml_morphology_data nml_parse_morphology_element(xml_xpathctx ctx, xml_node morp for (const auto& [group_id, segment_ids]: M.group_segments) { arb::region r; for (auto id: segment_ids) { - r = join(std::move(r), M.segments.regions().at(nl_to_string(id))); + r = join(std::move(r), M.segments.regions().at(std::to_string(id))); } M.groups.set(group_id, std::move(r)); } diff --git a/arborio/nml_parse_morphology.hpp b/arborio/nml_parse_morphology.hpp index 80e88a0594a52331caf6b316c7d577e9162729a4..eb185604cd767b638133d62cc4dcce3dd691bf84 100644 --- a/arborio/nml_parse_morphology.hpp +++ b/arborio/nml_parse_morphology.hpp @@ -1,10 +1,11 @@ #pragma once #include <arborio/neuroml.hpp> -#include "xmlwrap.hpp" + +#include <pugixml.hpp> namespace arborio { -nml_morphology_data nml_parse_morphology_element(xmlwrap::xml_xpathctx ctx, xmlwrap::xml_node morph, enum neuroml_options::values); +nml_morphology_data nml_parse_morphology_element(const pugi::xml_node& morph, enum neuroml_options::values); } // namespace arborio diff --git a/arborio/xml.cpp b/arborio/xml.cpp deleted file mode 100644 index 6e8a19e4a37722290405dc1e39e2dd6b2e928f87..0000000000000000000000000000000000000000 --- a/arborio/xml.cpp +++ /dev/null @@ -1,33 +0,0 @@ -#include <stdexcept> -#include <string> - -#include 
<libxml/parser.h>
-
-#include <arborio/xml.hpp>
-
-// Implementations for exposed libxml2 interfaces.
-
-namespace arborio {
-
-xml_error::xml_error(const std::string& xml_error_msg, unsigned line):
-    std::runtime_error(std::string("xml error: ") + (line? "line " + std::to_string(line): "") + xml_error_msg),
-    xml_error_msg(xml_error_msg),
-    line(line)
-{}
-
-with_xml::with_xml(): run_cleanup_(true) {
-    // Initialize before any multithreaded access by library or client code.
-    xmlInitParser();
-}
-
-with_xml::with_xml(with_xml&& other): run_cleanup_(other.run_cleanup_) {
-    other.run_cleanup_ = false;
-}
-
-with_xml::~with_xml() {
-    if (run_cleanup_) {
-        xmlCleanupParser();
-    }
-}
-
-} // namespace arborio
diff --git a/arborio/xml.hpp b/arborio/xml.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..2d0d3f3282eedbcdd9dd3d7741ca3f4394c8c806
--- /dev/null
+++ b/arborio/xml.hpp
@@ -0,0 +1,108 @@
+#pragma once
+
+#include <arborio/neuroml.hpp>
+
+#include <charconv>
+#include <iostream>
+#include <locale>
+#include <optional>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <system_error>
+#include <type_traits>
+
+#include <pugixml.hpp>
+
+namespace arborio {
+
+using xml_node = pugi::xml_node;
+using xml_doc = pugi::xml_document;
+
+// Read attribute `a` of node `n` and convert its text to `T`.
+//
+// If the node has no such attribute, the default `d` is returned when given;
+// otherwise nml_parse_error is thrown. Text that cannot be converted also
+// raises nml_parse_error, so callers only ever need to handle that one type.
+//
+// Supported `T`: std::string (verbatim), double, and unsigned integer types.
+// Numbers are parsed independently of the global locale. Unsigned integers
+// must be plain decimal digits that fit in `T`; a double must not be followed
+// by any further characters.
+template <typename T>
+T get_attr(const xml_node& n,
+           const std::string& a,
+           std::optional<T> d={}) {
+    auto attr = n.attribute(a.c_str());
+    if (attr.empty()) {
+        if (!d) throw nml_parse_error("Required attribute " + a + " is empty/absent.");
+        return *d;
+    }
+    std::string val = attr.value();
+    if constexpr (std::is_same_v<T, std::string>) {
+        return val;
+    }
+    else if constexpr (std::is_same_v<T, double>) {
+        // std::stod honours the global C locale and accepts trailing junk;
+        // a classic-locale stream plus an EOF check avoids both.
+        std::istringstream is{val};
+        is.imbue(std::locale::classic());
+        double x = 0;
+        is >> x;
+        if (!is || !is.eof()) throw nml_parse_error("Couldn't parse floating point value: " + val);
+        return x;
+    }
+    else if constexpr (std::is_unsigned_v<T>) {
+        // std::from_chars takes no whitespace, '+' or '-' for unsigned types
+        // and reports overflow of T, so nothing is wrapped or truncated.
+        T x{};
+        const char* first = val.data();
+        const char* last = first + val.size();
+        auto [end, ec] = std::from_chars(first, last, x);
+        if (ec != std::errc{} || end != last) throw nml_parse_error("Couldn't parse unsigned integer: " + val);
+        return x;
+    }
+    else {
+        throw std::runtime_error{"Not implemented"};
+    }
+}
+
+// Quote `x` as an XPath 1.0 string literal. XPath has no escape sequences, so a
+// value containing both quote characters is assembled with concat() from
+// single-quoted pieces and double-quoted runs of apostrophes.
+inline
+std::string xpath_escape(const std::string& x) {
+    auto npos = std::string::npos;
+    if (x.find_first_of("'")==npos) {
+        return "'"+x+"'";
+    }
+    else if (x.find_first_of("\"")==npos) {
+        return "\""+x+"\"";
+    }
+    else {
+        std::string r = "concat(";
+        std::string::size_type i = 0;
+        for (;;) {
+            // Text up to the next apostrophe goes in single quotes.
+            auto j = x.find_first_of("'", i);
+            r += "'";
+            r.append(x, i, j==npos? j: j-i);
+            r += "'";
+            if (j==npos) break;
+            // The run of apostrophes starting at j goes in double quotes;
+            // resume at j itself so that none of them is dropped.
+            r += ",\"";
+            i = j;
+            j = x.find_first_not_of("'",i);
+            r.append(x, i, j==npos? j: j-i);
+            r += "\"";
+            if (j==npos) break;
+            r += ",";
+            i = j;
+        }
+        r += ")";
+        return r;
+    }
+}
+
+} // namespace arborio
diff --git a/arborio/xmlwrap.cpp b/arborio/xmlwrap.cpp
deleted file mode 100644
index 6f791b2b922d4d34d1f498b014c9733c3093dd49..0000000000000000000000000000000000000000
--- a/arborio/xmlwrap.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-#include <cerrno>
-#include <charconv>
-#include <cstdarg>
-#include <cstddef>
-#include <cstdlib>
-#include <cstring>
-#include <limits>
-#include <locale>
-#include <sstream>
-#include <string>
-#include <vector>
-
-#include <libxml/xmlerror.h>
-
-#include "xmlwrap.hpp"
-
-namespace arborio {
-namespace xmlwrap {
-
-namespace detail {
-
-// Note: widely missing library support for floating point std::from_chars.
- -template <typename V> -bool from_cstr_(V& out, const char* s) { - auto [p, ec] = std::from_chars(s, s+std::strlen(s), out); - return ec==std::errc{} && !*p; -} - -template <typename V> -std::string nl_to_string_(const V& v, unsigned digits_estimate) { - std::vector<char> digits(digits_estimate); - for (;;) { - if (auto [p, ec] = std::to_chars(digits.data(), digits.data()+digits.size(), v); ec == std::errc{}) { - return std::string(digits.data(), p); - } - - digits_estimate *= 2; - digits.resize(digits_estimate); - } -} - -} // namespace detail - - -bool nl_from_cstr(std::string& out, const char* content) { - out = content; - return true; -} - -bool nl_from_cstr(long long& out, const char* content) { - return detail::from_cstr_(out, content); -} - -bool nl_from_cstr(non_negative& out, const char* content) { - return detail::from_cstr_(out, content); -} - -bool nl_from_cstr(double& out, const char* content) { - // Note: library support is widely missing for floating point std::from_chars, - // so can't just do: - // return detail::from_cstr_(out, content); - // - // std::strtod() will use the current C locale, so that's out: anticipating the - // decimal point character is a race condition. - - std::istringstream is{std::string(content)}; - is.imbue(std::locale::classic()); - - double x; - is >> x; - if (!is || !is.eof()) return false; - out = x; - return true; -} - -std::string nl_to_string(non_negative n) { - return detail::nl_to_string_(n, std::numeric_limits<non_negative>::digits10); -} - -std::string nl_to_string(long long n) { - return detail::nl_to_string_(n, 1+std::numeric_limits<long long>::digits10); -} - -void throw_on_xml_generic_error(void *, const char* msg, ...) 
{ - va_list va, vb; - va_start(va, msg); - va_copy(vb, va); - - int r = vsnprintf(nullptr, 0, msg, va); - va_end(va); - - std::string err(r+1, '\0'); - vsnprintf(&err[0], err.size(), msg, vb); - va_end(vb); - - throw ::arborio::xml_error(err); -} - -void throw_on_xml_structured_error(void *ctx, xmlErrorPtr errp) { - if (errp->level!=1) { // ignore warnings! - std::string msg(errp->message); - if (!msg.empty() && msg.back()=='\n') msg.pop_back(); - throw ::arborio::xml_error(msg, errp->line); - } -} - -xml_error_scope::xml_error_scope() { - generic_handler_ = xmlGenericError; - generic_context_ = xmlGenericErrorContext; - - structured_handler_ = xmlStructuredError; - structured_context_ = xmlStructuredErrorContext; - - xmlSetGenericErrorFunc(nullptr, &throw_on_xml_generic_error); - xmlSetStructuredErrorFunc((void*)this, &throw_on_xml_structured_error); -} - -xml_error_scope::~xml_error_scope() { - xmlGenericError = generic_handler_; - xmlGenericErrorContext = generic_context_; - - xmlStructuredError = structured_handler_; - xmlStructuredErrorContext = structured_context_; -} - -} // namespace xmlwrap -} // namespace arborio diff --git a/arborio/xmlwrap.hpp b/arborio/xmlwrap.hpp deleted file mode 100644 index a95942a9171af76700ace4b66d033a7ee377026c..0000000000000000000000000000000000000000 --- a/arborio/xmlwrap.hpp +++ /dev/null @@ -1,329 +0,0 @@ -#pragma once - -// RAII and iterator wrappers for some libxml2 objects. - -#include <any> -#include <cstddef> -#include <cstdlib> -#include <memory> -#include <optional> -#include <string> -#include <utility> - -#include <libxml/parser.h> -#include <libxml/xpath.h> -#include <libxml/xpathInternals.h> - -#include <arbor/util/expected.hpp> -#include <arborio/xml.hpp> - -namespace arborio { -namespace xmlwrap { - -struct bad_property { - std::string error; - unsigned line = 0; -}; - -// `non_negative` represents the corresponding constraint in the schema, which -// can mean any arbitrarily large non-negtative integer value. 
-// -// A faithful representation would use an arbitrary-size 'big' integer or -// a string, but for ease of implementation (and a bit more speed) we restrict -// it to whatever we can fit in an unsigned long long. - -using non_negative = unsigned long long; - -// String wrappers around `to_chars` for attribute types we care about. -// (`nl` is meant to stand for "no locale".) - -std::string nl_to_string(non_negative); -std::string nl_to_string(long long); - -// Parse attribute content as the representation of a specific type. -// Return true if successful. - -bool nl_from_cstr(std::string& out, const char* content); -bool nl_from_cstr(non_negative& out, const char* content); -bool nl_from_cstr(long long& out, const char* content); -bool nl_from_cstr(double& out, const char* content); - -// Wrap xmlChar* NUL-terminated string that requires deallocation. - -struct xml_string { - explicit xml_string(const xmlChar* p): p_(p, xml_string::deleter) {} - - operator const char*() const { - return reinterpret_cast<const char*>(p_.get()); - } - -private: - std::shared_ptr<const xmlChar> p_; - static void deleter(const xmlChar* x) { xmlFree((void*)x); } -}; - -// Wrappers below are generally constructed with two arguments, -// a pointer corresponding to the libxml2 object, and a dependency -// object (typically shared_ptr<X> for some X) that guards the -// lifetime of another object upon which this depends. - -template <typename XmlType> -void trivial_dealloc(XmlType*) {} - -template <typename XmlType, void (*xml_dealloc)(XmlType *) = &trivial_dealloc<XmlType>> -struct xml_base { - xml_base(XmlType* p, std::any depends = {}): - p_(p, xml_dealloc), - depends_(std::move(depends)) - {} - -protected: - // Access to raw wrapped pointer type. - XmlType* get() const { return p_.get(); } - - // Copy of shared_ptr<> governing lifetime of referenced object. - auto self() const { return p_; } - - // Copy of dependency object. 
- std::any depends() const { return depends_; } - -private: - std::shared_ptr<XmlType> p_; - std::any depends_; -}; - -// xmlNode RAII wrapper (non-owning). - -struct xml_node: protected xml_base<xmlNode> { - using base = xml_base<xmlNode>; - explicit xml_node(xmlNode* p, std::any depends): - base(p, std::move(depends)) - {} - - bool is_element() const { return get()->type==XML_ELEMENT_NODE; } - bool is_attr() const { return get()->type==XML_ATTRIBUTE_NODE; } - xml_string content() const { return xml_string(xmlNodeGetContent(get())); } - unsigned line() const { return get()->line; } - - bool has_prop(const char* name) const { return xmlHasProp(get(), (const xmlChar*)name); } - - template <typename T> - arb::util::expected<T, bad_property> prop(const char* name, std::optional<T> default_value = std::nullopt) const { - using arb::util::unexpected; - - xmlChar* c = xmlGetProp(get(), (const xmlChar*)(name)); - if (!c) { - if (default_value) return default_value.value(); - else return unexpected(bad_property{"missing required attribute", get()->line}); - } - - T v; - if (nl_from_cstr(v, reinterpret_cast<const char*>(c))) return v; - else return unexpected(bad_property{"attribute type error", get()->line}); - } - - using base::get; // (unsafe access) -}; - -// xmlNodeSet RAII wrapper; resource lifetime is governed by an xmlXPathObject. 
- -struct xml_nodeset: protected xml_base<xmlNodeSet> { - using base = xml_base<xmlNodeSet>; - - xml_nodeset(): xml_nodeset(nullptr, std::any{}) {} - - xml_nodeset(xmlNodeSet* p, std::any depends): - base(p, std::move(depends)) - {} - - struct iterator { - using value_type = xml_node; - using difference_type = std::ptrdiff_t; - using reference = value_type; // yeah, not a real random access iterator - using pointer = value_type*; - using iterator_category = std::random_access_iterator_tag; - - explicit iterator(xmlNodePtr* p, const xml_nodeset* ns_ptr): p_(p), ns_ptr_(ns_ptr) {} - - bool operator==(iterator i) const { return p_==i.p_; } - bool operator!=(iterator i) const { return p_!=i.p_; } - bool operator<(iterator i) const { return p_<i.p_; } - bool operator<=(iterator i) const { return p_<=i.p_; } - bool operator>(iterator i) const { return p_>i.p_; } - bool operator>=(iterator i) const { return p_>=i.p_; } - - reference operator*() const { return ns_ptr_->mk_xml_node(*p_); } - - struct ptr_proxy { - xml_node inner_; - const xml_node* operator->() const { return &inner_; } - }; - ptr_proxy operator->() const { return ptr_proxy{ns_ptr_->mk_xml_node(*p_)}; } - - iterator& operator++() { return ++p_, *this; } - iterator operator++(int) { - iterator x(*this); - return ++p_, x; - } - - iterator& operator--() { return --p_, *this; } - iterator operator--(int) { - iterator x(*this); - return --p_, x; - } - - iterator& operator+=(ptrdiff_t n) { return p_ += n, *this; } - iterator& operator-=(ptrdiff_t n) { return p_ -= n, *this; } - reference operator[](ptrdiff_t n) { return *(*this+n); } - - iterator operator+(ptrdiff_t n) { - iterator i(*this); - return i += n; - } - friend iterator operator+(ptrdiff_t n, iterator i) { return i+n; } - - iterator operator-(ptrdiff_t n) { - iterator i(*this); - return i -= n; - } - friend iterator operator-(ptrdiff_t n, iterator i) { return i-n; } - - ptrdiff_t operator=(iterator i) { return p_-i.p_; } - - private: - xmlNode** p_; - 
const xml_nodeset* ns_ptr_; - }; - - iterator begin() const { return iterator{get()? get()->nodeTab: nullptr, this}; } - iterator end() const { return iterator{get()? get()->nodeTab+get()->nodeNr: nullptr, this}; } - - iterator::reference operator[](int i) const { return begin()[i]; } - std::size_t size() const { return get()? get()->nodeNr: 0u; } - bool empty() const { return size()==0u; } - -private: - // Construct xml_node wrapper with the same lifetime dependency as this node set. - xml_node mk_xml_node(xmlNode* p) const { - return xml_node{p, depends()}; - } -}; - -// xmlPathObj RAII wrapper; lifetime of xmlPathObj governs lifetime of node set. - -struct xml_xpathobj: protected xml_base<xmlXPathObject, xmlXPathFreeObject> { - using base = xml_base<xmlXPathObject, xmlXPathFreeObject>; - - explicit xml_xpathobj(xmlXPathObject* p, std::any depends): - base(p, std::move(depends)) - {} - - xml_nodeset nodes() { - return get()->type==XPATH_NODESET? xml_nodeset{get()->nodesetval, self()}: xml_nodeset{}; - } -}; - -// xmlXPathContext RAII wrapper. - -struct xml_xpathctx: protected xml_base<xmlXPathContext, xmlXPathFreeContext> { - using base = xml_base<xmlXPathContext, xmlXPathFreeContext>; - - explicit xml_xpathctx(xmlXPathContext* p, std::any depends): - base(p, std::move(depends)) - {} - - void register_ns(const char* ns, const char* uri) { - xmlXPathRegisterNs(get(), (const xmlChar*)ns, (const xmlChar*)uri); - } - - xml_nodeset query(const char* q) { - return xml_xpathobj{xmlXPathEvalExpression((xmlChar*)q, get()), self()}.nodes(); - } - xml_nodeset query(const std::string& q) { return query(q.c_str()); } - - xml_nodeset query(xml_node context, const char* q) { - return xml_xpathobj{xmlXPathNodeEval(context.get(), (xmlChar*)q, get()), self()}.nodes(); - } - xml_nodeset query(xml_node context, const std::string& q) { return query(std::move(context), q.c_str()); } -}; - -// xmlDoc RAII wrapper. 
- -struct xml_doc: protected xml_base<xmlDoc, xmlFreeDoc> { - using base = xml_base<xmlDoc, xmlFreeDoc>; - - xml_doc(): xml_doc(nullptr) {} - - explicit xml_doc(std::string the_doc): - // 'Pretty sure' we don't need to keep the string after the tree is built. Pretty sure. - xml_doc(xmlReadMemory(the_doc.c_str(), the_doc.length(), "", nullptr, xml_options)) - {} - - // TODO: (... add other ctors ...) - - friend xml_xpathctx xpath_context(const xml_doc& doc) { - return xml_xpathctx{xmlXPathNewContext(doc.get()), doc.self()}; - } - - explicit operator bool() const { return get(); } - -private: - explicit xml_doc(xmlDoc* p): base(p) {} - static constexpr int xml_options = XML_PARSE_NOENT | XML_PARSE_NONET; -}; - -// Escape a string for use as string expression within an XPath expression. - -inline std::string xpath_escape(const std::string& x) { - auto npos = std::string::npos; - if (x.find_first_of("'")==npos) { - return "'"+x+"'"; - } - else if (x.find_first_of("\"")==npos) { - return "\""+x+"\""; - } - else { - std::string r = "concat("; - std::string::size_type i = 0; - for (;;) { - auto j = x.find_first_of("'", i); - r += "'"; - r.append(x, i, j==npos? j: j-i); - r += "'"; - if (j==npos) break; - r += ",\""; - i = j+1; - j = x.find_first_not_of("'",i); - r.append(x, i, j==npos? j: j-i); - r += "\""; - if (j==npos) break; - r += ","; - i = j+1; - } - r += ")"; - return r; - } -} - -// Error management: -// -// Use xml_error_scope to catch libxml2 warnings and errors. The -// xml_error_scope object will restore the original error handling -// behaviour on destruction. -// -// Errors are turned into arborio::xml_error exceptions and thrown, -// while warnings are ignored (libxml2 warnings are highly innocuous). 
- -struct xml_error_scope { - xml_error_scope(); - ~xml_error_scope(); - - xmlGenericErrorFunc generic_handler_; - void* generic_context_; - - xmlStructuredErrorFunc structured_handler_; - void* structured_context_; -}; - -} // namespace xmlwrap -} // namespace arborio diff --git a/doc/contrib/dependency-management.rst b/doc/contrib/dependency-management.rst index e36ad2e2b19ebef829193f0f31a4316c97d1068c..ad783e269266770cfee855800a93129e14cc79ac 100644 --- a/doc/contrib/dependency-management.rst +++ b/doc/contrib/dependency-management.rst @@ -7,7 +7,7 @@ Arbor relies on a (small) number of dependencies. We can distinguish three kinds 0. Source management dependencies: Git. 1. Build dependencies. E.g. CMake, compilers like GCC or CUDA. -2. Linking dependencies. E.g. MPI, libxml2. +2. Linking dependencies. E.g. MPI. 3. Source dependencies. These are present as `git submodules <https://git-scm.com/docs/git-submodule>`_ or as copy in ``ext/``. Their use is optional: users who need integration with their package manager (e.g. apt, spack, yum) can link to those versions instead. Note that the actual dependencies of your build configuration may vary. diff --git a/doc/cpp/morphology.rst b/doc/cpp/morphology.rst index 68e7e9f1f34d0e337d19cd9aa1ab7f5ef914e995..1de631528ce54e847e352191eebd316557a7533f 100644 --- a/doc/cpp/morphology.rst +++ b/doc/cpp/morphology.rst @@ -579,39 +579,8 @@ NeuroML ------- Arbor offers limited support for models described in `NeuroML version 2 -<https://neuroml.org/neuromlv2>`_. This is not built by default, but can be -enabled by providing the `-DARB_WITH_NEUROML=ON` argument to CMake at configuration -time (see :ref:`install-neuroml`). This will build the ``arborio`` libray with -neuroml support. - -The ``arborio`` library uses `libxml2 <http://xmlsoft.org/>`_ for XML parsing. 
-Applications using NeuroML through ``arborio`` will need to link against -``libxml2`` in addition, though this is performed implicitly within CMake -projects that add ``arbor::arborio`` as a link library. - -All classes and functions provided by the ``arborio`` library are provided in -the ``arborio`` namespace. - -Libxml2 interface -^^^^^^^^^^^^^^^^^ - -Libxml2 offers threadsafe XML parsing, but not by default. If the application -uses NeuromML support from ``arborio`` in an unthreaded context, or has already -explicitly initialized ``libxml2``, nothing more needs to be done. Otherwise, -the ``libxml2`` function ``xmlInitParser()`` must be called explicitly. - -``arborio`` provides a helper guard object for this purpose, defined -in ``arborio/xml.hpp``: - -.. cpp:namespace:: arborio - -.. cpp:class:: with_xml - - An RAII guard object that calls ``xmlInitParser()`` upon construction, and - ``xmlCleanupParser()`` upon destruction. The constructor takes no parameters. - -Unhandleable exceptions from ``libxml2`` are forwarded via an exception -``xml_error``, derived from ``std::runtime_error``. +<https://neuroml.org/neuromlv2>`_. All classes and functions provided by the +``arborio`` library are provided in the ``arborio`` namespace. NeuroML2 morphology support ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -624,7 +593,7 @@ An implementation limitation restricts valid segment id values to those which can be represented by an ``unsigned long long`` value. ``arborio::neuroml`` methods can throw an ``arborio::xml_error`` in the instance that -the underlying libxml2 library reports a problem that cannot be handled by the ``arborio`` +the underlying XML library reports a problem that cannot be handled by the ``arborio`` library. Otherwise, exceptions derived from ``aborio::neuroml_exception`` can be thrown when encountering problems interpreting the NeuroML document (see :ref:`cppneuromlexceptions` below). 
diff --git a/doc/dependencies.csv b/doc/dependencies.csv index 09b3f141c6a89ddaff1f8f4aaf24b723ab7f0faf..72a5c296d62fe67f724deb277f6a8f505d945197 100644 --- a/doc/dependencies.csv +++ b/doc/dependencies.csv @@ -8,13 +8,13 @@ Build option/target,Tool name,Minimum version,Notes,Update criteria --,Apple Clang,11,Apple LLVM version 11.0.0 (clang-1100.0.33.8), ARB_GPU,Hip Clang,ROCm 3.9,HIP support is currently experimental., ARB_GPU,CUDA,11.0,,"* It is available on all of the target HPC systems (Piz Daint, Juwels Booster)" -ARB_WITH_NEUROML,libxml2,,, ARB_WITH_MPI,MPI,,Many MPI implementations are supported., "ARB_WITH_MPI, ARB_WITH_PYTHON ",mpi4py,3.1.0,Minimum version depends on compatiblity with your Python and MPI versions. 3.1.0 is the first release to support Python > 3.8., html, .. literalinclude:: ../requirements.txt,,See ``doc/requirements.txt``, unit,googletest,,submodule ``ext/gtest.*``, bench,Google-benchmark,,submodule ``ext/google-benchmark``, --,json,,submodule ``ext/json``, +--,pugi,,submodule ``ext/pugixml``, --,random123,,submodule ``ext/random123``, --,fmt,,submodule ``ext/fmt``, --,tinyopt,,source copy ``ext/tinyopt``, diff --git a/doc/fileformat/neuroml.rst b/doc/fileformat/neuroml.rst index ba91b3f9c7ac8524477d07a4a15a4e155252379e..f919e27e9f5331f80197392a02d9415f74698ff3 100644 --- a/doc/fileformat/neuroml.rst +++ b/doc/fileformat/neuroml.rst @@ -8,12 +8,10 @@ NeuroML2 "NeuroML2", "``nml``", "✓", "✗" -Arbor offers limited support for models described in `NeuroML version 2 <https://neuroml.org/neuromlv2>`_. -This is not built by default (see :ref:`NeuroML support <install-neuroml>` for instructions on how -to build Arbor with NeuroML). - -Once support is enabled, Arbor is able to parse and check the validity of morphologies described in NeuroML files, -and present the encoded data to the user. This is more than a simple `segment tree`. +Arbor offers limited support for models described in `NeuroML version 2 +<https://neuroml.org/neuromlv2>`_. 
Arbor is able to parse and check the validity +of morphologies described in NeuroML files, and present the encoded data to the +user. This is more than a simple `segment tree`. NeuroML can encode in the same file multiple top-level morphologies, as well as cells: diff --git a/doc/install/build_install.rst b/doc/install/build_install.rst index a3c964ed57ba319aa9a5727b95efd57f4bfd7e27..c83e57a4c90f1dfb7dda96ecfa201ba58c05011b 100644 --- a/doc/install/build_install.rst +++ b/doc/install/build_install.rst @@ -148,11 +148,9 @@ Python package is recommended. See :ref:`install-python` for more information. NeuroML ~~~~~~~ -Arbor supports reading cell morphologies defined in NeuroML version 2 through -an additional support library ``arborio``. This library requires -`libxml2 <http://xmlsoft.org>`_ for the parsing of NeuroML2 XML, if it is built -with NeuroML support enabled. -See :ref:`install-neuroml` for more information. +Arbor supports reading cell morphologies defined in NeuroML version 2 through an +additional support library ``arborio``. See :ref:`install-neuroml` for more +information. Boost ~~~~~ @@ -534,14 +532,9 @@ NeuroML support --------------- Arbor has limited support for NeuroML version 2 through an additional library -``arborio``. This library will be built with NeuroML support if the option -``-DARB_WITH_NEUROML=ON`` is passed to CMake at configuration time. -``arborio`` depends upon the the ``libxml2`` library for XML parsing. - -Arbor will additionally install the static library ``libarborio.a``. -Applications using this functionality will need to link -against this library in addition to the main Arbor library and ``libxml2``. -For example: +``arborio``. Arbor will additionally install the static library +``libarborio.a``. Applications using this functionality will need to link +against this library in addition to the main Arbor library. For example: .. 
code-block:: bash diff --git a/doc/install/python.rst b/doc/install/python.rst index 10f1d279a4e175335910d94689060cc0598e6b93..2f18d59d2486686c4415e81443da9aa7a58484ec 100644 --- a/doc/install/python.rst +++ b/doc/install/python.rst @@ -42,9 +42,9 @@ You are now ready to use Arbor! You can continue reading these documentation pag for any other platforms than listed above, ``pip`` will attempt a build from source and thus require these packages as well. - * Ubuntu/Debian: ``git cmake gcc python3-dev python3-pip libxml2-dev`` - * Fedora/CentOS/OpenSuse: ``git cmake gcc-c++ python3-devel python3-pip libxml2-devel`` - * MacOS: get ``brew`` `here <https://brew.sh>`_ and run ``brew install cmake clang python3 libxml2`` + * Ubuntu/Debian: ``git cmake gcc python3-dev python3-pip`` + * Fedora/CentOS/OpenSuse: ``git cmake gcc-c++ python3-devel python3-pip`` + * MacOS: get ``brew`` `here <https://brew.sh>`_ and run ``brew install cmake clang python3`` * Windows: the simplest way is to use `WSL <https://docs.microsoft.com/en-us/windows/wsl/install-win10>`_ and then follow the instructions for Ubuntu. .. _in_python_custom: @@ -89,8 +89,6 @@ where you place the arguments separated by space inside the quotes. The following flags can be used to configure the installation: -* ``ARB_WITH_NEUROML=<ON|OFF>``: Enable support for NeuroML2 morphologies, - requires ``libxml2`` library. Default ``OFF`` * ``ARB_WITH_MPI=<ON|OFF>``: Enable MPI support, requires MPI library. Default ``OFF``. If you intend to use ``mpi4py``, you need to install the package before building Arbor, as binding it requires access to its headers. 
diff --git a/doc/python/hardware.rst b/doc/python/hardware.rst index af51f6b2b1d5f6ef2af7a9372f905840dcece4e4..ecfa7ca8cf9fef289ba1771fea0a7f28f31b0a10 100644 --- a/doc/python/hardware.rst +++ b/doc/python/hardware.rst @@ -27,7 +27,6 @@ Helper functions for checking cmake or environment variables, as well as configu * ``ARB_GPU_ENABLED`` * ``ARB_VECTORIZE`` * ``ARB_WITH_PROFILING`` - * ``ARB_WITH_NEUROML`` * ``ARB_USE_BUNDLED_LIBS`` * ``ARB_VERSION`` * ``ARB_ARCH`` @@ -39,7 +38,7 @@ Helper functions for checking cmake or environment variables, as well as configu import arbor arbor.config() - {'mpi': True, 'mpi4py': True, 'gpu': False, 'vectorize': True, 'profiling': True, 'neuroml': True, 'bundled': True, 'version': '0.5.3-dev', 'arch': 'native'} + {'mpi': True, 'mpi4py': True, 'gpu': False, 'vectorize': True, 'profiling': True, 'bundled': True, 'version': '0.5.3-dev', 'arch': 'native'} .. function:: mpi_init() diff --git a/ext/pugixml b/ext/pugixml new file mode 160000 index 0000000000000000000000000000000000000000..08b3433180727ea2f78fe02e860a08471db1e03c --- /dev/null +++ b/ext/pugixml @@ -0,0 +1 @@ +Subproject commit 08b3433180727ea2f78fe02e860a08471db1e03c diff --git a/pyproject.toml b/pyproject.toml index 3c897394adc3dbc33c6b2d2263844a6b67c77536..ec34cf9a3399404c62397914ee35c5eb75e23b88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,9 +71,7 @@ MACOSX_DEPLOYMENT_TARGET = "10.15" [tool.cibuildwheel.linux] archs = ["x86_64"] manylinux-x86_64-image = "manylinux2014" -before-all = "yum -y install libxml2-devel" repair-wheel-command = "auditwheel repair -w {dest_dir} {wheel} && python3 /project/scripts/patchwheel.py {dest_dir}" [[tool.cibuildwheel.overrides]] select = "*-musllinux*" -before-all = "apk add libxml2-dev" diff --git a/python/config.cpp b/python/config.cpp index 62d604eeb873627e4908745a4efa40435562e399..c692ee2e5914955df8b51b6b1486114dcae9cf62 100644 --- a/python/config.cpp +++ b/python/config.cpp @@ -46,11 +46,7 @@ pybind11::dict config() { 
#else dict[pybind11::str("profiling")] = pybind11::bool_(false); #endif -#ifdef ARB_NEUROML_ENABLED dict[pybind11::str("neuroml")] = pybind11::bool_(true); -#else - dict[pybind11::str("neuroml")] = pybind11::bool_(false); -#endif #ifdef ARB_BUNDLED_ENABLED dict[pybind11::str("bundled")] = pybind11::bool_(true); #else diff --git a/python/morphology.cpp b/python/morphology.cpp index b0ba4367455d733bb50810ef410f2ca214d55ec6..a344b066b19f7795e81ff4ce80bcd265806026af 100644 --- a/python/morphology.cpp +++ b/python/morphology.cpp @@ -16,10 +16,7 @@ #include <arborio/label_parse.hpp> #include <arborio/swcio.hpp> #include <arborio/neurolucida.hpp> - -#ifdef ARB_NEUROML_ENABLED #include <arborio/neuroml.hpp> -#endif #include "util.hpp" #include "error.hpp" @@ -388,8 +385,6 @@ void register_morphology(py::module& m) { pybind11::arg_v("raw", false, "Return a segment tree instead of a fully formed morphology"), "Load a morphology or segment_tree and meta data from a Neurolucida ASCII .asc file."); - -#ifdef ARB_NEUROML_ENABLED // arborio::morphology_data py::class_<arborio::nml_morphology_data> nml_morph_data(m, "neuroml_morph_data"); nml_morph_data @@ -473,7 +468,6 @@ void register_morphology(py::module& m) { } }, "cell_id"_a, "allow_spherical_root"_a=false, "Retrieve nml_morph_data associated with cell_id."); -#endif // def ARB_NEUROML_ENABLED } } // namespace pyarb diff --git a/scripts/build-wheels.sh b/scripts/build-wheels.sh index 4b6412da948b8ce2191980cc375f977870b35194..2032822dd83c2ca79b055a31f8032de75f7e7991 100755 --- a/scripts/build-wheels.sh +++ b/scripts/build-wheels.sh @@ -13,8 +13,6 @@ set -e -u -x -yum -y install libxml2-devel - rm -rf /src_dir/arbor/_skbuild rm -rf /opt/python/cp36-cp36m # Python build toolchain does not work on Py3.6 diff --git a/scripts/patchwheel.py b/scripts/patchwheel.py index c1c6ed46a65186509c54bfe864da6e24ff910a1f..21d3314bcdd972363d823d86e98571d9aafe8964 100644 --- a/scripts/patchwheel.py +++ b/scripts/patchwheel.py @@ -40,11 +40,9 @@ 
for inwheel in parsed_args.path.glob("*.whl"): subprocess.check_call(f"unzip {inwheel} -d {zipdir}", shell=True) arborn = list(zipdir.glob("**/_arbor.cpython*.so"))[0] - libxml2n = list(zipdir.glob("**/libxml2*.so*"))[0] subprocess.check_call( f"patchelf --set-rpath '$ORIGIN/../arbor.libs' {arborn}", shell=True ) - subprocess.check_call(f"patchelf --set-rpath '$ORIGIN' {libxml2n}", shell=True) # TODO? correct checksum/bytecounts in *.dist-info/RECORD. # So far, Python does not report mismatches diff --git a/setup.py b/setup.py index 30f9ed589d923cfb63b839faa9aff9ffd2484cc5..7e93e6bf98afea2813dffd888cb655a6ec4549e1 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,6 @@ with_mpi = False with_gpu = "none" with_vec = False arch = "none" -with_nml = True use_libs = True build_type = "Release" # this is ok even for debugging, as we always produce info @@ -20,7 +19,6 @@ setup( f"-DARB_VECTORIZE={with_vec}", f"-DARB_ARCH={arch}", f"-DARB_GPU={with_gpu}", - f"-DARB_WITH_NEUROML={with_nml}", f"-DARB_USE_BUNDLED_LIBS={use_libs}", f"-DCMAKE_BUILD_TYPE={build_type}", ], diff --git a/spack/package.py b/spack/package.py index 7d8c3a10c16a09439d60e6a104a59b32b0f4cc43..2cf738397b1e4979b241d94d69fe81675b6db282 100644 --- a/spack/package.py +++ b/spack/package.py @@ -44,7 +44,6 @@ class Arbor(CMakePackage, CudaPackage): ) variant("doc", default=False, description="Build documentation.") variant("mpi", default=False, description="Enable MPI support") - variant("neuroml", default=True, description="Build NeuroML support library.") variant("python", default=True, description="Enable Python frontend support") variant( "vectorize", @@ -70,9 +69,9 @@ class Arbor(CMakePackage, CudaPackage): # misc dependencies depends_on("fmt@7.1:", when="@0.5.3:") # required by the modcc compiler depends_on("fmt@9.1:", when="@0.7.1:") + depends_on("pugixml@1.11:", when="@0.7.1:") depends_on("nlohmann-json") depends_on("random123") - depends_on("libxml2", when="+neuroml") with when("+cuda"): 
depends_on("cuda@10:") depends_on("cuda@11:", when="@0.7.1:") @@ -103,7 +102,6 @@ class Arbor(CMakePackage, CudaPackage): args = [ self.define_from_variant("ARB_WITH_ASSERTIONS", "assertions"), self.define_from_variant("ARB_WITH_MPI", "mpi"), - self.define_from_variant("ARB_WITH_NEUROML", "neuroml"), self.define_from_variant("ARB_WITH_PYTHON", "python"), self.define_from_variant("ARB_VECTORIZE", "vectorize"), ] diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 360028f217068ba31da21a905fbc11dfaa8caecb..560045491612534ae3949e5e1d19682bd243a490 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -153,6 +153,9 @@ set(unit_sources mech_private_field_access.cpp stats.cpp unit_test_catalogue.cpp + + # neuroml + test_nml_morphology.cpp ) if(ARB_WITH_GPU) @@ -170,10 +173,6 @@ if(ARB_WITH_GPU) ) endif() -if(ARB_WITH_NEUROML) - list(APPEND unit_sources test_nml_morphology.cpp) -endif() - if(ARB_WITH_CUDA_CLANG OR ARB_WITH_HIP_CLANG) set_source_files_properties(${unit_sources} PROPERTIES LANGUAGE CXX) endif() diff --git a/test/unit/test_nml_morphology.cpp b/test/unit/test_nml_morphology.cpp index 07b1d2678dbcb4cfef4d60d5a4b46eba2c4a2c1d..8c6ecfc039959a3846a4cd50c671107099fc8780 100644 --- a/test/unit/test_nml_morphology.cpp +++ b/test/unit/test_nml_morphology.cpp @@ -7,29 +7,18 @@ #include <arbor/morph/primitives.hpp> #include <arborio/neuroml.hpp> -#include <arborio/xml.hpp> #include <gtest/gtest.h> #include "morph_pred.hpp" using testing::region_eq; -TEST(neuroml, with_xml) { - // This (hopefully) will not blow up. - { - arborio::with_xml scope; - } - { - arborio::with_xml scope; - } -} - // Tests for basic morphology scanning and collection from XML. 
TEST(neuroml, morph_badxml) { std::string illformed = "<wha?"; - EXPECT_THROW(arborio::neuroml{illformed}, arborio::xml_error); + EXPECT_THROW(arborio::neuroml{illformed}, std::runtime_error); } TEST(neuroml, morph_none) { @@ -176,7 +165,7 @@ R"~( arborio::neuroml N(doc); { - arborio::nml_morphology_data m1 = N.morphology("m1").value(); + auto m1 = N.morphology("m1").value(); label_dict labels; labels.import(m1.segments, "seg:"); mprovider P(m1.morphology, labels); @@ -275,6 +264,7 @@ R"~( EXPECT_EQ(p3, seg1_segments[0].dist); } } + } TEST(neuroml, spherical_segments) { diff --git a/test/unit/test_strprintf.cpp b/test/unit/test_strprintf.cpp index 2449e8d69740859747982afbb5b54083648286d8..6da58abaaf36430ca2283d51d33ca8e9922c2457 100644 --- a/test/unit/test_strprintf.cpp +++ b/test/unit/test_strprintf.cpp @@ -14,12 +14,12 @@ TEST(strprintf, simple) { char buf[200]; const char* fmt1 = " %% %04d % 3.2f %#016x %c"; - sprintf(buf, fmt1, 3, 7.1e-3, 0x1234ul, 'x'); + snprintf(buf, 200, fmt1, 3, 7.1e-3, 0x1234ul, 'x'); auto result = strprintf(fmt1, 3, 7.1e-3, 0x1234ul, 'x'); EXPECT_EQ(std::string(buf), result); const char* fmt2 = "%5s %3s"; - sprintf(buf, fmt2, "bear", "pear"); + snprintf(buf, 200, fmt2, "bear", "pear"); result = strprintf(fmt2, "bear", "pear"); EXPECT_EQ(std::string(buf), result); } @@ -51,12 +51,12 @@ TEST(strprintf, wrappers) { char buf[200]; auto uptr = std::unique_ptr<int>{new int(17)}; - sprintf(buf, "uptr %p", uptr.get()); + snprintf(buf, 200, "uptr %p", uptr.get()); EXPECT_EQ(std::string(buf), strprintf("uptr %p", uptr)); auto sptr = std::shared_ptr<double>{new double(19.)}; - sprintf(buf, "sptr %p", sptr.get()); + snprintf(buf, 200, "sptr %p", sptr.get()); EXPECT_EQ(std::string(buf), strprintf("sptr %p", sptr));