diff --git a/miniapp/io.cpp b/miniapp/io.cpp index 330f1a20b231ff22f1c2a07b8d6828184aeef41c..d3acd53097efaf5e16d8f7f60f400cc60aaaea48 100644 --- a/miniapp/io.cpp +++ b/miniapp/io.cpp @@ -158,6 +158,9 @@ cl_options read_options(int argc, char** argv, bool allow_write) { TCLAP::ValueArg<uint32_t> group_size_arg( "g", "group-size", "number of cells per cell group", false, defopts.compartments_per_segment, "integer", cmd); + TCLAP::ValueArg<double> sample_dt_arg( + "", "sample-dt", "set sampling interval to <time> ms", + false, defopts.bin_dt, "time", cmd); TCLAP::ValueArg<double> probe_ratio_arg( "p", "probe-ratio", "proportion between 0 and 1 of cells to probe", false, defopts.probe_ratio, "proportion", cmd); @@ -165,10 +168,13 @@ cl_options read_options(int argc, char** argv, bool allow_write) { "X", "probe-soma-only", "only probe cell somas, not dendrites", cmd, false); TCLAP::ValueArg<std::string> trace_prefix_arg( "P", "prefix", "write traces to files with prefix <prefix>", - false, defopts.trace_prefix, "stringr", cmd); + false, defopts.trace_prefix, "string", cmd); TCLAP::ValueArg<util::optional<unsigned>> trace_max_gid_arg( "T", "trace-max-gid", "only trace probes on cells up to and including <gid>", false, defopts.trace_max_gid, "gid", cmd); + TCLAP::ValueArg<std::string> trace_format_arg( + "F", "trace-format", "select trace data format: csv or json", + false, defopts.trace_prefix, "string", cmd); TCLAP::ValueArg<util::optional<std::string>> morphologies_arg( "M", "morphologies", "load morphologies from SWC files matching <glob>", false, defopts.morphologies, "glob", cmd); @@ -214,10 +220,12 @@ cl_options read_options(int argc, char** argv, bool allow_write) { update_option(options.all_to_all, fopts, "all_to_all"); update_option(options.ring, fopts, "ring"); update_option(options.group_size, fopts, "group_size"); + update_option(options.sample_dt, fopts, "sample_dt"); update_option(options.probe_ratio, fopts, "probe_ratio"); update_option(options.probe_soma_only, fopts, "probe_soma_only"); update_option(options.trace_prefix, fopts, "trace_prefix"); update_option(options.trace_max_gid, fopts, "trace_max_gid"); + update_option(options.trace_format, fopts, "trace_format"); update_option(options.morphologies, fopts, "morphologies"); update_option(options.morph_rr, fopts, "morph_rr"); update_option(options.report_compartments, fopts, "report_compartments"); @@ -258,10 +266,12 @@ cl_options read_options(int argc, char** argv, bool allow_write) { update_option(options.all_to_all, all_to_all_arg); update_option(options.ring, ring_arg); update_option(options.group_size, group_size_arg); + update_option(options.sample_dt, sample_dt_arg); update_option(options.probe_ratio, probe_ratio_arg); update_option(options.probe_soma_only, probe_soma_only_arg); update_option(options.trace_prefix, trace_prefix_arg); update_option(options.trace_max_gid, trace_max_gid_arg); + update_option(options.trace_format, trace_format_arg); update_option(options.morphologies, morphologies_arg); update_option(options.morph_rr, morph_rr_arg); update_option(options.report_compartments, report_compartments_arg); @@ -269,6 +279,10 @@ cl_options read_options(int argc, char** argv, bool allow_write) { update_option(options.profile_only_zero, profile_only_zero_arg); update_option(options.dry_run_ranks, dry_run_ranks_arg); + if (options.trace_format!="csv" && options.trace_format!="json") { + throw usage_error("trace format must be one of: csv, json"); + } + if (options.all_to_all && options.ring) { throw usage_error("can 
specify at most one of --ring and --all-to-all"); } @@ -301,6 +315,7 @@ cl_options read_options(int argc, char** argv, bool allow_write) { fopts["all_to_all"] = options.all_to_all; fopts["ring"] = options.ring; fopts["group_size"] = options.group_size; + fopts["sample_dt"] = options.sample_dt; fopts["probe_ratio"] = options.probe_ratio; fopts["probe_soma_only"] = options.probe_soma_only; fopts["trace_prefix"] = options.trace_prefix; @@ -310,6 +325,7 @@ cl_options read_options(int argc, char** argv, bool allow_write) { else { fopts["trace_max_gid"] = nullptr; } + fopts["trace_format"] = options.trace_format; if (options.morphologies) { fopts["morphologies"] = options.morphologies.get(); } @@ -352,6 +368,7 @@ std::ostream& operator<<(std::ostream& o, const cl_options& options) { o << " all to all network : " << (options.all_to_all ? "yes" : "no") << "\n"; o << " ring network : " << (options.ring ? "yes" : "no") << "\n"; o << " group size : " << options.group_size << "\n"; + o << " sample dt : " << options.sample_dt << "\n"; o << " probe ratio : " << options.probe_ratio << "\n"; o << " probe soma only : " << (options.probe_soma_only ? "yes" : "no") << "\n"; o << " trace prefix : " << options.trace_prefix << "\n"; @@ -360,6 +377,7 @@ std::ostream& operator<<(std::ostream& o, const cl_options& options) { o << *options.trace_max_gid; } o << "\n"; + o << " trace format : " << options.trace_format << "\n"; o << " morphologies : "; if (options.morphologies) { o << *options.morphologies; diff --git a/miniapp/io.hpp b/miniapp/io.hpp index 8e6b7b1f54715d78536820ec70f569614481c4c7..bc16a6b1099b7a911bf32bb456e9017bead41470 100644 --- a/miniapp/io.hpp +++ b/miniapp/io.hpp @@ -36,10 +36,12 @@ struct cl_options { double bin_dt = 0.0025; // 0 => no binning. // Probe/sampling specification. + double sample_dt = 0.1; bool probe_soma_only = false; double probe_ratio = 0; // Proportion of cells to probe. std::string trace_prefix = "trace_"; util::optional<unsigned> trace_max_gid; // Only make traces up to this gid. + std::string trace_format = "json"; // Support only 'json' and 'csv'. // Parameters for spike output. bool spike_file_output = false; diff --git a/miniapp/miniapp.cpp b/miniapp/miniapp.cpp index 9b6cbc0f4d297822fa3a22f93a5ae6df8388b304..c74db58a6d234553d0ef182d5c0bb6608f759cc9 100644 --- a/miniapp/miniapp.cpp +++ b/miniapp/miniapp.cpp @@ -37,6 +37,7 @@ std::unique_ptr<sample_trace_type> make_trace(probe_record probe); using communicator_type = communication::communicator<communication::global_policy>; void write_trace_json(const sample_trace_type& trace, const std::string& prefix = "trace_"); +void write_trace_csv(const sample_trace_type& trace, const std::string& prefix = "trace_"); void report_compartment_stats(const recipe&); int main(int argc, char** argv) { @@ -111,7 +112,7 @@ int main(int argc, char** argv) { // Attach samplers to all probes std::vector<std::unique_ptr<sample_trace_type>> traces; - const time_type sample_dt = 0.1; + const time_type sample_dt = options.sample_dt; for (auto probe: m.probes()) { if (options.trace_max_gid && probe.id.gid>*options.trace_max_gid) { continue; @@ -153,8 +154,9 @@ int main(int argc, char** argv) { std::cout << "there were " << m.num_spikes() << " spikes\n"; // save traces + auto write_trace = options.trace_format=="json"? 
write_trace_json: write_trace_csv; for (const auto& trace: traces) { - write_trace_json(*trace.get(), options.trace_prefix); + write_trace(*trace.get(), options.trace_prefix); } util::save_to_file(meters, "meters.json"); @@ -227,6 +229,20 @@ std::unique_ptr<sample_trace_type> make_trace(probe_record probe) { return util::make_unique<sample_trace_type>(probe.id, name, units); } +void write_trace_csv(const sample_trace_type& trace, const std::string& prefix) { + auto path = prefix + std::to_string(trace.probe_id.gid) + + "." + std::to_string(trace.probe_id.index) + "_" + trace.name + ".csv"; + + std::ofstream file(path); + file << "# cell: " << trace.probe_id.gid << "\n"; + file << "# probe: " << trace.probe_id.index << "\n"; + file << "time_ms, " << trace.name << "_" << trace.units << "\n"; + + for (const auto& sample: trace.samples) { + file << util::strprintf("% 20.15f, % 20.15f\n", sample.time, sample.value); + } +} + void write_trace_json(const sample_trace_type& trace, const std::string& prefix) { auto path = prefix + std::to_string(trace.probe_id.gid) + "." + std::to_string(trace.probe_id.index) + "_" + trace.name + ".json"; diff --git a/modcc/backends/avx2.hpp b/modcc/backends/avx2.hpp index 7aceb44387bb1b80c8f29e9ad542f13b89456bad..1fbcf848082de751be0f023204ea9cb825366a0c 100644 --- a/modcc/backends/avx2.hpp +++ b/modcc/backends/avx2.hpp @@ -156,6 +156,16 @@ struct simd_intrinsics<targetKind::avx2> { tb << ")"; } + // In avx2 require 4-wide gather of i32 indices. + template<typename A, typename I, typename S> + static void emit_gather_index(TextBuffer& tb, const A& addr, + const I& index, const S& scale) { + tb << "_mm_i32gather_epi32("; + emit_operands(tb, arg_emitter(addr), arg_emitter(index), + arg_emitter(scale)); + tb << ")"; + } + template<typename T> static void emit_set_value(TextBuffer& tb, const T& arg) { tb << "_mm256_set1_pd("; diff --git a/modcc/backends/avx512.hpp b/modcc/backends/avx512.hpp index 87b7ef5da6e47c837178c2921b8f626da486da13..f1c7fce179afd21b4284af6026c58326867b89bf 100644 --- a/modcc/backends/avx512.hpp +++ b/modcc/backends/avx512.hpp @@ -130,6 +130,16 @@ struct simd_intrinsics<targetKind::avx512> { tb << ")"; } + // In avx512 require 8-wide gather of i32 indices. 
+ template<typename A, typename I, typename S> + static void emit_gather_index(TextBuffer& tb, const A& addr, + const I& index, const S& scale) { + tb << "_mm256_i32gather_epi32("; + emit_operands(tb, arg_emitter(addr), arg_emitter(index), + arg_emitter(scale)); + tb << ")"; + } + template<typename T> static void emit_set_value(TextBuffer& tb, const T& arg) { tb << "_mm512_set1_pd("; diff --git a/modcc/backends/base.hpp b/modcc/backends/base.hpp index 8323cb484a41dc145f857e4a187fc3708fd7e443..c5d0fdf45eb826fdf13fc1869915d289dfa6af89 100644 --- a/modcc/backends/base.hpp +++ b/modcc/backends/base.hpp @@ -76,6 +76,11 @@ struct simd_intrinsics { static void emit_gather(TextBuffer& tb, const A& addr, const I& index, const S& scale); + // int32 value version of `emit_gather` to look up cell indices + template<typename A, typename I, typename S> + static void emit_gather_index(TextBuffer& tb, const A& addr, + const I& index, const S& scale); + template<typename T> static void emit_set_value(TextBuffer& tb, const T& arg); diff --git a/modcc/cprinter.cpp b/modcc/cprinter.cpp index b871d60cd6ffc19cb406be98dcf663e43068bbea..f553a7a2cc6394159e651a834446e860ae2af68d 100644 --- a/modcc/cprinter.cpp +++ b/modcc/cprinter.cpp @@ -1,4 +1,5 @@ #include <algorithm> +#include <string> #include "cprinter.hpp" #include "lexer.hpp" @@ -56,8 +57,10 @@ std::string CPrinter::emit_source() { text_.add_line("using iarray = typename base::iarray;"); text_.add_line("using view = typename base::view;"); text_.add_line("using iview = typename base::iview;"); + text_.add_line("using const_view = typename base::const_view;"); text_.add_line("using const_iview = typename base::const_iview;"); text_.add_line("using ion_type = typename base::ion_type;"); + text_.add_line("using multi_event_stream = typename base::multi_event_stream;"); text_.add_line(); ////////////////////////////////////////////// @@ -85,8 +88,8 @@ std::string CPrinter::emit_source() { ////////////////////////////////////////////// int num_vars = array_variables.size(); text_.add_line(); - text_.add_line(class_name + "(view vec_v, view vec_i, array&& weights, iarray&& node_index)"); - text_.add_line(": base(vec_v, vec_i, std::move(node_index))"); + text_.add_line(class_name + "(size_type mech_id, const_iview vec_ci, const_view vec_t, const_view vec_t_to, const_view vec_dt, view vec_v, view vec_i, array&& weights, iarray&& node_index)"); + text_.add_line(": base(mech_id, vec_ci, vec_t, vec_t_to, vec_dt, vec_v, vec_i, std::move(node_index))"); text_.add_line("{"); text_.increase_indentation(); text_.add_gutter() << "size_type num_fields = " << num_vars << ";"; @@ -176,11 +179,7 @@ std::string CPrinter::emit_source() { text_.add_line("}"); text_.add_line(); - text_.add_line("void set_params(value_type t_, value_type dt_) override {"); - text_.increase_indentation(); - text_.add_line("t = t_;"); - text_.add_line("dt = dt_;"); - text_.decrease_indentation(); + text_.add_line("void set_params() override {"); text_.add_line("}"); text_.add_line(); @@ -319,17 +318,36 @@ std::string CPrinter::emit_source() { auto proctest = [] (procedureKind k) { return is_in(k, {procedureKind::normal, procedureKind::api, procedureKind::net_receive}); }; + bool override_deliver_events = false; for(auto const& var: module_->symbols()) { auto isproc = var.second->kind()==symbolKind::procedure; - if(isproc ) - { + if(isproc) { auto proc = var.second->is_procedure(); if(proctest(proc->kind())) { proc->accept(this); } + override_deliver_events |= proc->kind()==procedureKind::net_receive; 
} } + if(override_deliver_events) { + text_.add_line("void deliver_events(multi_event_stream& events) override {"); + text_.increase_indentation(); + text_.add_line("auto ncell = events.n_streams();"); + text_.add_line("for (size_type c = 0; c<ncell; ++c) {"); + text_.increase_indentation(); + text_.add_line("for (auto ev: events.marked_events(c)) {"); + text_.increase_indentation(); + text_.add_line("if (ev.handle.mech_id==mech_id_) net_receive(ev.handle.index, ev.weight);"); + text_.decrease_indentation(); + text_.add_line("}"); + text_.decrease_indentation(); + text_.add_line("}"); + text_.decrease_indentation(); + text_.add_line("}"); + text_.add_line(); + } + ////////////////////////////////////////////// ////////////////////////////////////////////// @@ -357,6 +375,11 @@ std::string CPrinter::emit_source() { } text_.add_line(); + text_.add_line("using base::mech_id_;"); + text_.add_line("using base::vec_ci_;"); + text_.add_line("using base::vec_t_;"); + text_.add_line("using base::vec_t_to_;"); + text_.add_line("using base::vec_dt_;"); text_.add_line("using base::vec_v_;"); text_.add_line("using base::vec_i_;"); text_.add_line("using base::node_index_;"); @@ -380,6 +403,7 @@ void CPrinter::emit_headers() { text_.add_line(); text_.add_line("#include <mechanism.hpp>"); text_.add_line("#include <algorithms.hpp>"); + text_.add_line("#include <backends/multicore/multi_event_stream.hpp>"); text_.add_line("#include <util/pprintf.hpp>"); text_.add_line(); } @@ -429,6 +453,10 @@ void CPrinter::visit(IndexedVariable *e) { text_ << e->index_name() << "[i_]"; } +void CPrinter::visit(CellIndexedVariable *e) { + text_ << e->index_name() << "[i_]"; +} + void CPrinter::visit(UnaryExpression *e) { auto b = (e->expression()->is_binary()!=nullptr); switch(e->op()) { @@ -582,20 +610,25 @@ void CPrinter::visit(APIMethod *e) { // create local indexed views for(auto &symbol : e->scope()->locals()) { auto var = symbol.second->is_local_variable(); - if(var->is_indexed()) { - auto const& name = var->name(); - auto const& index_name = var->external_variable()->index_name(); - text_.add_gutter(); - text_ << "auto " + index_name + " = util::indirect_view"; - auto channel = var->external_variable()->ion_channel(); - if(channel==ionKind::none) { - text_ << "(" + index_name + "_, node_index_);\n"; - } - else { - auto iname = ion_store(channel); - text_ << "(" << iname << "." << name << ", " - << ion_store(channel) << ".index);\n"; - } + if (!var->is_indexed()) continue; + + auto external = var->external_variable(); + auto const& name = var->name(); + auto const& index_name = external->index_name(); + + text_.add_gutter(); + text_ << "auto " + index_name + " = "; + + if(external->is_cell_indexed_variable()) { + text_ << "util::indirect_view(util::indirect_view(" + index_name + "_, vec_ci_), node_index_);\n"; + } + else if(external->is_ion()) { + auto channel = external->ion_channel(); + auto iname = ion_store(channel); + text_ << "util::indirect_view(" << iname << "." 
<< name << ", " << ion_store(channel) << ".index);\n"; + } + else { + text_ << "util::indirect_view(" + index_name + "_, node_index_);\n"; } } diff --git a/modcc/cprinter.hpp b/modcc/cprinter.hpp index 2b58a9fc85d7b891eb2453e84b86520430f25fec..0a254bc569fbc7d7209c0767d45612b96cf6fea7 100644 --- a/modcc/cprinter.hpp +++ b/modcc/cprinter.hpp @@ -21,6 +21,7 @@ public: virtual void visit(Symbol *e) override; virtual void visit(LocalVariable *e) override; virtual void visit(IndexedVariable *e) override; + virtual void visit(CellIndexedVariable *e) override; virtual void visit(IdentifierExpression *e) override; virtual void visit(CallExpression *e) override; virtual void visit(ProcedureExpression *e) override; diff --git a/modcc/cudaprinter.cpp b/modcc/cudaprinter.cpp index 5672dd48fb152b23b67bd2fdbe7c65756bec8d89..f4e3db811a20aa0a4e58c50148e4b3f8f147f565 100644 --- a/modcc/cudaprinter.cpp +++ b/modcc/cudaprinter.cpp @@ -42,6 +42,7 @@ CUDAPrinter::CUDAPrinter(Module &m, bool o) text_.add_line("#include <mechanism.hpp>"); text_.add_line("#include <algorithms.hpp>"); text_.add_line("#include <backends/gpu/intrinsics.hpp>"); + text_.add_line("#include <backends/gpu/multi_event_stream.hpp>"); text_.add_line("#include <util/pprintf.hpp>"); text_.add_line(); @@ -82,6 +83,16 @@ CUDAPrinter::CUDAPrinter(Module &m, bool o) param_pack.push_back(tname + ".index.data()"); } + text_.add_line("// cv index to cell mapping and cell time states"); + text_.add_line("const I* ci;"); + text_.add_line("const T* vec_t;"); + text_.add_line("const T* vec_t_to;"); + text_.add_line("const T* vec_dt;"); + param_pack.push_back("vec_ci_.data()"); + param_pack.push_back("vec_t_.data()"); + param_pack.push_back("vec_t_to_.data()"); + param_pack.push_back("vec_dt_.data()"); + text_.add_line("// voltage and current state within the cell"); text_.add_line("T* vec_v;"); text_.add_line("T* vec_i;"); @@ -103,6 +114,7 @@ CUDAPrinter::CUDAPrinter(Module &m, bool o) text_.add_line("namespace kernels {"); { increase_indentation(); + text_.add_line("using nest::mc::gpu::multi_event_stream;"); // forward declarations of procedures for(auto const &var : m.symbols()) { @@ -149,6 +161,7 @@ CUDAPrinter::CUDAPrinter(Module &m, bool o) text_.add_line("using typename base::const_iview;"); text_.add_line("using typename base::const_view;"); text_.add_line("using typename base::ion_type;"); + text_.add_line("using multi_event_stream = typename base::multi_event_stream;"); text_.add_line("using param_pack_type = " + module_name + "_ParamPack<value_type, size_type>;"); ////////////////////////////////////////////// @@ -178,8 +191,8 @@ CUDAPrinter::CUDAPrinter(Module &m, bool o) int num_vars = array_variables.size(); text_.add_line(); - text_.add_line(class_name + "(view vec_v, view vec_i, array&& weights, iarray&& node_index):"); - text_.add_line(" base(vec_v, vec_i, std::move(node_index))"); + text_.add_line(class_name + "(size_type mech_id, const_iview vec_ci, const_view vec_t, const_view vec_t_to, const_view vec_dt, view vec_v, view vec_i, array&& weights, iarray&& node_index):"); + text_.add_line(" base(mech_id, vec_ci, vec_t, vec_t_to, vec_dt, vec_v, vec_i, std::move(node_index))"); text_.add_line("{"); text_.increase_indentation(); text_.add_gutter() << "size_type num_fields = " << num_vars << ";"; @@ -252,13 +265,8 @@ CUDAPrinter::CUDAPrinter(Module &m, bool o) text_.add_line("}"); text_.add_line(); - // print the member funtion that - // * sets time step parameters - // * packs up the parameters for use on the GPU - text_.add_line("void 
set_params(value_type t_, value_type dt_) override {"); - text_.increase_indentation(); - text_.add_line("t = t_;"); - text_.add_line("dt = dt_;"); + // print the member funtion that packs up the parameters for use on the GPU + text_.add_line("void set_params() override {"); text_.add_line("param_pack_ ="); text_.increase_indentation(); text_.add_line("param_pack_type {"); @@ -432,15 +440,23 @@ CUDAPrinter::CUDAPrinter(Module &m, bool o) else if( var.second->kind()==symbolKind::procedure && var.second->is_procedure()->kind()==procedureKind::net_receive) { - // Simplest net_receive implementation forwards a single update - // to a GPU kernel. - auto proc = var.second->is_procedure(); - auto name = proc->name(); - text_.add_line("void " + name + "(int i_, value_type weight) {"); + // Override `deliver_events`. + text_.add_line("void deliver_events(multi_event_stream& events) override {"); text_.increase_indentation(); - text_.add_line( - "kernels::" + name + "<value_type, size_type>" - + "<<<1, 1>>>(param_pack_, i_, weight);"); + text_.add_line("auto ncell = events.n_streams();"); + text_.add_line("constexpr int blockwidth = 128;"); + text_.add_line("int nblock = 1+(ncell-1)/blockwidth;"); + text_.add_line("kernels::deliver_events<value_type, size_type>" + "<<<nblock, blockwidth>>>(param_pack_, mech_id_, events.delivery_data());"); + text_.decrease_indentation(); + text_.add_line("}"); + text_.add_line(); + + // Provide testing interface to `net_receive`. + text_.add_line("void net_receive(int i_, value_type weight) override {"); + text_.increase_indentation(); + text_.add_line("kernels::net_receive_global<value_type, size_type>" + "<<<1, 1>>>(param_pack_, i_, weight);"); text_.decrease_indentation(); text_.add_line("}"); text_.add_line(); @@ -468,6 +484,11 @@ CUDAPrinter::CUDAPrinter(Module &m, bool o) } } + text_.add_line("using base::mech_id_;"); + text_.add_line("using base::vec_ci_;"); + text_.add_line("using base::vec_t_;"); + text_.add_line("using base::vec_t_to_;"); + text_.add_line("using base::vec_dt_;"); text_.add_line("using base::vec_v_;"); text_.add_line("using base::vec_i_;"); text_.add_line("using base::node_index_;"); @@ -531,6 +552,9 @@ std::string CUDAPrinter::index_string(Symbol *s) { s->location()); } } + else if(s->is_cell_indexed_variable()) { + return "cid_"; + } return ""; } @@ -538,6 +562,11 @@ void CUDAPrinter::visit(IndexedVariable *e) { text_ << "params_." << e->index_name() << "[" << index_string(e) << "]"; } +void CUDAPrinter::visit(CellIndexedVariable *e) { + text_ << "params_." << e->index_name() << "[" << index_string(e) << "]"; +} + + void CUDAPrinter::visit(LocalVariable *e) { std::string const& name = e->name(); text_ << name; @@ -680,10 +709,11 @@ void CUDAPrinter::visit(ProcedureExpression *e) { } else { // net_receive() kernel is a special case, not covered by APIMethod visit. + + // Core `net_receive` kernel is called device-side from `kernel::deliver_events`. 
text_.add_gutter() << "template <typename T, typename I>\n"; - text_.add_line( "__global__"); - text_.add_gutter() << "void " << e->name() - << "(" << module_->name() << "_ParamPack<T,I> params_, " + text_.add_line( "__device__"); + text_.add_gutter() << "void net_receive(const " << module_->name() << "_ParamPack<T,I>& params_, " << "I i_, T weight) {"; text_.add_line(); increase_indentation(); @@ -692,15 +722,60 @@ void CUDAPrinter::visit(ProcedureExpression *e) { text_.add_line("using iarray = I;"); text_.add_line(); - text_.add_line("if (threadIdx.x || blockIdx.x) return;"); text_.add_line("auto tid_ = i_;"); text_.add_line("auto gid_ __attribute__((unused)) = params_.ni[tid_];"); + text_.add_line("auto cid_ __attribute__((unused)) = params_.ci[gid_];"); print_APIMethod_body(e); decrease_indentation(); text_.add_line("}"); text_.add_line(); + + // Global one-thread wrapper for `net_receive` kernel is used to implement the + // `mechanism::net_receive` method. This is not called in the normal course + // of event delivery. + text_.add_gutter() << "template <typename T, typename I>\n"; + text_.add_line( "__global__"); + text_.add_gutter() << "void net_receive_global(" + << module_->name() << "_ParamPack<T,I> params_, " + << "I i_, T weight) {"; + text_.add_line(); + increase_indentation(); + + text_.add_line("if (threadIdx.x || blockIdx.x) return;"); + text_.add_line("net_receive<T, I>(params_, i_, weight);"); + + decrease_indentation(); + text_.add_line("}"); + text_.add_line(); + + text_.add_gutter() << "template <typename T, typename I>\n"; + text_.add_line( "__global__"); + text_.add_gutter() << "void deliver_events(" + << module_->name() << "_ParamPack<T,I> params_, " + << "I mech_id, multi_event_stream::span_state state) {"; + text_.add_line(); + increase_indentation(); + + text_.add_line("auto tid_ = threadIdx.x + blockDim.x*blockIdx.x;"); + text_.add_line("auto const ncell_ = state.n;"); + text_.add_line(); + text_.add_line("if(tid_<ncell_) {"); + increase_indentation(); + + text_.add_line("for (auto j = state.span_begin[tid_]; j<state.mark[tid_]; ++j) {"); + increase_indentation(); + text_.add_line("if (state.ev_mech_id[j]==mech_id) net_receive<T, I>(params_, state.ev_index[j], state.ev_weight[j]);"); + decrease_indentation(); + text_.add_line("}"); + + decrease_indentation(); + text_.add_line("}"); + + decrease_indentation(); + text_.add_line("}"); + text_.add_line(); } } @@ -731,6 +806,7 @@ void CUDAPrinter::visit(APIMethod *e) { increase_indentation(); text_.add_line("auto gid_ __attribute__((unused)) = params_.ni[tid_];"); + text_.add_line("auto cid_ __attribute__((unused)) = params_.ci[gid_];"); print_APIMethod_body(e); diff --git a/modcc/cudaprinter.hpp b/modcc/cudaprinter.hpp index 0f4acbbbe1c8b0ed53b978aeea946a6dd222a3c7..7c5165225790750323fa8f94ab1c1e088659a086 100644 --- a/modcc/cudaprinter.hpp +++ b/modcc/cudaprinter.hpp @@ -22,6 +22,7 @@ public: void visit(Symbol *e) override; void visit(LocalVariable *e) override; void visit(IndexedVariable *e) override; + void visit(CellIndexedVariable *e) override; void visit(IdentifierExpression *e) override; void visit(CallExpression *e) override; diff --git a/modcc/expression.cpp b/modcc/expression.cpp index 7b05d65fe8c974a9bc54c5e01e81e9b6cc666a49..2fcf79e32d5337f1a6b6e913dbe90d4610934dc6 100644 --- a/modcc/expression.cpp +++ b/modcc/expression.cpp @@ -96,7 +96,7 @@ void IdentifierExpression::semantic(scope_ptr scp) { // indexed variable is used in this procedure. 
In which case, we create // a local variable which refers to the indexed variable, which will be // found for any subsequent variable lookup inside the procedure - if(auto sym = s->is_indexed_variable()) { + if(auto sym = s->is_abstract_indexed_variable()) { auto var = new LocalVariable(location_, spelling_); var->external_variable(sym); s = scope_->add_local_symbol(spelling_, scope_type::symbol_ptr{var}); @@ -262,6 +262,15 @@ std::string IndexedVariable::to_string() const { + ", ion" + (ion_channel()==ionKind::none ? red(ch) : green(ch)) + ") "; } +/******************************************************************************* + CellIndexedVariable +*******************************************************************************/ + +std::string CellIndexedVariable::to_string() const { + auto ch = ::to_string(ion_channel()); + return blue("cellindexed") + " " + yellow(name()) + "->" + yellow(index_name()); +} + /******************************************************************************* ReactionExpression *******************************************************************************/ @@ -869,6 +878,9 @@ void VariableExpression::accept(Visitor *v) { void IndexedVariable::accept(Visitor *v) { v->visit(this); } +void CellIndexedVariable::accept(Visitor *v) { + v->visit(this); +} void NumberExpression::accept(Visitor *v) { v->visit(this); } diff --git a/modcc/expression.hpp b/modcc/expression.hpp index 0da1dac24f9926cfef31d6c78aa597b9a0ef7a74..84eb7d09f86e786bc2646f4c96993cb2a9a2564b 100644 --- a/modcc/expression.hpp +++ b/modcc/expression.hpp @@ -21,7 +21,9 @@ class BlockExpression; class InitialBlock; class IfExpression; class VariableExpression; +class AbstractIndexedVariable; class IndexedVariable; +class CellIndexedVariable; class NumberExpression; class IntegerExpression; class LocalDeclaration; @@ -233,7 +235,9 @@ public : virtual ProcedureExpression* is_procedure() {return nullptr;} virtual NetReceiveExpression* is_net_receive() {return nullptr;} virtual APIMethod* is_api_method() {return nullptr;} + virtual AbstractIndexedVariable* is_abstract_indexed_variable() {return nullptr;} virtual IndexedVariable* is_indexed_variable() {return nullptr;} + virtual CellIndexedVariable* is_cell_indexed_variable() {return nullptr;} virtual LocalVariable* is_local_variable() {return nullptr;} private : @@ -509,8 +513,27 @@ protected: double value_ = std::numeric_limits<double>::quiet_NaN(); }; +// abstract base class for the two sorts of indexed externals +class AbstractIndexedVariable: public Symbol { +public: + AbstractIndexedVariable(Location loc, std::string name, symbolKind kind) + : Symbol(std::move(loc), std::move(name), std::move(kind)) + {} + + virtual accessKind access() const = 0; + virtual ionKind ion_channel() const = 0; + virtual std::string const& index_name() const = 0; + virtual tok op() const = 0; + + virtual bool is_ion() const = 0; + virtual bool is_read() const = 0; + virtual bool is_write() const = 0; + + AbstractIndexedVariable* is_abstract_indexed_variable() override {return this;} +}; + // an indexed variable -class IndexedVariable : public Symbol { +class IndexedVariable : public AbstractIndexedVariable { public: IndexedVariable(Location loc, std::string lookup_name, @@ -518,7 +541,7 @@ public: accessKind acc, tok o=tok::eq, ionKind channel=ionKind::none) - : Symbol(loc, std::move(lookup_name), symbolKind::indexed_variable), + : AbstractIndexedVariable(loc, std::move(lookup_name), symbolKind::indexed_variable), access_(acc), ion_channel_(channel), 
index_name_(index_name), @@ -529,7 +552,7 @@ public: if(access()==accessKind::readwrite) { msg = pprintf("attempt to generate an index % with readwrite access", yellow(lookup_name)); - goto compiler_error; + goto compiler_error; } // read only variables must be assigned via equality if(is_read() && op()!=tok::eq) { @@ -546,30 +569,20 @@ public: return; -compiler_error: + compiler_error: throw(compiler_exception(msg, location_)); } std::string to_string() const override; - accessKind access() const { - return access_; - } - - ionKind ion_channel() const { - return ion_channel_; - } - tok op() const { - return op_; - } - - std::string const& index_name() const { - return index_name_; - } + accessKind access() const override { return access_; } + ionKind ion_channel() const override { return ion_channel_; } + std::string const& index_name() const override { return index_name_; } + tok op() const override { return op_; } - bool is_ion() const {return ion_channel_ != ionKind::none;} - bool is_read() const {return access_ == accessKind::read; } - bool is_write() const {return access_ == accessKind::write; } + bool is_ion() const override { return ion_channel_ != ionKind::none; } + bool is_read() const override { return access_ == accessKind::read; } + bool is_write() const override { return access_ == accessKind::write; } void accept(Visitor *v) override; IndexedVariable* is_indexed_variable() override {return this;} @@ -582,6 +595,36 @@ protected: tok op_; }; +class CellIndexedVariable : public AbstractIndexedVariable { +public: + CellIndexedVariable(Location loc, + std::string lookup_name, + std::string index_name) + : AbstractIndexedVariable(loc, std::move(lookup_name), symbolKind::indexed_variable), + index_name_(std::move(index_name)) + {} + + std::string to_string() const override; + + accessKind access() const override { return accessKind::read; } + ionKind ion_channel() const override { return ionKind::none; } + std::string const& index_name() const override { return index_name_; } + tok op() const override { return tok::plus; } + + bool is_ion() const override { return false; } + bool is_read() const override { return true; } + bool is_write() const override { return false; } + + void accept(Visitor *v) override; + CellIndexedVariable* is_cell_indexed_variable() override {return this;} + + ~CellIndexedVariable() {} + +protected: + std::string index_name_; +}; + + class LocalVariable : public Symbol { public : LocalVariable(Location loc, @@ -626,11 +669,11 @@ public : return kind_==localVariableKind::argument; } - IndexedVariable* external_variable() { + AbstractIndexedVariable* external_variable() { return external_; } - void external_variable(IndexedVariable *i) { + void external_variable(AbstractIndexedVariable *i) { external_ = i; } @@ -638,7 +681,7 @@ public : void accept(Visitor *v) override; private : - IndexedVariable *external_=nullptr; + AbstractIndexedVariable *external_=nullptr; localVariableKind kind_; }; diff --git a/modcc/module.cpp b/modcc/module.cpp index 31a7b1261c6bbba6c6b888e0bda3e5701aa64bd1..41358492789bbbdaab34a248468369745a851b77 100644 --- a/modcc/module.cpp +++ b/modcc/module.cpp @@ -295,17 +295,13 @@ bool Module::semantic() { // The second is nrn_current, which is handled after this block auto state_api = make_empty_api_method("nrn_state", "breakpoint"); auto api_state = state_api.first; - auto breakpoint = state_api.second; + auto breakpoint = state_api.second; // implies we are building the `nrn_state()` method. 
api_state->semantic(symbols_); scope_ptr nrn_state_scope = api_state->scope(); if(breakpoint) { - //.......................................................... - // nrn_state : The temporal integration of state variables - //.......................................................... - - // grab SOLVE statements, put them in `nrn_state` after translation. + // Grab SOLVE statements, put them in `nrn_state` after translation. bool found_solve = false; bool found_non_solve = false; std::set<std::string> solved_ids; @@ -360,6 +356,7 @@ bool Module::semantic() { } // May have now redundant local variables; remove these first. + solve_block->semantic(nrn_state_scope); solve_block = remove_unused_locals(solve_block->is_block()); // Copy body into nrn_state. @@ -381,10 +378,14 @@ bool Module::semantic() { breakpoint->location()); } else { - // redo semantic pass in order to elimate any removed local symbols. + // Redo semantic pass in order to elimate any removed local symbols. api_state->semantic(symbols_); } + // Run remove locals pass again on the whole body in case `dt` was never used. + api_state->body(remove_unused_locals(api_state->body())); + api_state->semantic(symbols_); + //.......................................................... // nrn_current : update contributions to currents //.......................................................... @@ -430,8 +431,6 @@ void Module::add_variables_to_symbols() { symbols_[name] = symbol_ptr{t}; }; - create_variable("t", rangeKind::scalar, accessKind::read); - create_variable("dt", rangeKind::scalar, accessKind::read); // density mechanisms use a vector of weights from current densities to // units of nA if (kind()==moduleKind::density) { @@ -456,6 +455,23 @@ void Module::add_variables_to_symbols() { accessKind::write, ionKind::none, Location()); create_indexed_variable("v", "vec_v", tok::eq, accessKind::read, ionKind::none, Location()); + create_indexed_variable("dt", "vec_dt", tok::eq, + accessKind::read, ionKind::none, Location()); + + // add cell-indexed variables to the table + auto create_cell_indexed_variable = [this] + (std::string const& name, std::string const& indexed_name, Location loc = Location()) + { + if(symbols_.count(name)) { + throw compiler_exception( + "trying to insert a symbol that already exists", + loc); + } + symbols_[name] = make_symbol<CellIndexedVariable>(loc, name, indexed_name); + }; + + create_cell_indexed_variable("t", "vec_t"); + create_cell_indexed_variable("t_to", "vec_t_to"); // add state variables for(auto const &var : state_block()) { diff --git a/modcc/simd_printer.hpp b/modcc/simd_printer.hpp index ed77cc6b6fba9f2c68e0ed7f20d489152b0368cc..849ac2da18a08320147877baecb0d8010f2c8725 100644 --- a/modcc/simd_printer.hpp +++ b/modcc/simd_printer.hpp @@ -41,6 +41,7 @@ public: text_ << name; } + void visit(CellIndexedVariable *e) override; void visit(IndexedVariable *e) override; void visit(APIMethod *e) override; void visit(BlockExpression *e) override; @@ -109,12 +110,14 @@ void SimdPrinter<Arch>::visit(APIMethod *e) { if (e->is_api_method()->body()->statements().size()) { text_.increase_indentation(); - // First emit the raw pointer of node_index_ + // First emit the raw pointer of node_index_ and vec_ci_ text_.add_line("constexpr size_t simd_width = " + simd_backend::emit_simd_width() + " / (CHAR_BIT*sizeof(value_type));"); text_.add_line("const size_type* " + emit_rawptr_name("node_index_") + " = node_index_.data();"); + text_.add_line("const size_type* " + emit_rawptr_name("vec_ci_") + + " = vec_ci_.data();"); 
text_.add_line(); // create local indexed views @@ -152,7 +155,8 @@ template<targetKind Arch> void SimdPrinter<Arch>::emit_indexed_view(LocalVariable* var, std::set<std::string>& decls) { auto const& name = var->name(); - auto const& index_name = var->external_variable()->index_name(); + auto external = var->external_variable(); + auto const& index_name = external->index_name(); text_.add_gutter(); if (decls.find(index_name) == decls.cend()) { @@ -160,24 +164,29 @@ void SimdPrinter<Arch>::emit_indexed_view(LocalVariable* var, decls.insert(index_name); } - text_ << index_name; - text_ << " = util::indirect_view"; - auto channel = var->external_variable()->ion_channel(); - if (channel == ionKind::none) { - text_ << "(" + emit_member_name(index_name) + ", node_index_);\n"; + text_ << index_name << " = "; + + if (external->is_cell_indexed_variable()) { + text_ << "util::indirect_view(util::indirect_view(" + << emit_member_name(index_name) << ", vec_ci_), node_index_);\n"; } - else { + else if (external->is_ion()) { + auto channel = external->ion_channel(); auto iname = ion_store(channel); - text_ << "(" << iname << "." << name << ", " + text_ << "util::indirect_view(" << iname << "." << name << ", " << ion_store(channel) << ".index);\n"; } + else { + text_ << " util::indirect_view(" + emit_member_name(index_name) + ", node_index_);\n"; + } } template<targetKind Arch> void SimdPrinter<Arch>::emit_indexed_view_simd(LocalVariable* var, std::set<std::string>& decls) { auto const& name = var->name(); - auto const& index_name = var->external_variable()->index_name(); + auto external = var->external_variable(); + auto const& index_name = external->index_name(); // We need to work with with raw pointers in the vectorized version auto channel = var->external_variable()->ion_channel(); @@ -236,11 +245,11 @@ void SimdPrinter<Arch>::emit_api_loop(APIMethod* e, for (auto& symbol : e->scope()->locals()) { auto var = symbol.second->is_local_variable(); if (var->is_indexed()) { - auto channel = var->external_variable()->ion_channel(); + auto external = var->external_variable(); + auto channel = external->ion_channel(); std::string cast_type = "(const " + simd_backend::emit_index_type() + " *) "; - std::string vindex_name, index_ptr_name; if (channel == ionKind::none) { vindex_name = emit_vtmp_name("node_index_"); @@ -253,7 +262,6 @@ void SimdPrinter<Arch>::emit_api_loop(APIMethod* e, } - if (declared_ion_vars.find(vindex_name) == declared_ion_vars.cend()) { declared_ion_vars.insert(vindex_name); text_.add_gutter(); @@ -264,6 +272,20 @@ void SimdPrinter<Arch>::emit_api_loop(APIMethod* e, text_, cast_type + "&" + index_ptr_name + "[off_]"); text_.end_line(";"); } + + if (external->is_cell_indexed_variable()) { + std::string vci_name = emit_vtmp_name("vec_ci_"); + std::string ci_ptr_name = emit_rawptr_name("vec_ci_"); + + if (declared_ion_vars.find(vci_name) == declared_ion_vars.cend()) { + declared_ion_vars.insert(vci_name); + text_.add_gutter(); + text_ << simd_backend::emit_index_type() << " " + << vci_name << " = "; + simd_backend::emit_gather_index(text_, "(int *)"+ci_ptr_name, vindex_name, "sizeof(size_type)"); + text_.end_line(";"); + } + } } } @@ -381,6 +403,16 @@ void SimdPrinter<Arch>::visit(IndexedVariable *e) { simd_backend::emit_gather(text_, value_name, vindex_name, "sizeof(value_type)"); } +template<targetKind Arch> +void SimdPrinter<Arch>::visit(CellIndexedVariable *e) { + std::string vindex_name, value_name; + + vindex_name = emit_vtmp_name("vec_ci_"); + value_name = 
emit_rawptr_name(e->index_name()); + + simd_backend::emit_gather(text_, vindex_name, value_name, "sizeof(value_type)"); +} + template<targetKind Arch> void SimdPrinter<Arch>::visit(BlockExpression *e) { if (!e->is_nested()) { diff --git a/modcc/visitor.hpp b/modcc/visitor.hpp index 921189bc70cda3674bf94fabe6441a3a43f174b4..3ccbb938ae08563ff93a1a63a87b7bd77d9bb401 100644 --- a/modcc/visitor.hpp +++ b/modcc/visitor.hpp @@ -30,6 +30,7 @@ public: virtual void visit(StoichExpression *e) { visit((Expression*) e); } virtual void visit(VariableExpression *e) { visit((Expression*) e); } virtual void visit(IndexedVariable *e) { visit((Expression*) e); } + virtual void visit(CellIndexedVariable *e) { visit((Expression*) e); } virtual void visit(FunctionExpression *e) { visit((Expression*) e); } virtual void visit(IfExpression *e) { visit((Expression*) e); } virtual void visit(SolveExpression *e) { visit((Expression*) e); } diff --git a/scripts/cc-filter b/scripts/cc-filter index e406b1091fb30bbcc6dd8955ae6fc01da7df5fcc..fca5c49c7369b14aa7031edfaca86e93154e01b2 100755 --- a/scripts/cc-filter +++ b/scripts/cc-filter @@ -226,7 +226,7 @@ rule cxx:unsigned-int s/\bunsigned\s+int\b/unsigned/g group cxx:tidy cxx:rm-template-space cxx:unsigned-int rule cxx:strip-qualified s/%cxx:qualified%//g -rule cxx:strip-args s/(%cxx:identifier%%cxx:template-args%?)%cxx:paren-args%/$1(...)/g +rule cxx:strip-args s/(%cxx:identifier%%cxx:template-args%?)%cxx:paren-args%(?!:)/$1(...)/g group cxx:strip-all cxx:strip-qualified cxx:strip-args _end_ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index badd3733a2a0cf3d90b2b9bb6c19fb553fcae686..47b79f5fd7ac9e2a036c40eeb10b41bb3bc3be2c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,6 +22,7 @@ set(BASE_SOURCES ) set(CUDA_SOURCES backends/gpu/fvm.cu + backends/gpu/multi_event_stream.cu memory/fill.cu ) diff --git a/src/backends/event.hpp b/src/backends/event.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0eae623fbd0b90abf493dfc44e6c53b3d2bc5047 --- /dev/null +++ b/src/backends/event.hpp @@ -0,0 +1,32 @@ +#pragma once + +#include <common_types.hpp> + +// Structures for the representation of event delivery targets and +// staged events. + +namespace nest { +namespace mc { + +struct target_handle { + cell_local_size_type mech_id; // mechanism type identifier (per cell group). + cell_local_size_type index; // instance of the mechanism + cell_size_type cell_index; // which cell (acts as index into e.g. vec_t) + + target_handle() {} + target_handle(cell_local_size_type mech_id, cell_local_size_type index, cell_size_type cell_index): + mech_id(mech_id), index(index), cell_index(cell_index) {} +}; + +struct deliverable_event { + time_type time; + target_handle handle; + float weight; + + deliverable_event() {} + deliverable_event(time_type time, target_handle handle, float weight): + time(time), handle(handle), weight(weight) {} +}; + +} // namespace mc +} // namespace nest diff --git a/src/backends/event_delivery.md b/src/backends/event_delivery.md new file mode 100644 index 0000000000000000000000000000000000000000..796ccbae65b187d1120b20ef4aa2e544f3cd7399 --- /dev/null +++ b/src/backends/event_delivery.md @@ -0,0 +1,123 @@ +# Back-end event delivery + +## Data structures + +The lowered cell implementation gathers postsynaptic spike events from its governing +`cell_group` class, and then passes them on to concrete back-end device-specific +classes for on-device delivery. 
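As an illustrative aside (not part of the patch): using the `target_handle` and `deliverable_event` definitions from `backends/event.hpp` above, staging a single event for the next integration period might look like the sketch below. The mechanism id, instance index, cell index, time and weight values are hypothetical.

    #include <vector>

    #include <backends/event.hpp>

    // Stage one spike for delivery; the vector later handed to
    // multi_event_stream::init() must be ordered by increasing event time.
    void stage_example(std::vector<nest::mc::deliverable_event>& staged) {
        // Hypothetical target: mechanism type 3, instance 17, on cell 2 of the group.
        nest::mc::target_handle h(3, 17, 2);

        // Spike arriving at t = 10.125 ms with weight 0.01.
        staged.emplace_back(10.125, h, 0.01f);
    }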
+ +`backends/event.hpp` contains the concrete classes used to represent event +destinations and event information. + +The back-end event management structure is supplied by the corresponding `backend` +class as `backend::multi_event_stream`. It presents a limited public interface to +the lowered cell, and is passed by reference as a parameter to the mechanism +`deliver_events` method. + +### Target handles + +Target handles are used by the lowered cell implementation to identify a particular mechanism +instance that can receive events — ultimately via `net_receive` — and corresponding simulated +cell. The cell information is given as an index into the cell group collection of cells, +and is used to group events by integration domain (we have one domain per cell in each cell +group). + +Target handles are represented by the `target_handle` struct, opaque to `mc_cell_group`, +but created in the `fvm_multicell` for each point mechanism (synapse) in the cell group. + +### Deliverable events + +Events for delivery within the next integration period are staged in the lowered cell +as a vector of `deliverable_event` objects. These comprise an event delivery time, +a `target_handle` describing their destination, and a weight. + +### Back-end event streams + +`backend::multi_event_stream` represents a set (one per cell/integration domain) +of event streams. There is one `multi_event_stream` per lowered cell. + +From the perspective of the lowered cell, it must support the methods below. +In the following, `time` is a `view` or `const_view` of an array with one +element per stream. + +* `void init(const std::vector<deliverable_event>& staged_events)` + + Take a copy of the staged events (which must be ordered by increasing event time) + and initialize the streams by gathering the events by cell. + +* `bool empty() const` + + Return true if and only if there are no un-retired events left in any stream. + +* `void clear()` + + Retire all events, leaving the `multi_event_stream` in an empty state. + +* `void mark_until_after(const_view time)` + + For all streams, mark events for delivery in the _i_ th stream with event time ≤ _time[i]_. + +* `void event_times_if_before(view time) const` + + For each stream, set _time[i]_ to the time of the next event time in the _i_ th stream + if such an event exists and has time less than _time[i]_. + +* `void drop_marked_events()` + + Retire all marked events. + + +## Event delivery and integration timeline + +Event delivery is performed as part of the integration loop within the lowered +cell implementation. The interface is provided by the `multi_event_stream` +described above, together with the mechanism method that handles the delivery proper, +`mechanism::deliver_events` and a `backend` static method that computes the +integration step end time before considering any pending events. + +For `fvm_multicell` one integration step comprises: + +1. Events for each cell that are due at that cell's corresponding time are + gathered with `events_.mark_events(time_)` where `time_` is a + `const_view` of the cell times and `events_` is a reference to the + `backend::multi_event_stream` object. + +2. Each mechanism is requested to deliver to itself any marked events that + are associated with that mechanism, via the virtual + `mechanism::deliver_events(backend::multi_event_stream&)` method. + + This action must precede the computation of mechanism current contributions + with `mechanism::nrn_current()`. + +3. Marked events are discarded with `events_.drop_marked_events()`. 
+ +4. The upper bound on the integration step stop time `time_to_` is + computed via `backend::update_time_to(time_to_, time_, dt_max_, tfinal_)`, + as the minimum of the per-cell time `time_` plus `dt_max_` and + the final integration stop time `tfinal_`. + +5. The integration step stop time `time_to_` is reduced to match any + pending events on each cell with `events_.event_times_if_before(time_to)`. + +6. The solver matrix is assembled and solved to compute the voltages, using the + newly computed currents and integration step times. + +7. The mechanism states are updated with `mechanism::nrn_state()`. + +8. The cell times `time_` are set to the integration step stop times `time_to_`. + +9. Spike detection for the last integration step is performed via the + `threshold_watcher_` object. + +## Consequences for the integrator + +Towards the end of the integration period, an integration step may have a zero _dt_ +for one or more cells within the group, and this needs to be handled correctly: + +* Generated mechanism `nrn_state()` methods should be numerically correct with + zero _dt_; a possibility is to guard the integration step with a _dt_ check. + +* Matrix assemble and solve must check for zero _dt_. In the FVM `multicore` + matrix implementation, zero _dt_ sets the diagonal element to zero and the + rhs to the voltage; the solve procedure then ignores cells with a zero + diagonal element. diff --git a/src/backends/fvm.hpp b/src/backends/fvm.hpp index adb9838b227a392ba5b1dc6556c937bea303104a..a12ce30ea3f83af44e98d13a1d3ac8e21eb38af6 100644 --- a/src/backends/fvm.hpp +++ b/src/backends/fvm.hpp @@ -13,7 +13,13 @@ struct null_backend: public multicore::backend { } static mechanism make_mechanism( - const std::string&, view, view, const std::vector<value_type>&, const std::vector<size_type>&) + const std::string&, + size_type, + const_iview, + const_view, const_view, const_view, + view, view, + const std::vector<value_type>&, + const std::vector<size_type>&) { throw std::runtime_error("attempt to use an unsupported back end"); } diff --git a/src/backends/gpu/fvm.hpp b/src/backends/gpu/fvm.hpp index 1d932a07b68b39e2ea8a07098358141b61a491d2..c7a4dbb6b782c2c1754537735b212ead9d6d296e 100644 --- a/src/backends/gpu/fvm.hpp +++ b/src/backends/gpu/fvm.hpp @@ -3,12 +3,16 @@ #include <map> #include <string> +#include <backends/event.hpp> #include <common_types.hpp> #include <mechanism.hpp> #include <memory/memory.hpp> +#include <util/rangeutil.hpp> +#include "kernels/time_ops.hpp" #include "matrix_state_interleaved.hpp" #include "matrix_state_flat.hpp" +#include "multi_event_stream.hpp" #include "stimulus.hpp" #include "threshold_watcher.hpp" @@ -47,6 +51,11 @@ struct backend { // matrix back end implementation using matrix_state = matrix_state_interleaved<value_type, size_type>; + using multi_event_stream = nest::mc::gpu::multi_event_stream; + + // re-expose common backend event types + using deliverable_event = nest::mc::deliverable_event; + using target_handle = nest::mc::target_handle; // mechanism infrastructure using ion = mechanisms::ion<backend>; @@ -57,6 +66,9 @@ struct backend { static mechanism make_mechanism( const std::string& name, + size_type mech_id, + const_iview vec_ci, + const_view vec_t, const_view vec_t_to, const_view vec_dt, view vec_v, view vec_i, const std::vector<value_type>& weights, const std::vector<size_type>& node_indices) @@ -66,7 +78,7 @@ struct backend { } return mech_map_.find(name)-> - second(vec_v, vec_i, memory::make_const_view(weights), 
memory::make_const_view(node_indices)); + second(mech_id, vec_ci, vec_t, vec_t_to, vec_dt, vec_v, vec_i, memory::make_const_view(weights), memory::make_const_view(node_indices)); } static bool has_mechanism(const std::string& name) { @@ -76,15 +88,49 @@ struct backend { using threshold_watcher = nest::mc::gpu::threshold_watcher<value_type, size_type>; -private: + // perform min/max reductions on 'array' type + template <typename V> + static std::pair<V, V> minmax_value(const memory::device_vector<V>& v) { + // TODO: consider/test replacement with CUDA kernel (or generic reduction kernel). + auto v_copy = memory::on_host(v); + return util::minmax_value(v_copy); + } + + // perform element-wise comparison on 'array' type against `t_test`. + template <typename V> + static bool any_time_before(const memory::device_vector<V>& t, V t_test) { + // Note: benchmarking (on a P100) indicates that using the gpu::any_time_before + // function is slower than the copy, unless we're running over ten thousands of + // cells per cell group. + // + // Commenting out for now, but consider a size-dependent test or adaptive choice. + + // return gpu::any_time_before(t.size(), t.data(), t_test); + + auto v_copy = memory::on_host(t); + return util::minmax_value(v_copy).first<t_test; + } - using maker_type = mechanism (*)(view, view, array&&, iarray&&); + static void update_time_to(array& time_to, const_view time, value_type dt, value_type tmax) { + nest::mc::gpu::update_time_to<value_type, size_type>(time_to.size(), time_to.data(), time.data(), dt, tmax); + } + + // set the per-cell and per-compartment dt_ from time_to_ - time_. + static void set_dt(array& dt_cell, array& dt_comp, const_view time_to, const_view time, const_iview cv_to_cell) { + size_type ncell = util::size(dt_cell); + size_type ncomp = util::size(dt_comp); + + nest::mc::gpu::set_dt<value_type, size_type>(ncell, ncomp, dt_cell.data(), dt_comp.data(), time_to.data(), time.data(), cv_to_cell.data()); + } + +private: + using maker_type = mechanism (*)(size_type, const_iview, const_view, const_view, const_view, view, view, array&&, iarray&&); static std::map<std::string, maker_type> mech_map_; template <template <typename> class Mech> - static mechanism maker(view vec_v, view vec_i, array&& weights, iarray&& node_indices) { + static mechanism maker(size_type mech_id, const_iview vec_ci, const_view vec_t, const_view vec_t_to, const_view vec_dt, view vec_v, view vec_i, array&& weights, iarray&& node_indices) { return mechanisms::make_mechanism<Mech<backend>> - (vec_v, vec_i, std::move(weights), std::move(node_indices)); + (mech_id, vec_ci, vec_t, vec_t_to, vec_dt, vec_v, vec_i, std::move(weights), std::move(node_indices)); } }; diff --git a/src/backends/gpu/kernels/assemble_matrix.hpp b/src/backends/gpu/kernels/assemble_matrix.hpp index 69d19733dcd9e36eaf5c5fbc24f806fd7fed1f50..1a4d6a4520fd4e926a27d29b8f09ea13f9552803 100644 --- a/src/backends/gpu/kernels/assemble_matrix.hpp +++ b/src/backends/gpu/kernels/assemble_matrix.hpp @@ -15,20 +15,40 @@ namespace gpu { template <typename T, typename I> __global__ void assemble_matrix_flat( - T* d, T* rhs, const T* invariant_d, - const T* voltage, const T* current, const T* cv_capacitance, - T dt, unsigned n) + T* d, + T* rhs, + const T* invariant_d, + const T* voltage, + const T* current, + const T* cv_capacitance, + const I* cv_to_cell, + const T* dt_cell, + unsigned n) { const unsigned tid = threadIdx.x + blockDim.x*blockIdx.x; - // The 1e-3 is a constant of proportionality required to ensure that the - // 
conductance (gi) values have units μS (micro-Siemens). - // See the model documentation in docs/model for more information. - T factor = 1e-3/dt; if (tid<n) { - auto gi = factor * cv_capacitance[tid]; - d[tid] = gi + invariant_d[tid]; - rhs[tid] = gi*voltage[tid] - current[tid]; + auto cid = cv_to_cell[tid]; + auto dt = dt_cell[cid]; + + // Note: dt==0 case is expected only at the end of a mindelay/2 + // integration period, and consequently divergence is unlikely + // to be a peformance problem. + + if (dt>0) { + // The 1e-3 is a constant of proportionality required to ensure that the + // conductance (gi) values have units μS (micro-Siemens). + // See the model documentation in docs/model for more information. + T factor = 1e-3/dt; + + auto gi = factor * cv_capacitance[tid]; + d[tid] = gi + invariant_d[tid]; + rhs[tid] = gi*voltage[tid] - current[tid]; + } + else { + d[tid] = 0; + rhs[tid] = voltage[tid]; + } } } @@ -49,7 +69,9 @@ void assemble_matrix_interleaved( const T* cv_capacitance, const I* sizes, const I* starts, - T dt, unsigned padded_size, unsigned num_mtx) + const I* matrix_to_cell, + const T* dt_cell, + unsigned padded_size, unsigned num_mtx) { static_assert(BlockWidth*LoadWidth==Threads, "number of threads must equal number of values to process per block"); @@ -76,10 +98,21 @@ void assemble_matrix_interleaved( const unsigned max_size = sizes[0]; - // The 1e-3 is a constant of proportionality required to ensure that the - // conductance (gi) values have units μS (micro-Siemens). - // See the model documentation in docs/model for more information. - T factor = 1e-3/dt; + T factor = 0; + T dt = 0; + const unsigned permuted_cid = blk_id*BlockWidth + blk_lane; + + if (permuted_cid<num_mtx) { + auto cid = matrix_to_cell[permuted_cid]; + dt = dt_cell[cid]; + + // The 1e-3 is a constant of proportionality required to ensure that the + // conductance (gi) values have units μS (micro-Siemens). + // See the model documentation in docs/model for more information. + + factor = dt>0? 1e-3/dt: 0; + } + for (unsigned j=0u; j<max_size; j+=LoadWidth) { if (do_load && load_pos<end) { buffer_v[lid] = voltage[load_pos]; @@ -90,8 +123,15 @@ void assemble_matrix_interleaved( if (j+blk_row<padded_size) { const auto gi = factor * cv_capacitance[store_pos]; - d[store_pos] = gi + invariant_d[store_pos]; - rhs[store_pos] = gi*buffer_v[blk_pos] - buffer_i[blk_pos]; + + if (dt>0) { + d[store_pos] = (gi + invariant_d[store_pos]); + rhs[store_pos] = (gi*buffer_v[blk_pos] - buffer_i[blk_pos]); + } + else { + d[store_pos] = 0; + rhs[store_pos] = buffer_v[blk_pos]; + } } __syncthreads(); diff --git a/src/backends/gpu/kernels/detail.hpp b/src/backends/gpu/kernels/detail.hpp index 8f3993faee6d47af106d4f05a1e33783b1befc18..e33e933bc36d47b98cc3716eca3349b08f0f8b81 100644 --- a/src/backends/gpu/kernels/detail.hpp +++ b/src/backends/gpu/kernels/detail.hpp @@ -75,6 +75,13 @@ constexpr bool is_npos(T v) { return v == npos<T>(); } +/// Cuda lerp by u on [a,b]: (1-u)*a + u*b. 
+template <typename T> +__host__ __device__ +inline T lerp(T a, T b, T u) { + return std::fma(u, b, std::fma(-u, a, a)); +} + } // namespace impl } // namespace gpu diff --git a/src/backends/gpu/kernels/solve_matrix.hpp b/src/backends/gpu/kernels/solve_matrix.hpp index 2cfa5f105f54f3a0e0e9c357f8dc3ad6ea7ee4b5..48e4a8b9addd31e9d9adefc87eded77c3a03031c 100644 --- a/src/backends/gpu/kernels/solve_matrix.hpp +++ b/src/backends/gpu/kernels/solve_matrix.hpp @@ -11,17 +11,21 @@ namespace gpu { template <typename T, typename I> __global__ void solve_matrix_flat( - T* rhs, T* d, const T* u, const I* p, const I* cell_index, int num_mtx) + T* rhs, T* d, const T* u, const I* p, const I* cell_cv_divs, int num_mtx) { auto tid = threadIdx.x + blockDim.x*blockIdx.x; if (tid<num_mtx) { // get range of this thread's cell matrix - const auto first = cell_index[tid]; - const auto last = cell_index[tid+1]; + const auto first = cell_cv_divs[tid]; + const auto last = cell_cv_divs[tid+1]; + + // Zero diagonal term implies dt==0; just leave rhs (for whole matrix) + // alone in that case. + if (d[last-1]==0) return; // backward sweep - for(auto i=last-1; i>first; --i) { + for (auto i=last-1; i>first; --i) { auto factor = u[i] / d[i]; d[p[i]] -= factor * u[i]; rhs[p[i]] -= factor * rhs[i]; @@ -55,6 +59,10 @@ void solve_matrix_interleaved( const auto last = first + BlockWidth*(sizes[tid]-1); const auto last_max = first + BlockWidth*(sizes[block_start]-1); + // Zero diagonal term implies dt==0; just leave rhs (for whole matrix) + // alone in that case. + if (d[last]==0) return; + // backward sweep for(auto i=last_max; i>first; i-=BlockWidth) { if (i<=last) { diff --git a/src/backends/gpu/kernels/test_thresholds.hpp b/src/backends/gpu/kernels/test_thresholds.hpp index 946030704607b87bad5531c8530f671fe52a48d8..4c23d13d67f6a0df63bf326e75c645c763f0f807 100644 --- a/src/backends/gpu/kernels/test_thresholds.hpp +++ b/src/backends/gpu/kernels/test_thresholds.hpp @@ -17,10 +17,11 @@ namespace gpu { template <typename T, typename I, typename Stack> __global__ void test_thresholds( - float t, float t_prev, int size, + const I* cv_to_cell, const T* t_after, const T* t_before, + int size, Stack& stack, I* is_crossed, T* prev_values, - const I* index, const T* values, const T* thresholds) + const I* cv_index, const T* values, const T* thresholds) { int i = threadIdx.x + blockIdx.x*blockDim.x; @@ -29,8 +30,10 @@ void test_thresholds( if (i<size) { // Test for threshold crossing + const auto cv = cv_index[i]; + const auto cell = cv_to_cell[cv]; const auto v_prev = prev_values[i]; - const auto v = values[index[i]]; + const auto v = values[cv]; const auto thresh = thresholds[i]; if (!is_crossed[i]) { @@ -38,7 +41,7 @@ void test_thresholds( // The threshold has been passed, so estimate the time using // linear interpolation auto pos = (thresh - v_prev)/(v - v_prev); - crossing_time = t_prev + pos*(t - t_prev); + crossing_time = impl::lerp(t_before[cell], t_after[cell], pos); is_crossed[i] = 1; crossed = true; diff --git a/src/backends/gpu/kernels/time_ops.hpp b/src/backends/gpu/kernels/time_ops.hpp new file mode 100644 index 0000000000000000000000000000000000000000..942710346d137c2611ed66d0efbd2c8840451367 --- /dev/null +++ b/src/backends/gpu/kernels/time_ops.hpp @@ -0,0 +1,113 @@ +#pragma once + +#include <type_traits> + +#include <cuda.h> + +#include <memory/wrappers.hpp> + +namespace nest { +namespace mc { +namespace gpu { + +namespace kernels { + template <typename T, typename I> + __global__ void update_time_to(I n, T* time_to, 
const T* time, T dt, T tmax) { + int i = threadIdx.x+blockIdx.x*blockDim.x; + if (i<n) { + auto t = time[i]+dt; + time_to[i] = t<tmax? t: tmax; + } + } + + // array-array comparison + template <typename T, typename I, typename Pred> + __global__ void array_reduce_any(I n, const T* x, const T* y, Pred p, int* rptr) { + int i = threadIdx.x+blockIdx.x*blockDim.x; + int cmp = i<n? p(x[i], y[i]): 0; + if (__syncthreads_or(cmp) && !threadIdx.x) { + *rptr=1; + } + } + + // array-scalar comparison + template <typename T, typename I, typename Pred> + __global__ void array_reduce_any(I n, const T* x, T y, Pred p, int* rptr) { + int i = threadIdx.x+blockIdx.x*blockDim.x; + int cmp = i<n? p(x[i], y): 0; + if (__syncthreads_or(cmp) && !threadIdx.x) { + *rptr=1; + } + } + + template <typename T> + struct less { + __device__ __host__ + bool operator()(const T& a, const T& b) const { return a<b; } + }; + + // vector minus: x = y - z + template <typename T, typename I> + __global__ void vec_minus(I n, T* x, const T* y, const T* z) { + int i = threadIdx.x+blockIdx.x*blockDim.x; + if (i<n) { + x[i] = y[i]-z[i]; + } + } + + // vector gather: x[i] = y[index[i]] + template <typename T, typename I> + __global__ void gather(I n, T* x, const T* y, const I* index) { + int i = threadIdx.x+blockIdx.x*blockDim.x; + if (i<n) { + x[i] = y[index[i]]; + } + } +} + +template <typename T, typename I> +void update_time_to(I n, T* time_to, const T* time, T dt, T tmax) { + if (!n) { + return; + } + + constexpr int blockwidth = 128; + int nblock = 1+(n-1)/blockwidth; + kernels::update_time_to<<<nblock, blockwidth>>>(n, time_to, time, dt, tmax); +} + +template <typename T, typename U, typename I> +bool any_time_before(I n, T* t0, U t1) { + static_assert(std::is_convertible<T*, U>::value || std::is_convertible<T, U>::value, + "third-argument must be a compatible scalar or pointer type"); + + static thread_local auto r = memory::device_vector<int>(1); + if (!n) { + return false; + } + + constexpr int blockwidth = 128; + int nblock = 1+(n-1)/blockwidth; + + r[0] = 0; + kernels::array_reduce_any<<<nblock, blockwidth>>>(n, t0, t1, kernels::less<T>(), r.data()); + return r[0]; +} + +template <typename T, typename I> +void set_dt(I ncell, I ncomp, T* dt_cell, T* dt_comp, const T* time_to, const T* time, const I* cv_to_cell) { + if (!ncell || !ncomp) { + return; + } + + constexpr int blockwidth = 128; + int nblock = 1+(ncell-1)/blockwidth; + kernels::vec_minus<<<nblock, blockwidth>>>(ncell, dt_cell, time_to, time); + + nblock = 1+(ncomp-1)/blockwidth; + kernels::gather<<<nblock, blockwidth>>>(ncomp, dt_comp, dt_cell, cv_to_cell); +} + +} // namespace gpu +} // namespace mc +} // namespace nest diff --git a/src/backends/gpu/matrix_state_flat.hpp b/src/backends/gpu/matrix_state_flat.hpp index d214079f7f5841a43c6f6c52cf9992e3e0a635e3..dc7f9a40fa095399a2ff357dfc30660bb245389f 100644 --- a/src/backends/gpu/matrix_state_flat.hpp +++ b/src/backends/gpu/matrix_state_flat.hpp @@ -1,7 +1,9 @@ #pragma once #include <memory/memory.hpp> +#include <memory/wrappers.hpp> #include <util/span.hpp> +#include <util/partition.hpp> #include <util/rangeutil.hpp> #include "kernels/solve_matrix.hpp" @@ -24,7 +26,8 @@ struct matrix_state_flat { using const_view = typename array::const_view_type; iarray parent_index; - iarray cell_index; + iarray cell_cv_divs; + iarray cv_to_cell; array d; // [μS] array u; // [μS] @@ -36,17 +39,15 @@ struct matrix_state_flat { // the invariant part of the matrix diagonal array invariant_d; // [μS] - // interface for exposing the 
solution to the outside world - view solution; - matrix_state_flat() = default; matrix_state_flat(const std::vector<size_type>& p, - const std::vector<size_type>& cell_idx, + const std::vector<size_type>& cell_cv_divs, const std::vector<value_type>& cv_cap, const std::vector<value_type>& face_cond): parent_index(memory::make_const_view(p)), - cell_index(memory::make_const_view(cell_idx)), + cell_cv_divs(memory::make_const_view(cell_cv_divs)), + cv_to_cell(p.size()), d(p.size()), u(p.size()), rhs(p.size()), @@ -54,12 +55,13 @@ struct matrix_state_flat { { EXPECTS(cv_cap.size() == size()); EXPECTS(face_cond.size() == size()); - EXPECTS(cell_idx.back() == size()); - EXPECTS(cell_idx.size() > 2u); + EXPECTS(cell_cv_divs.back() == size()); + EXPECTS(cell_cv_divs.size() > 1u); using memory::make_const_view; auto n = d.size(); + std::vector<size_type> cv_to_cell_tmp(n, 0); std::vector<value_type> invariant_d_tmp(n, 0); std::vector<value_type> u_tmp(n, 0); @@ -70,18 +72,29 @@ struct matrix_state_flat { invariant_d_tmp[i] += gij; invariant_d_tmp[p[i]] += gij; } + + size_type ci = 0; + for (auto cv_span: util::partition_view(cell_cv_divs)) { + util::fill(util::subrange_view(cv_to_cell_tmp, cv_span), ci); + ++ci; + } + + cv_to_cell = make_const_view(cv_to_cell_tmp); invariant_d = make_const_view(invariant_d_tmp); u = make_const_view(u_tmp); + } - solution = rhs; + // interface for exposing the solution to the outside world + const_view solution() const { + return memory::make_view(rhs); } // Assemble the matrix - // Afterwards the diagonal and RHS will have been set given dt, voltage and current - // dt [ms] + // Afterwards the diagonal and RHS will have been set given dt, voltage and current. + // dt_cell [ms] (per cell) // voltage [mV] // current [nA] - void assemble(value_type dt, const_view voltage, const_view current) { + void assemble(const_view dt_cell, const_view voltage, const_view current) { // determine the grid dimensions for the kernel auto const n = voltage.size(); auto const block_dim = 128; @@ -89,7 +102,7 @@ struct matrix_state_flat { assemble_matrix_flat<value_type, size_type><<<grid_dim, block_dim>>> ( d.data(), rhs.data(), invariant_d.data(), voltage.data(), - current.data(), cv_capacitance.data(), dt, size()); + current.data(), cv_capacitance.data(), cv_to_cell.data(), dt_cell.data(), size()); } void solve() { @@ -100,7 +113,7 @@ struct matrix_state_flat { // perform solve on gpu solve_matrix_flat<value_type, size_type><<<grid_dim, block_dim>>> ( rhs.data(), d.data(), u.data(), parent_index.data(), - cell_index.data(), num_matrices()); + cell_cv_divs.data(), num_matrices()); } std::size_t size() const { @@ -109,7 +122,7 @@ struct matrix_state_flat { private: unsigned num_matrices() const { - return cell_index.size()-1; + return cell_cv_divs.size()-1; } }; diff --git a/src/backends/gpu/matrix_state_interleaved.hpp b/src/backends/gpu/matrix_state_interleaved.hpp index 2f1888395dbaf7624edef3813ad1a9984170e697..a6a127e879a789e9506035180f1195be7555edfe 100644 --- a/src/backends/gpu/matrix_state_interleaved.hpp +++ b/src/backends/gpu/matrix_state_interleaved.hpp @@ -1,7 +1,9 @@ #pragma once #include <memory/memory.hpp> +#include <util/debug.hpp> #include <util/span.hpp> +#include <util/partition.hpp> #include <util/rangeutil.hpp> #include "kernels/solve_matrix.hpp" @@ -55,6 +57,8 @@ struct matrix_state_interleaved { iarray matrix_sizes; // start values corresponding to matrix i in the external storage iarray matrix_index; + // map from matrix index (after permutation) to original cell 
index + iarray matrix_to_cell_index; // Storage for the matrix and parent index in interleaved format. // Includes the cv_capacitance, which is required for matrix assembly. @@ -77,7 +81,7 @@ struct matrix_state_interleaved { // Storage for solution in uninterleaved format. // Used to hold the storage for passing to caller, and must be updated // after each call to the ::solve() method. - array solution; + array solution_; // default constructor matrix_state_interleaved() = default; @@ -89,18 +93,19 @@ struct matrix_state_interleaved { // cv_cap // [pF] // face_cond // [μS] matrix_state_interleaved(const std::vector<size_type>& p, - const std::vector<size_type>& cell_idx, + const std::vector<size_type>& cell_cv_divs, const std::vector<value_type>& cv_cap, const std::vector<value_type>& face_cond) { EXPECTS(cv_cap.size() == p.size()); EXPECTS(face_cond.size() == p.size()); - EXPECTS(cell_idx.back() == p.size()); + EXPECTS(cell_cv_divs.back() == p.size()); // Just because you never know. - EXPECTS(cell_idx.size() <= UINT_MAX); + EXPECTS(cell_cv_divs.size() <= UINT_MAX); using util::make_span; + using util::indirect_view; // Convenience for commonly used type in this routine. using svec = std::vector<size_type>; @@ -110,11 +115,11 @@ struct matrix_state_interleaved { // // Find the size of each matrix. - const auto num_mtx = cell_idx.size()-1; svec sizes; - for (auto it=cell_idx.begin()+1; it!=cell_idx.end(); ++it) { - sizes.push_back(*it - *(it-1)); + for (auto cv_span: util::partition_view(cell_cv_divs)) { + sizes.push_back(cv_span.second-cv_span.first); } + const auto num_mtx = sizes.size(); // Find permutations and sort indexes/sizes. svec perm(num_mtx); @@ -123,15 +128,10 @@ struct matrix_state_interleaved { util::stable_sort_by(perm, [&sizes](size_type i){ return sizes[i]; }); std::reverse(perm.begin(), perm.end()); - // TODO: refactor to be less verbose with permutation_view - svec sizes_p; - for (auto i: make_span(0, num_mtx)) { - sizes_p.push_back(sizes[perm[i]]); - } - svec cell_index_p; - for (auto i: make_span(0, num_mtx)) { - cell_index_p.push_back(cell_idx[perm[i]]); - } + svec sizes_p = util::assign_from(indirect_view(sizes, perm)); + + auto cell_to_cv = util::subrange_view(cell_cv_divs, 0, num_mtx); + svec cell_to_cv_p = util::assign_from(indirect_view(cell_to_cv, perm)); // // Calculate dimensions required to store matrices. @@ -154,7 +154,7 @@ struct matrix_state_interleaved { auto lane = mtx%block_dim(); auto len = sizes_p[mtx]; - auto src = cell_index_p[mtx]; + auto src = cell_to_cv_p[mtx]; auto dst = block*(block_dim()*padded_size) + lane; for (auto i: make_span(0, len)) { // the p indexes are always relative to the start of the p vector. @@ -189,25 +189,30 @@ struct matrix_state_interleaved { // memory, for use as an rvalue in an assignemt to a device vector. auto interleave = [&] (std::vector<T>const& x) { return memory::on_gpu( - flat_to_interleaved(x, sizes_p, cell_index_p, block_dim(), num_mtx, padded_size)); + flat_to_interleaved(x, sizes_p, cell_to_cv_p, block_dim(), num_mtx, padded_size)); }; u = interleave(u_tmp); invariant_d = interleave(invariant_d_tmp); cv_capacitance = interleave(cv_cap); matrix_sizes = memory::make_const_view(sizes_p); - matrix_index = memory::make_const_view(cell_index_p); + matrix_index = memory::make_const_view(cell_to_cv_p); + matrix_to_cell_index = memory::make_const_view(perm); // Allocate space for storing the un-interleaved solution. 
- solution = array(p.size()); + solution_ = array(p.size()); + } + + const_view solution() const { + return solution_; } // Assemble the matrix - // Afterwards the diagonal and RHS will have been set given dt, voltage and current - // dt [ms] + // Afterwards the diagonal and RHS will have been set given dt, voltage and current. + // dt_cell [ms] (per cell) // voltage [mV] // current [nA] - void assemble(value_type dt, const_view voltage, const_view current) { + void assemble(const_view dt_cell, const_view voltage, const_view current) { constexpr auto bd = impl::block_dim(); constexpr auto lw = impl::load_width(); constexpr auto block_dim = bd*lw; @@ -220,7 +225,8 @@ struct matrix_state_interleaved { (d.data(), rhs.data(), invariant_d.data(), voltage.data(), current.data(), cv_capacitance.data(), matrix_sizes.data(), matrix_index.data(), - dt, padded_matrix_size(), num_matrices()); + matrix_to_cell_index.data(), + dt_cell.data(), padded_matrix_size(), num_matrices()); } @@ -234,7 +240,7 @@ struct matrix_state_interleaved { // Copy the solution from interleaved to front end storage. interleaved_to_flat<value_type, size_type, impl::block_dim(), impl::load_width()> - (rhs.data(), solution.data(), matrix_sizes.data(), matrix_index.data(), + (rhs.data(), solution_.data(), matrix_sizes.data(), matrix_index.data(), padded_matrix_size(), num_matrices()); } diff --git a/src/backends/gpu/multi_event_stream.cu b/src/backends/gpu/multi_event_stream.cu new file mode 100644 index 0000000000000000000000000000000000000000..cc19e215c14d191a0e0ac04b579dcad86933025e --- /dev/null +++ b/src/backends/gpu/multi_event_stream.cu @@ -0,0 +1,157 @@ +#include <common_types.hpp> +#include <backends/event.hpp> +#include <backends/gpu/multi_event_stream.hpp> +#include <memory/array.hpp> +#include <memory/copy.hpp> +#include <util/rangeutil.hpp> + +namespace nest { +namespace mc { +namespace gpu { + +namespace kernels { + template <typename T, typename I> + __global__ void mark_until_after( + I n, + I* mark, + const I* span_end, + const T* ev_time, + const T* t_until) + { + I i = threadIdx.x+blockIdx.x*blockDim.x; + if (i<n) { + auto t = t_until[i]; + auto end = span_end[i]; + auto &m = mark[i]; + + while (m!=end && !(ev_time[m]>t)) { + ++m; + } + } + } + + template <typename T, typename I> + __global__ void drop_marked_events( + I n, + I* n_nonempty, + I* span_begin, + const I* span_end, + const I* mark) + { + I i = threadIdx.x+blockIdx.x*blockDim.x; + if (i<n) { + bool emptied = (span_begin[i]<span_end[i] && mark[i]==span_end[i]); + span_begin[i] = mark[i]; + if (emptied) { + atomicAdd(n_nonempty, (I)-1); + } + } + } + + template <typename T, typename I> + __global__ void event_time_if_before( + I n, + const I* span_begin, + const I* span_end, + const T* ev_time, + T* t_until) + { + I i = threadIdx.x+blockIdx.x*blockDim.x; + if (i<n) { + if (span_begin[i]<span_end[i]) { + auto ev_t = ev_time[span_begin[i]]; + if (t_until[i]>ev_t) { + t_until[i] = ev_t; + } + } + } + } +} // namespace kernels + +void multi_event_stream::clear() { + memory::fill(span_begin_, 0u); + memory::fill(span_end_, 0u); + memory::fill(mark_, 0u); + n_nonempty_stream_[0] = 0; +} + +void multi_event_stream::init(std::vector<deliverable_event> staged) { + if (staged.size()>std::numeric_limits<size_type>::max()) { + throw std::range_error("too many events"); + } + + // Build vectors in host memory here and transfer to the device at end. 
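// Worked example (illustration only, values assumed) of the span construction
// performed below: with three streams (cells) and staged events on cells {0, 0, 2}
// after the stable sort by cell_index,
//
//     divisions   = [0, 2, 2, 3]   // divisions[s] = first event index with cell_index >= s
//     span_begin_ = [0, 2, 2]      // divisions[0 .. n_stream_)
//     span_end_   = [2, 2, 3]      // divisions[1 .. n_stream_]
//     n_nonempty  = 2              // cells 0 and 2 have events; cell 1's span is empty
//
// mark_ starts equal to span_begin_; marking and dropping only ever move
// span_begin_[i] forward towards span_end_[i].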
+ std::size_t n_ev = staged.size(); + + std::vector<size_type> divisions(n_stream_+1, 0); + std::vector<size_type> tmp_ev_indices(n_ev); + std::vector<value_type> tmp_ev_values(n_ev); + + ev_time_ = array(n_ev); + ev_weight_ = array(n_ev); + ev_mech_id_ = iarray(n_ev); + ev_index_ = iarray(n_ev); + + util::stable_sort_by(staged, [](const deliverable_event& e) { return e.handle.cell_index; }); + + // Split out event fields and copy to device. + util::assign_by(tmp_ev_values, staged, [](const deliverable_event& e) { return e.weight; }); + memory::copy(tmp_ev_values, ev_weight_); + + util::assign_by(tmp_ev_values, staged, [](const deliverable_event& e) { return e.time; }); + memory::copy(tmp_ev_values, ev_time_); + + util::assign_by(tmp_ev_indices, staged, [](const deliverable_event& e) { return e.handle.mech_id; }); + memory::copy(tmp_ev_indices, ev_mech_id_); + + util::assign_by(tmp_ev_indices, staged, [](const deliverable_event& e) { return e.handle.index; }); + memory::copy(tmp_ev_indices, ev_index_); + + // Determine divisions by `cell_index` in ev list and copy to device. + size_type ev_i = 0; + size_type n_nonempty = 0; + for (size_type s = 1; s<=n_stream_; ++s) { + while (ev_i<n_ev && staged[ev_i].handle.cell_index<s) ++ev_i; + divisions[s] = ev_i; + n_nonempty += (divisions[s]!=divisions[s-1]); + } + + memory::copy(memory::make_view(divisions)(0,n_stream_), span_begin_); + memory::copy(memory::make_view(divisions)(1,n_stream_+1), span_end_); + memory::copy(span_begin_, mark_); + n_nonempty_stream_[0] = n_nonempty; +} + + +// Designate for processing events `ev` at head of each event stream `i` +// until `event_time(ev)` > `t_until[i]`. +void multi_event_stream::mark_until_after(const_view t_until) { + EXPECTS(n_streams()==util::size(t_until)); + + constexpr int blockwidth = 128; + int nblock = 1+(n_stream_-1)/blockwidth; + kernels::mark_until_after<value_type, size_type><<<nblock, blockwidth>>>( + n_stream_, mark_.data(), span_end_.data(), ev_time_.data(), t_until.data()); +} + +// Remove marked events from front of each event stream. +void multi_event_stream::drop_marked_events() { + constexpr int blockwidth = 128; + int nblock = 1+(n_stream_-1)/blockwidth; + kernels::drop_marked_events<value_type, size_type><<<nblock, blockwidth>>>( + n_stream_, n_nonempty_stream_.data(), span_begin_.data(), span_end_.data(), mark_.data()); +} + +// If the head of `i`th event stream exists and has time less than `t_until[i]`, set +// `t_until[i]` to the event time. +void multi_event_stream::event_time_if_before(view t_until) { + constexpr int blockwidth = 128; + int nblock = 1+(n_stream_-1)/blockwidth; + kernels::event_time_if_before<value_type, size_type><<<nblock, blockwidth>>>( + n_stream_, span_begin_.data(), span_end_.data(), ev_time_.data(), t_until.data()); +} + + +} // namespace gpu +} // namespace nest +} // namespace mc diff --git a/src/backends/gpu/multi_event_stream.hpp b/src/backends/gpu/multi_event_stream.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9fcce25f17d408da3bbd7a5e1c4e1ee4ac94a52c --- /dev/null +++ b/src/backends/gpu/multi_event_stream.hpp @@ -0,0 +1,83 @@ +#pragma once + +// Indexed collection of pop-only event queues --- gpu back-end implementation.
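// Typical per-integration-step usage (sketch only; it mirrors the reworked
// fvm_multicell::step_integration further down in this patch, with `events` an
// instance of this class and time_, time_to_, dt_max_, tfinal_ the lowered
// cell's per-cell time state):
//
//     events.mark_until_after(time_);         // mark events due at or before each cell's time
//     // ... mechanisms deliver the marked events via deliver_events()/delivery_data() ...
//     events.drop_marked_events();            // pop delivered events from each stream
//     backend::update_time_to(time_to_, time_, dt_max_, tfinal_);
//     events.event_time_if_before(time_to_);  // do not step past the next pending event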
+ +#include <common_types.hpp> +#include <backends/event.hpp> +#include <memory/array.hpp> +#include <memory/copy.hpp> + +namespace nest { +namespace mc { +namespace gpu { + +class multi_event_stream { +public: + using size_type = cell_size_type; + using value_type = double; + + using array = memory::device_vector<value_type>; + using iarray = memory::device_vector<size_type>; + + using const_view = array::const_view_type; + using view = array::view_type; + + multi_event_stream() {} + + explicit multi_event_stream(size_type n_stream): + n_stream_(n_stream), + span_begin_(n_stream), + span_end_(n_stream), + mark_(n_stream), + n_nonempty_stream_(1) + {} + + size_type n_streams() const { return n_stream_; } + + bool empty() const { return n_nonempty_stream_[0]==0; } + + void clear(); + + // Initialize event streams from a vector of `deliverable_event`. + void init(std::vector<deliverable_event> staged); + + // Designate for processing events `ev` at head of each event stream `i` + // until `event_time(ev)` > `t_until[i]`. + void mark_until_after(const_view t_until); + + // Remove marked events from front of each event stream. + void drop_marked_events(); + + // If the head of `i`th event stream exists and has time less than `t_until[i]`, set + // `t_until[i]` to the event time. + void event_time_if_before(view t_until); + + // Interface for access by mechanism kernels: + struct span_state { + size_type n; + const size_type* ev_mech_id; + const size_type* ev_index; + const value_type* ev_weight; + const size_type* span_begin; + const size_type* mark; + }; + + span_state delivery_data() const { + return {n_stream_, ev_mech_id_.data(), ev_index_.data(), ev_weight_.data(), span_begin_.data(), mark_.data()}; + } + +private: + size_type n_stream_; + array ev_time_; + array ev_weight_; + iarray ev_mech_id_; + iarray ev_index_; + iarray span_begin_; + iarray span_end_; + iarray mark_; + iarray n_nonempty_stream_; +}; + +} // namespace gpu +} // namespace nest +} // namespace mc diff --git a/src/backends/gpu/stimulus.hpp b/src/backends/gpu/stimulus.hpp index 6cc41703dba92be14fd0a87cad59ccf20da0691f..28d36f8565e4ccc1fa2d697e49a03661729ece8e 100644 --- a/src/backends/gpu/stimulus.hpp +++ b/src/backends/gpu/stimulus.hpp @@ -19,7 +19,7 @@ namespace kernels { __global__ void stim_current( const T* delay, const T* duration, const T* amplitude, - const I* node_index, int n, T t, T* current) + const I* node_index, int n, const I* cell_index, const T* time, T* current) { using value_type = T; using iarray = I; @@ -27,7 +27,8 @@ namespace kernels { auto i = threadIdx.x + blockDim.x*blockIdx.x; if (i<n) { - if (t>=delay[i] && t<(delay[i]+duration[i])) { + auto t = time[cell_index[i]]; + if (t>=delay[i] && t<delay[i]+duration[i]) { // use subtraction because the electrode currents are specified // in terms of current into the compartment cuda_atomic_add(current+node_index[i], -amplitude[i]); @@ -47,11 +48,14 @@ public: using iarray = typename base::iarray; using view = typename base::view; using iview = typename base::iview; + using const_view = typename base::const_view; using const_iview = typename base::const_iview; using ion_type = typename base::ion_type; - stimulus(view vec_v, view vec_i, iarray&& node_index): - base(vec_v, vec_i, std::move(node_index)) + static constexpr size_type no_mech_id = (size_type)-1; + + stimulus(const_iview vec_ci, const_view vec_t, const_view vec_t_to, const_view vec_dt, view vec_v, view vec_i, iarray&& node_index): + base(no_mech_id, vec_ci, vec_t, vec_t_to, vec_dt, vec_v, 
vec_i, std::move(node_index)) {} using base::size; @@ -60,10 +64,7 @@ public: return 0; } - void set_params(value_type t_, value_type dt_) override { - t = t_; - dt = dt_; - } + void set_params() override {} std::string name() const override { return "stimulus"; @@ -114,19 +115,18 @@ public: kernels::stim_current<value_type, size_type><<<dim_grid, dim_block>>>( delay.data(), duration.data(), amplitude.data(), - node_index_.data(), n, t, + node_index_.data(), n, vec_ci_.data(), vec_t_.data(), vec_i_.data() ); } - value_type dt = 0; - value_type t = 0; - array amplitude; array duration; array delay; + using base::vec_ci_; + using base::vec_t_; using base::vec_v_; using base::vec_i_; using base::node_index_; diff --git a/src/backends/gpu/threshold_watcher.hpp b/src/backends/gpu/threshold_watcher.hpp index e4624c077239eff3a5a74cda42c60939b1ea8ae8..1d28bb329d01e587ee3bdaae5dc2f5efbffeedf7 100644 --- a/src/backends/gpu/threshold_watcher.hpp +++ b/src/backends/gpu/threshold_watcher.hpp @@ -23,6 +23,7 @@ public: using array = memory::device_vector<T>; using iarray = memory::device_vector<I>; using const_view = typename array::const_view_type; + using const_iview = typename iarray::const_view_type; /// stores a single crossing event struct threshold_crossing { @@ -41,18 +42,24 @@ public: threshold_watcher() = default; threshold_watcher( + const_iview vec_ci, + const_view vec_t_before, + const_view vec_t_after, const_view values, const std::vector<size_type>& index, const std::vector<value_type>& thresh, value_type t=0): + cv_to_cell_(vec_ci), + t_before_(vec_t_before), + t_after_(vec_t_after), values_(values), - index_(memory::make_const_view(index)), + cv_index_(memory::make_const_view(index)), thresholds_(memory::make_const_view(thresh)), prev_values_(values), is_crossed_(size()), stack_(memory::make_managed_ptr<stack_type>(10*size())) { - reset(t); + reset(); } /// Remove all stored crossings that were detected in previous calls @@ -64,26 +71,23 @@ public: /// Reset state machine for each detector. /// Assume that the values in values_ have been set correctly before /// calling, because the values are used to determine the initial state - void reset(value_type t=0) { + void reset() { clear_crossings(); // Make host-side copies of the information needed to calculate // the initial crossed state auto values = memory::on_host(values_); auto thresholds = memory::on_host(thresholds_); - auto index = memory::on_host(index_); + auto cv_index = memory::on_host(cv_index_); // calculate the initial crossed state in host memory - auto crossed = std::vector<size_type>(size()); + std::vector<size_type> crossed(size()); for (auto i: util::make_span(0u, size())) { - crossed[i] = values[index[i]] < thresholds[i] ? 0 : 1; + crossed[i] = values[cv_index[i]] < thresholds[i] ? 0 : 1; } // copy the initial crossed state to device memory - is_crossed_ = memory::on_gpu(crossed); - - // reset time of last test - t_prev_ = t; + memory::copy(crossed, is_crossed_); } bool is_crossed(size_type i) const { @@ -94,36 +98,28 @@ public: return std::vector<threshold_crossing>(stack_->begin(), stack_->end()); } - /// The time at which the last test was performed - value_type last_test_time() const { - return t_prev_; - } - /// Tests each target for changed threshold state. /// Crossing events are recorded for each threshold that has been /// crossed since current time t, and the last time the test was /// performed. 
- void test(value_type t) { - EXPECTS(t_prev_<t); - + void test() { constexpr int block_dim = 128; const int grid_dim = (size()+block_dim-1)/block_dim; test_thresholds<<<grid_dim, block_dim>>>( - t, t_prev_, size(), + cv_to_cell_.data(), t_after_.data(), t_before_.data(), + size(), *stack_, is_crossed_.data(), prev_values_.data(), - index_.data(), values_.data(), thresholds_.data()); + cv_index_.data(), values_.data(), thresholds_.data()); // Check that the number of spikes has not exceeded // the capacity of the stack. EXPECTS(stack_->size() <= stack_->capacity()); - - t_prev_ = t; } /// the number of threashold values that are being monitored std::size_t size() const { - return index_.size(); + return cv_index_.size(); } /// Data type used to store the crossings. @@ -131,13 +127,14 @@ public: using crossing_list = std::vector<threshold_crossing>; private: - + const_iview cv_to_cell_; // index to cell mapping: on gpu + const_view t_before_; // times per cell corresponding to prev_values_: on gpu + const_view t_after_; // times per cell corresponding to values_: on gpu const_view values_; // values to watch: on gpu - iarray index_; // indexes of values to watch: on gpu + iarray cv_index_; // compartment indexes of values to watch: on gpu array thresholds_; // threshold for each watch: on gpu - value_type t_prev_; // time of previous sample: on host - array prev_values_; // values at previous sample time: on host + array prev_values_; // values at previous sample time: on gpu iarray is_crossed_; // bool flag for state of each watch: on gpu memory::managed_ptr<stack_type> stack_; diff --git a/src/backends/multicore/fvm.hpp b/src/backends/multicore/fvm.hpp index 08e919ded66de399494e149291edd2e4f3cfe28a..fa3b1b14e46b5492a7a3735b5a97b6fee119d462 100644 --- a/src/backends/multicore/fvm.hpp +++ b/src/backends/multicore/fvm.hpp @@ -3,13 +3,18 @@ #include <map> #include <string> +#include <backends/event.hpp> #include <common_types.hpp> +#include <event_queue.hpp> #include <mechanism.hpp> #include <memory/memory.hpp> #include <memory/wrappers.hpp> +#include <util/meta.hpp> +#include <util/rangeutil.hpp> #include <util/span.hpp> #include "matrix_state.hpp" +#include "multi_event_stream.hpp" #include "stimulus.hpp" #include "threshold_watcher.hpp" @@ -42,9 +47,12 @@ struct backend { using host_view = view; using host_iview = iview; - /// matrix state - using matrix_state = - nest::mc::multicore::matrix_state<value_type, size_type>; + using matrix_state = nest::mc::multicore::matrix_state<value_type, size_type>; + using multi_event_stream = nest::mc::multicore::multi_event_stream; + + // re-expose common backend event types + using deliverable_event = nest::mc::deliverable_event; + using target_handle = nest::mc::target_handle; // // mechanism infrastructure @@ -57,6 +65,9 @@ struct backend { static mechanism make_mechanism( const std::string& name, + size_type mech_id, + const_iview vec_ci, + const_view vec_t, const_view vec_t_to, const_view vec_dt, view vec_v, view vec_i, const std::vector<value_type>& weights, const std::vector<size_type>& node_indices) @@ -65,7 +76,7 @@ struct backend { throw std::out_of_range("no mechanism in database : " + name); } - return mech_map_.find(name)->second(vec_v, vec_i, array(weights), iarray(node_indices)); + return mech_map_.find(name)->second(mech_id, vec_ci, vec_t, vec_t_to, vec_dt, vec_v, vec_i, array(weights), iarray(node_indices)); } static bool has_mechanism(const std::string& name) { @@ -82,15 +93,47 @@ struct backend { 
nest::mc::multicore::threshold_watcher<value_type, size_type>; -private: + // perform min/max reductions on 'array' type + template <typename V> + static std::pair<V, V> minmax_value(const memory::host_vector<V>& v) { + return util::minmax_value(v); + } + + // perform element-wise comparison on 'array' type against `t_test`. + template <typename V> + static bool any_time_before(const memory::host_vector<V>& t, V t_test) { + return minmax_value(t).first<t_test; + } + + static void update_time_to(array& time_to, const_view time, value_type dt, value_type tmax) { + size_type ncell = util::size(time); + for (size_type i = 0; i<ncell; ++i) { + time_to[i] = std::min(time[i]+dt, tmax); + } + } - using maker_type = mechanism (*)(view, view, array&&, iarray&&); + // set the per-cell and per-compartment dt_ from time_to_ - time_. + static void set_dt(array& dt_cell, array& dt_comp, const_view time_to, const_view time, const_iview cv_to_cell) { + size_type ncell = util::size(dt_cell); + size_type ncomp = util::size(dt_comp); + + for (size_type j = 0; j<ncell; ++j) { + dt_cell[j] = time_to[j]-time[j]; + } + + for (size_type i = 0; i<ncomp; ++i) { + dt_comp[i] = dt_cell[cv_to_cell[i]]; + } + } + +private: + using maker_type = mechanism (*)(value_type, const_iview, const_view, const_view, const_view, view, view, array&&, iarray&&); static std::map<std::string, maker_type> mech_map_; template <template <typename> class Mech> - static mechanism maker(view vec_v, view vec_i, array&& weights, iarray&& node_indices) { + static mechanism maker(value_type mech_id, const_iview vec_ci, const_view vec_t, const_view vec_t_to, const_view vec_dt, view vec_v, view vec_i, array&& weights, iarray&& node_indices) { return mechanisms::make_mechanism<Mech<backend>> - (vec_v, vec_i, std::move(weights), std::move(node_indices)); + (mech_id, vec_ci, vec_t, vec_t_to, vec_dt, vec_v, vec_i, std::move(weights), std::move(node_indices)); } }; diff --git a/src/backends/multicore/matrix_state.hpp b/src/backends/multicore/matrix_state.hpp index 0414a9d02ce7f6d30d9535b3c5ca7ab0553c9c12..15507ca0bc0a8c7f8abf6b16922677daba4b165c 100644 --- a/src/backends/multicore/matrix_state.hpp +++ b/src/backends/multicore/matrix_state.hpp @@ -1,6 +1,7 @@ #pragma once #include <memory/memory.hpp> +#include <util/partition.hpp> #include <util/span.hpp> namespace nest { @@ -17,7 +18,7 @@ public: using const_view = typename array::const_view_type; using iarray = memory::host_vector<size_type>; iarray parent_index; - iarray cell_index; + iarray cell_cv_divs; array d; // [μS] array u; // [μS] @@ -29,23 +30,21 @@ public: // the invariant part of the matrix diagonal array invariant_d; // [μS] - const_view solution; - matrix_state() = default; matrix_state(const std::vector<size_type>& p, - const std::vector<size_type>& cell_idx, + const std::vector<size_type>& cell_cv_divs, const std::vector<value_type>& cap, const std::vector<value_type>& cond): parent_index(memory::make_const_view(p)), - cell_index(memory::make_const_view(cell_idx)), + cell_cv_divs(memory::make_const_view(cell_cv_divs)), d(size(), 0), u(size(), 0), rhs(size()), cv_capacitance(memory::make_const_view(cap)), face_conductance(memory::make_const_view(cond)) { EXPECTS(cap.size() == size()); EXPECTS(cond.size() == size()); - EXPECTS(cell_idx.back() == size()); + EXPECTS(cell_cv_divs.back() == size()); auto n = size(); invariant_d = array(n, 0); @@ -56,49 +55,66 @@ public: invariant_d[i] += gij; invariant_d[p[i]] += gij; } + } + const_view solution() const { // In this back end the solution is 
a simple view of the rhs, which // contains the solution after the matrix_solve is performed. - solution = rhs; + return const_view(rhs); } + // Assemble the matrix - // Afterwards the diagonal and RHS will have been set given dt, voltage and current - // dt [ms] + // Afterwards the diagonal and RHS will have been set given dt, voltage and current. + // dt_cell [ms] (per cell) // voltage [mV] // current [nA] - void assemble(value_type dt, const_view voltage, const_view current) { - auto n = size(); - value_type factor = 1e-3/dt; - for (auto i: util::make_span(0u, n)) { - auto gi = factor*cv_capacitance[i]; + void assemble(const_view dt_cell, const_view voltage, const_view current) { + auto cell_cv_part = util::partition_view(cell_cv_divs); + const size_type ncells = cell_cv_part.size(); + + // loop over submatrices + for (auto m: util::make_span(0, ncells)) { + auto dt = dt_cell[m]; - d[i] = gi + invariant_d[i]; + if (dt>0) { + value_type factor = 1e-3/dt; + for (auto i: util::make_span(cell_cv_part[m])) { + auto gi = factor*cv_capacitance[i]; - rhs[i] = gi*voltage[i] - current[i]; + d[i] = gi + invariant_d[i]; + rhs[i] = gi*voltage[i] - current[i]; + } + } + else { + for (auto i: util::make_span(cell_cv_part[m])) { + d[i] = 0; + rhs[i] = voltage[i]; + } + } } } void solve() { - const size_type ncells = cell_index.size()-1; - // loop over submatrices - for (auto m: util::make_span(0, ncells)) { - auto first = cell_index[m]; - auto last = cell_index[m+1]; - - // backward sweep - for(auto i=last-1; i>first; --i) { - auto factor = u[i] / d[i]; - d[parent_index[i]] -= factor * u[i]; - rhs[parent_index[i]] -= factor * rhs[i]; - } - rhs[first] /= d[first]; - - // forward sweep - for(auto i=first+1; i<last; ++i) { - rhs[i] -= u[i] * rhs[parent_index[i]]; - rhs[i] /= d[i]; + for (auto cv_span: util::partition_view(cell_cv_divs)) { + auto first = cv_span.first; + auto last = cv_span.second; // one past the end + + if (d[first]!=0) { + // backward sweep + for(auto i=last-1; i>first; --i) { + auto factor = u[i] / d[i]; + d[parent_index[i]] -= factor * u[i]; + rhs[parent_index[i]] -= factor * rhs[i]; + } + rhs[first] /= d[first]; + + // forward sweep + for(auto i=first+1; i<last; ++i) { + rhs[i] -= u[i] * rhs[parent_index[i]]; + rhs[i] /= d[i]; + } } } } diff --git a/src/backends/multicore/multi_event_stream.hpp b/src/backends/multicore/multi_event_stream.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9b3ce70511af37be5cd39ab0dcc92d52a1af1faf --- /dev/null +++ b/src/backends/multicore/multi_event_stream.hpp @@ -0,0 +1,143 @@ +#pragma once + +// Indexed collection of pop-only event queues --- multicore back-end implementation. + +#include <limits> +#include <ostream> +#include <utility> + +#include <common_types.hpp> +#include <backends/event.hpp> +#include <util/debug.hpp> +#include <util/range.hpp> +#include <util/rangeutil.hpp> +#include <util/strprintf.hpp> + +namespace nest { +namespace mc { +namespace multicore { + +class multi_event_stream { +public: + using size_type = cell_size_type; + using value_type = double; + + multi_event_stream() {} + + explicit multi_event_stream(size_type n_stream): + span_(n_stream), mark_(n_stream) {} + + size_type n_streams() const { return span_.size(); } + + bool empty() const { return remaining_==0; } + + void clear() { + ev_.clear(); + remaining_ = 0; + + util::fill(span_, span_type(0u, 0u)); + util::fill(mark_, 0u); + } + + // Initialize event streams from a vector of `deliverable_event`. 
+ void init(std::vector<deliverable_event> staged) { + if (staged.size()>std::numeric_limits<size_type>::max()) { + throw std::range_error("too many events"); + } + + ev_ = std::move(staged); + util::stable_sort_by(ev_, [](const deliverable_event& e) { return e.handle.cell_index; }); + + util::fill(span_, span_type(0u, 0u)); + util::fill(mark_, 0u); + + size_type si = 0; + for (size_type ev_i = 0; ev_i<ev_.size(); ++ev_i) { + size_type i = ev_[ev_i].handle.cell_index; + EXPECTS(i<n_streams()); + + if (si<i) { + // Moved on to a new cell: make empty spans for any skipped cells. + for (size_type j = si+1; j<i; ++j) { + span_[j].second = span_[si].second; + } + si = i; + } + // Include event in this cell's span. + span_[si].second = ev_i+1; + } + + while (++si<n_streams()) { + span_[si].second = span_[si-1].second; + } + + for (size_type i = 1u; i<n_streams(); ++i) { + mark_[i] = span_[i].first = span_[i-1].second; + } + + remaining_ = ev_.size(); + } + + // Designate for processing events `ev` at head of each event stream `i` + // until `event_time(ev)` > `t_until[i]`. + template <typename TimeSeq> + void mark_until_after(const TimeSeq& t_until) { + EXPECTS(n_streams()==util::size(t_until)); + + // note: operation on each `i` is independent. + for (size_type i = 0; i<n_streams(); ++i) { + auto& span = span_[i]; + auto& mark = mark_[i]; + auto t = t_until[i]; + + mark = span.first; + while (mark!=span.second && !(ev_[mark].time>t)) { + ++mark; + } + } + } + + // Remove marked events from front of each event stream. + void drop_marked_events() { + // note: operation on each `i` is independent. + for (size_type i = 0; i<n_streams(); ++i) { + remaining_ -= (mark_[i]-span_[i].first); + span_[i].first = mark_[i]; + } + } + + // Return range of marked events on stream `i`. + util::range<deliverable_event*> marked_events(size_type i) { + return {&ev_[span_[i].first], &ev_[mark_[i]]}; + } + + // If the head of `i`th event stream exists and has time less than `t_until[i]`, set + // `t_until[i]` to the event time. + template <typename TimeSeq> + void event_time_if_before(TimeSeq& t_until) { + // note: operation on each `i` is independent. 
+ for (size_type i = 0; i<n_streams(); ++i) { + const auto& span = span_[i]; + if (span.second==span.first) { + continue; + } + + auto ev_t = ev_[span.first].time; + if (t_until[i]>ev_t) { + t_until[i] = ev_t; + } + } + } + +private: + using span_type = std::pair<size_type, size_type>; + + std::vector<deliverable_event> ev_; + std::vector<span_type> span_; + std::vector<size_type> mark_; + size_type remaining_ = 0; +}; + +} // namespace multicore +} // namespace nest +} // namespace mc diff --git a/src/backends/multicore/stimulus.hpp b/src/backends/multicore/stimulus.hpp index 4ca4f0eff0dd603878458e19704e71f2cc7ce5a0..659cd7aec3fb9bb55a8c34d6ad08e6b177646b27 100644 --- a/src/backends/multicore/stimulus.hpp +++ b/src/backends/multicore/stimulus.hpp @@ -24,11 +24,14 @@ public: using iarray = typename base::iarray; using view = typename base::view; using iview = typename base::iview; + using const_view = typename base::const_view; using const_iview = typename base::const_iview; using ion_type = typename base::ion_type; - stimulus(view vec_v, view vec_i, iarray&& node_index): - base(vec_v, vec_i, std::move(node_index)) + static constexpr size_type no_mech_id = (size_type)-1; + + stimulus(const_iview vec_ci, const_view vec_t, const_view vec_t_to, const_view vec_dt, view vec_v, view vec_i, iarray&& node_index): + base(no_mech_id, vec_ci, vec_t, vec_t_to, vec_dt, vec_v, vec_i, std::move(node_index)) {} using base::size; @@ -37,9 +40,7 @@ public: return 0; } - void set_params(value_type t_, value_type dt_) override { - t = t_; - dt = dt_; + void set_params() override { } std::string name() const override { @@ -80,10 +81,12 @@ public: if (amplitude.size() != size()) { throw std::domain_error("stimulus called with mismatched parameter size\n"); } + auto vec_t = util::indirect_view(util::indirect_view(vec_t_, vec_ci_), node_index_); auto vec_i = util::indirect_view(vec_i_, node_index_); - int n = size(); - for(int i=0; i<n; ++i) { - if (t>=delay[i] && t<(delay[i]+duration[i])) { + size_type n = size(); + for (size_type i=0; i<n; ++i) { + auto t = vec_t[i]; + if (t>=delay[i] && t<delay[i]+duration[i]) { // use subtraction because the electrod currents are specified // in terms of current into the compartment vec_i[i] -= amplitude[i]; @@ -91,13 +94,12 @@ public: } } - value_type dt = 0; - value_type t = 0; - std::vector<value_type> amplitude; std::vector<value_type> duration; std::vector<value_type> delay; + using base::vec_ci_; + using base::vec_t_; using base::vec_v_; using base::vec_i_; using base::node_index_; diff --git a/src/backends/multicore/threshold_watcher.hpp b/src/backends/multicore/threshold_watcher.hpp index c7cfb3ab5db0abca8662e6685edbaabb25183088..8a09cdca4f72f59ea534261c207ddcbed161f02c 100644 --- a/src/backends/multicore/threshold_watcher.hpp +++ b/src/backends/multicore/threshold_watcher.hpp @@ -1,5 +1,6 @@ #pragma once +#include <math.hpp> #include <memory/memory.hpp> namespace nest { @@ -15,6 +16,7 @@ public: using array = memory::host_vector<value_type>; using const_view = typename array::const_view_type; using iarray = memory::host_vector<size_type>; + using const_iview = typename iarray::const_view_type; /// stores a single crossing event struct threshold_crossing { @@ -30,17 +32,22 @@ public: threshold_watcher() = default; threshold_watcher( + const_iview vec_ci, + const_view vec_t_before, + const_view vec_t_after, const_view vals, const std::vector<size_type>& indxs, - const std::vector<value_type>& thresh, - value_type t=0): + const std::vector<value_type>& thresh): + 
cv_to_cell_(vec_ci), + t_before_(vec_t_before), + t_after_(vec_t_after), values_(vals), - index_(memory::make_const_view(indxs)), + cv_index_(memory::make_const_view(indxs)), thresholds_(memory::make_const_view(thresh)), v_prev_(vals) { is_crossed_ = iarray(size()); - reset(t); + reset(); } /// Remove all stored crossings that were detected in previous calls @@ -52,37 +59,34 @@ public: /// Reset state machine for each detector. /// Assume that the values in values_ have been set correctly before /// calling, because the values are used to determine the initial state - void reset(value_type t=0) { + void reset() { clear_crossings(); for (auto i=0u; i<size(); ++i) { - is_crossed_[i] = values_[index_[i]]>=thresholds_[i]; + is_crossed_[i] = values_[cv_index_[i]]>=thresholds_[i]; } - t_prev_ = t; } const std::vector<threshold_crossing>& crossings() const { return crossings_; } - /// The time at which the last test was performed - value_type last_test_time() const { - return t_prev_; - } - /// Tests each target for changed threshold state /// Crossing events are recorded for each threshold that /// is crossed since the last call to test - void test(value_type t) { + void test() { for (auto i=0u; i<size(); ++i) { + auto cv = cv_index_[i]; + auto cell = cv_to_cell_[cv]; auto v_prev = v_prev_[i]; - auto v = values_[index_[i]]; + auto v = values_[cv]; auto thresh = thresholds_[i]; + if (!is_crossed_[i]) { if (v>=thresh) { - // the threshold has been passed, so estimate the time using - // linear interpolation + // The threshold has been passed, so estimate the time using + // linear interpolation. auto pos = (thresh - v_prev)/(v - v_prev); - auto crossing_time = t_prev_ + pos*(t - t_prev_); + auto crossing_time = math::lerp(t_before_[cell], t_after_[cell], pos); crossings_.push_back({i, crossing_time}); is_crossed_[i] = true; @@ -96,7 +100,6 @@ public: v_prev_[i] = v; } - t_prev_ = t; } bool is_crossed(size_type i) const { @@ -105,7 +108,7 @@ public: /// the number of threashold values that are being monitored std::size_t size() const { - return index_.size(); + return cv_index_.size(); } /// Data type used to store the crossings. 
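// Numerical illustration (values assumed) of the interpolation used in test()
// above and in the GPU test_thresholds kernel: with t_before = 10.0 ms,
// t_after = 10.25 ms, v_prev = -30 mV, v = -10 mV and thresh = -20 mV,
//     pos = (-20 - (-30)) / (-10 - (-30)) = 0.5
//     crossing_time = math::lerp(10.0, 10.25, 0.5) = 10.125 ms
// i.e. the spike time is now estimated on the cell's own [t_before, t_after]
// interval rather than on a single global [t_prev, t] pair as before.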
@@ -113,11 +116,13 @@ public: using crossing_list = std::vector<threshold_crossing>; private: + const_iview cv_to_cell_; + const_view t_before_; + const_view t_after_; const_view values_; - iarray index_; + iarray cv_index_; array thresholds_; - value_type t_prev_; array v_prev_; crossing_list crossings_; iarray is_crossed_; diff --git a/src/event_queue.hpp b/src/event_queue.hpp index c796da31d7f17b9c3144bd8024dadacbc86d81a8..ea97e48fa0537b44580b547d513fcdc3c54d2d48 100644 --- a/src/event_queue.hpp +++ b/src/event_queue.hpp @@ -1,6 +1,7 @@ #pragma once #include <cstdint> +#include <limits> #include <ostream> #include <queue> #include <type_traits> @@ -9,6 +10,8 @@ #include "common_types.hpp" #include "util/meta.hpp" #include "util/optional.hpp" +#include "util/range.hpp" +#include "util/strprintf.hpp" namespace nest { namespace mc { diff --git a/src/fvm_multicell.hpp b/src/fvm_multicell.hpp index b952d80765dc8f4c6dc6474c77a2557444f1509b..2a7bee9efef4b1cfc89c87e35073881c6d321ef0 100644 --- a/src/fvm_multicell.hpp +++ b/src/fvm_multicell.hpp @@ -58,9 +58,11 @@ public: /// the container used for indexes using iarray = typename backend::iarray; - using target_handle = std::pair<size_type, size_type>; using probe_handle = std::pair<const array fvm_multicell::*, size_type>; + using target_handle = typename backend::target_handle; + using deliverable_event = typename backend::deliverable_event; + fvm_multicell() = default; void resting_potential(value_type potential_mV) { @@ -75,63 +77,74 @@ public: void reset(); + // fvm_multicell::deliver_event is used only for testing void deliver_event(target_handle h, value_type weight) { - mechanisms_[h.first]->net_receive(h.second, weight); + mechanisms_[h.mech_id]->net_receive(h.index, weight); } value_type probe(probe_handle h) const { return (this->*h.first)[h.second]; } - /// Initialize state prior to a sequence of integration steps. + // Initialize state prior to a sequence of integration steps. void setup_integration(value_type tfinal, value_type dt_max) { - EXPECTS(tfinal>t_); EXPECTS(dt_max>0); tfinal_ = tfinal; dt_max_ = dt_max; - integration_running_ = true; - - // TODO: Placeholder; construct backend event delivery - // data structure from events added. - EXPECTS(events_.empty()); - for (auto& ev: staged_events_) { - EXPECTS(ev.time<tfinal); - events_.push(std::move(ev)); - } + compute_min_remaining(); + + EXPECTS(!has_pending_events()); + + events_->init(std::move(staged_events_)); staged_events_.clear(); } - /// Advance one integration step, up to `dt_max_` in each cell. + // Advance one integration step. void step_integration(); - /// Query integration completion state. + // Query integration completion state. bool integration_complete() const { return !integration_running_; } - /// Query per-cell time state. (Placeholders) - value_type time(size_type cell_index) const { - return t_; + // Query per-cell time state. + // Placeholder: external time queries will no longer be required when + // complete integration loop is in lowered cell. + value_type time(size_type cell_idx) const { + refresh_time_cache(); + return cached_time_[cell_idx]; + } + + value_type min_time() const { + return backend::minmax_value(time_).first; } value_type max_time() const { - return t_; + return backend::minmax_value(time_).second; } bool state_synchronized() const { - return true; + auto mm = backend::minmax_value(time_); + return mm.first==mm.second; + } + + /// Set times for all cells (public for testing purposes only). 
+ void set_time_global(value_type t) { + memory::fill(time_, t); + invalidate_time_cache(); + } + + void set_time_to_global(value_type t) { + memory::fill(time_to_, t); + invalidate_time_cache(); } /// Add an event for processing in next integration stage. void add_event(value_type ev_time, target_handle h, value_type weight) { EXPECTS(!integration_running_); - - // TODO: Placeholder; either add to backend event list structure - // incrementally here, or store by cell gid offset for construction - // all at once in `start_integration()`. - staged_events_.push_back({ev_time, h, weight}); + staged_events_.push_back(deliverable_event(ev_time, h, weight)); } /// Following types and methods are public only for testing: @@ -238,10 +251,10 @@ public: } private: - threshold_watcher threshold_watcher_; + /// number of distinct cells (integration domains) + size_type ncell_; - /// current time [ms] - value_type t_ = 0; + threshold_watcher threshold_watcher_; /// resting potential (initial voltage condition) value_type resting_potential_ = -65; @@ -256,19 +269,30 @@ private: /// once integration to `tfinal_` is complete. bool integration_running_ = false; - struct deliverable_event { - double time; - target_handle handle; - double weight; - }; + /// minimum number of integration steps left in integration period. + unsigned min_remaining_steps_ = 0; + + void compute_min_remaining() { + auto tmin = min_time(); + min_remaining_steps_ = tmin>=tfinal_? 0: 1 + (unsigned)((tfinal_-tmin)/dt_max_); + integration_running_ = min_remaining_steps_>0; + } + + void decrement_min_remaining() { + if (min_remaining_steps_>0) --min_remaining_steps_; + integration_running_ = min_remaining_steps_>0; + } /// events staged for upcoming integration stage std::vector<deliverable_event> staged_events_; /// event queue for integration period - /// TODO: Placeholder; this will change when event lists are moved to - /// a backend structure. - event_queue<deliverable_event> events_; + using multi_event_stream = typename backend::multi_event_stream; + std::unique_ptr<multi_event_stream> events_; + + bool has_pending_events() const { + return events_ && !events_->empty(); + } /// the linear system for implicit time stepping of cell state matrix_type matrix_; @@ -276,6 +300,37 @@ private: /// cv_areas_[i] is the surface area of CV i [µm^2] array cv_areas_; + /// the map from compartment index to cell index + iarray cv_to_cell_; + + /// the per-cell simulation time + array time_; + + /// the per-cell integration period end point + array time_to_; + + // the per-compartment dt + // (set to dt_cell_[j] for each compartment in cell j). + array dt_comp_; + + // the per-cell dt + // (set to time_to_[j]-time_[j] for each cell j). + array dt_cell_; + + // Maintain cached copy of time vector for querying by + // cell_group. This will no longer be necessary when full + // integration loop is in lowered cell. 
+ mutable std::vector<value_type> cached_time_; + mutable bool cached_time_valid_ = false; + + void invalidate_time_cache() { cached_time_valid_ = false; } + void refresh_time_cache() const { + if (!cached_time_valid_) { + memory::copy(time_, memory::make_view(cached_time_)); + } + cached_time_valid_ = true; + } + /// the transmembrane current over the surface of each CV [nA] /// I = area*i_m - I_e array current_; @@ -488,7 +543,7 @@ void fvm_multicell<Backend>::initialize( auto targets_size = size(target_handles); auto probes_size = size(probe_handles); - auto ncell = size(cells); + ncell_ = size(cells); auto cell_num_compartments = transform_view(cells, [](const cell& c) { return c.num_compartments(); }); @@ -499,9 +554,23 @@ void fvm_multicell<Backend>::initialize( // initialize storage from total compartment count current_ = array(ncomp, 0); voltage_ = array(ncomp, resting_potential_); + cv_to_cell_ = iarray(ncomp, 0); + time_ = array(ncell_, 0); + time_to_ = array(ncell_, 0); + cached_time_.resize(ncell_); + cached_time_valid_ = false; + dt_cell_ = array(ncell_, 0); + dt_comp_ = array(ncomp, 0); + + // initialize cv_to_cell_ values from compartment partition + std::vector<size_type> cv_to_cell_tmp(ncomp); + for (size_type i = 0; i<ncell_; ++i) { + util::fill(util::subrange_view(cv_to_cell_tmp, cell_comp_part[i]), i); + } + memory::copy(cv_to_cell_tmp, cv_to_cell_); - // look up table: (density) mechanism name -> list of CV range objects - std::map<std::string, std::vector<segment_cv_range>> mech_map; + // look up table: mechanism name -> list of cv_range objects + std::map<std::string, std::vector<segment_cv_range>> mech_to_cv_range; // look up table: point mechanism (synapse) name -> CV indices and target numbers. struct syn_cv_and_target { @@ -513,6 +582,9 @@ void fvm_multicell<Backend>::initialize( // initialize vector used for matrix creation. std::vector<size_type> group_parent_index(ncomp); + // setup per-cell event stores. + events_ = util::make_unique<multi_event_stream>(ncell_); + // create each cell: auto probe_hi = probe_handles.begin(); @@ -524,7 +596,7 @@ void fvm_multicell<Backend>::initialize( /// cv_capacitance_[i] is the capacitance of CV membrane std::vector<value_type> cv_capacitance(ncomp); // [µm^2*F*m^-2 = pF] /// membrane area of each cv - std::vector<value_type> tmp_cv_areas(ncomp); // [µm^2] + std::vector<value_type> tmp_cv_areas(ncomp); // [µm^2] // used to build the information required to construct spike detectors std::vector<size_type> spike_detector_index; @@ -533,13 +605,13 @@ void fvm_multicell<Backend>::initialize( // Iterate over the input cells and build the indexes etc that descrbe the // fused cell group. On completion: // - group_paranet_index contains the full parent index for the fused cells. - // - mech_map and syn_mech_map provide a map from mechanism names to an + // - mech_to_cv_range and syn_mech_map provide a map from mechanism names to an // iterable container of compartment ranges, which are used later to // generate the node index for each mechanism kind. // - the tmp_* vectors contain compartment-specific information for each // compartment in the fused cell group (areas, capacitance, etc). // - each probe, stimulus and detector is attached to its compartment. 
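// Illustration (values assumed) of the compartment-to-cell map initialised above:
// two cells with 3 and 2 compartments give cell_comp_part spans [0,3) and [3,5),
// hence cv_to_cell_ = [0, 0, 0, 1, 1]. The per-compartment timestep is later
// derived from it by backend::set_dt as dt_comp_[i] = dt_cell_[cv_to_cell_[i]].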
- for (auto i: make_span(0, ncell)) { + for (auto i: make_span(0, ncell_)) { const auto& c = cells[i]; auto comp_ival = cell_comp_part[i]; @@ -567,7 +639,7 @@ void fvm_multicell<Backend>::initialize( for (const auto& mech: seg->mechanisms()) { if (mech.name()!="membrane") { - mech_map[mech.name()].push_back(cv_range); + mech_to_cv_range[mech.name()].push_back(cv_range); } } } @@ -610,6 +682,7 @@ void fvm_multicell<Backend>::initialize( // optimizations that rely on this assumption. if (stim_index.size()) { auto stim = new stimulus( + cv_to_cell_, time_, time_to_, dt_comp_, voltage_, current_, memory::make_const_view(stim_index)); stim->set_parameters(stim_amplitudes, stim_durations, stim_delays); mechanisms_.push_back(mechanism(stim)); @@ -643,7 +716,7 @@ void fvm_multicell<Backend>::initialize( // set a back-end supplied watcher on the voltage vector threshold_watcher_ = - threshold_watcher(voltage_, spike_detector_index, thresholds, 0); + threshold_watcher(cv_to_cell_, time_, time_to_, voltage_, spike_detector_index, thresholds); // confirm user-supplied container probes were appropriately sized. EXPECTS(probes_size==probes_count); @@ -659,15 +732,17 @@ void fvm_multicell<Backend>::initialize( // compartments with that mechanism, then build the mechanism instance. std::vector<size_type> mech_cv_index(ncomp); std::vector<value_type> mech_cv_weight(ncomp); - std::map<std::string, std::vector<size_type>> mech_index_map; - for (auto const& mech: mech_map) { + std::map<std::string, std::vector<size_type>> mech_to_cv_index; + for (auto const& entry: mech_to_cv_range) { + const auto& mech_name = entry.first; + const auto& seg_cv_ranges = entry.second; + // Clear the pre-allocated storage for mechanism indexes and weights. // Reuse the same vectors each time to have only one malloc and free // outside of the loop for each mech_cv_index.clear(); mech_cv_weight.clear(); - const auto& seg_cv_ranges = mech.second; for (auto& rng: seg_cv_ranges) { if (rng.has_parent()) { // locate the parent cv in the partially constructed list of cv indexes @@ -697,39 +772,39 @@ void fvm_multicell<Backend>::initialize( w *= 1e-2; } + size_type mech_id = mechanisms_.size(); mechanisms_.push_back( - backend::make_mechanism(mech.first, voltage_, current_, mech_cv_weight, mech_cv_index)); + backend::make_mechanism(mech_name, mech_id, cv_to_cell_, time_, time_to_, dt_comp_, voltage_, current_, mech_cv_weight, mech_cv_index)); // Save the indices for ion set up below. - mech_index_map[mech.first] = mech_cv_index; + mech_to_cv_index[mech_name] = mech_cv_index; } // Create point (synapse) mechanisms. for (auto& syni: syn_mech_map) { - size_type mech_index = mechanisms_.size(); + size_type mech_id = mechanisms_.size(); const auto& mech_name = syni.first; auto& cv_assoc = syni.second; - // Sort CV indices but keep track of their corresponding targets. auto cv_index = [](syn_cv_and_target x) { return x.cv; }; - util::sort_by(cv_assoc, cv_index); + util::stable_sort_by(cv_assoc, cv_index); std::vector<cell_lid_type> cv_indices = assign_from(transform_view(cv_assoc, cv_index)); // Create the mechanism. // An empty weight vector is supplied, because there are no weights applied to point // processes, because their currents are calculated with the target units of [nA] mechanisms_.push_back( - backend::make_mechanism(mech_name, voltage_, current_, {}, cv_indices)); + backend::make_mechanism(mech_name, mech_id, cv_to_cell_, time_, time_to_, dt_comp_, voltage_, current_, {}, cv_indices)); // Save the indices for ion set up below. 
- mech_index_map[mech_name] = cv_indices; + mech_to_cv_index[mech_name] = cv_indices; // Make the target handles. cell_lid_type instance = 0; for (auto entry: cv_assoc) { - target_handles[entry.target] = {mech_index, instance++}; + target_handles[entry.target] = target_handle(mech_id, instance++, cv_to_cell_tmp[entry.cv]); } } @@ -743,7 +818,7 @@ void fvm_multicell<Backend>::initialize( std::set<size_type> index_set; for (auto const& mech : mechanisms_) { if(mech->uses_ion(ion)) { - auto const& ni = mech_index_map[mech->name()]; + auto const& ni = mech_to_cv_index[mech->name()]; index_set.insert(ni.begin(), ni.end()); } } @@ -757,7 +832,7 @@ void fvm_multicell<Backend>::initialize( // join the ion reference in each mechanism into the cell-wide ion state for (auto& mech : mechanisms_) { if (mech->uses_ion(ion)) { - auto const& ni = mech_index_map[mech->name()]; + auto const& ni = mech_to_cv_index[mech->name()]; mech->set_ion(ion, ions_[ion], util::make_copy<std::vector<size_type>> (algorithms::index_into(ni, indexes))); } @@ -783,69 +858,70 @@ void fvm_multicell<Backend>::initialize( memory::fill(ion_ca().internal_concentration(), 5e-5); // mM memory::fill(ion_ca().external_concentration(), 2.0); // mM - // initialise mechanism and voltage state + // initialize mechanism and voltage state reset(); } template <typename Backend> void fvm_multicell<Backend>::reset() { memory::fill(voltage_, resting_potential_); - t_ = 0.; + + set_time_global(0); + set_time_to_global(0); + for (auto& m : mechanisms_) { // TODO : the parameters have to be set before the nrn_init // for now use a dummy value of dt. - m->set_params(t_, 0.025); + m->set_params(); m->nrn_init(); } // Reset state of the threshold watcher. // NOTE: this has to come after the voltage_ values have been reinitialized, // because these values are used by the watchers to set their initial state. - threshold_watcher_.reset(t_); + threshold_watcher_.reset(); // Reset integration state. tfinal_ = 0; dt_max_ = 0; integration_running_ = false; staged_events_.clear(); - events_.clear(); + events_->clear(); } template <typename Backend> void fvm_multicell<Backend>::step_integration() { - // Integrate cell states from `t_` by `dt_max_` if permissible, - // or otherwise until the next event time or `t_final`. 
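// Illustrative sketch (not part of fvm_multicell): the per-cell stepping
// scheme used in the new step_integration() body below. Each cell c advances
// from time[c] to time_to[c] = min(time[c] + dt_max, t_final); the patch then
// clamps time_to further to the cell's next event time via
// events_->event_time_if_before (not shown here). Every compartment j takes
// dt_comp[j] = time_to[cv_to_cell[j]] - time[cv_to_cell[j]]. The free
// functions below are plain-std stand-ins for backend::update_time_to and
// backend::set_dt; their real signatures live in the backend code.
#include <algorithm>
#include <cstddef>
#include <vector>

void update_time_to(std::vector<double>& time_to, const std::vector<double>& time,
                    double dt_max, double t_final) {
    for (std::size_t c = 0; c < time.size(); ++c) {
        time_to[c] = std::min(time[c] + dt_max, t_final);
    }
}

void set_dt(std::vector<double>& dt_cell, std::vector<double>& dt_comp,
            const std::vector<double>& time_to, const std::vector<double>& time,
            const std::vector<std::size_t>& cv_to_cell) {
    for (std::size_t c = 0; c < dt_cell.size(); ++c) {
        dt_cell[c] = time_to[c] - time[c];
    }
    for (std::size_t j = 0; j < dt_comp.size(); ++j) {
        dt_comp[j] = dt_cell[cv_to_cell[j]];
    }
}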
EXPECTS(integration_running_); - while (auto ev = events_.pop_if_not_after(t_)) { - deliver_event(ev->handle, ev->weight); - } - - value_type t_to = std::min(t_+dt_max_, tfinal_); - - if (auto t = events_.time_if_before(t_to)) { - t_to = *t; - } - - value_type dt = t_to-t_; - EXPECTS(dt>0); - PE("current"); memory::fill(current_, 0.); - // update currents from ion channels - for(auto& m : mechanisms_) { + // mark pending events for delivery + events_->mark_until_after(time_); + + // deliver pending events and update current contributions from mechanisms + for(auto& m: mechanisms_) { PE(m->name().c_str()); - m->set_params(t_, dt); + m->deliver_events(*events_); m->nrn_current(); PL(); } + + // remove delivered events from queue and set time_to_ + events_->drop_marked_events(); + + backend::update_time_to(time_to_, time_, dt_max_, tfinal_); + invalidate_time_cache(); + events_->event_time_if_before(time_to_); PL(); + // set per-cell and per-compartment dt (constant within a cell) + backend::set_dt(dt_cell_, dt_comp_, time_to_, time_, cv_to_cell_); + // solve the linear system PE("matrix", "setup"); - matrix_.assemble(dt, voltage_, current_); + matrix_.assemble(dt_cell_, voltage_, current_); PL(); PE("solve"); matrix_.solve(); @@ -855,22 +931,28 @@ void fvm_multicell<Backend>::step_integration() { // integrate state of gating variables etc. PE("state"); - for(auto& m : mechanisms_) { + for(auto& m: mechanisms_) { PE(m->name().c_str()); m->nrn_state(); PL(); } PL(); - t_ = t_to; + memory::copy(time_to_, time_); + invalidate_time_cache(); // update spike detector thresholds - threshold_watcher_.test(t_); + threshold_watcher_.test(); // are we there yet? - integration_running_ = t_<tfinal_; + if (!min_remaining_steps_) { + compute_min_remaining(); + } + else { + decrement_min_remaining(); + } - EXPECTS(integration_running_ || events_.empty()); + EXPECTS(integration_running_ || !has_pending_events()); } } // namespace fvm diff --git a/src/matrix.hpp b/src/matrix.hpp index 8f010763cce427011f191b36c821fe7c0167e9df..0afadd79f897d1ce2bf9a0a8b9ed5377e225d629 100644 --- a/src/matrix.hpp +++ b/src/matrix.hpp @@ -36,10 +36,10 @@ public: matrix() = default; - matrix( const std::vector<size_type>& pi, - const std::vector<size_type>& ci, - const std::vector<value_type>& cv_capacitance, - const std::vector<value_type>& face_conductance): + matrix(const std::vector<size_type>& pi, + const std::vector<size_type>& ci, + const std::vector<value_type>& cv_capacitance, + const std::vector<value_type>& face_conductance): parent_index_(memory::make_const_view(pi)), cell_index_(memory::make_const_view(ci)), state_(pi, ci, cv_capacitance, face_conductance) @@ -69,17 +69,16 @@ public: } /// Assemble the matrix for given dt - void assemble(double dt, const_view voltage, const_view current) { - state_.assemble(dt, voltage, current); + void assemble(const_view dt_cell, const_view voltage, const_view current) { + state_.assemble(dt_cell, voltage, current); } /// Get a view of the solution const_view solution() const { - return state_.solution; + return state_.solution(); } - private: - +private: /// the parent indice that describe matrix structure iarray parent_index_; diff --git a/src/mc_cell_group.hpp b/src/mc_cell_group.hpp index 2ed838e8f04e9921449183b554f57e150c8d5a4f..d1e2dae8a8e9f160d901daa780267bbb96306747 100644 --- a/src/mc_cell_group.hpp +++ b/src/mc_cell_group.hpp @@ -117,36 +117,40 @@ public: lowered_.setup_integration(tfinal, dt); + util::optional<time_type> first_sample_time = 
sample_events_.time_if_before(tfinal); std::vector<sample_event> requeue_sample_events; while (!lowered_.integration_complete()) { // Take any pending samples. // TODO: Placeholder: this will be replaced by a backend polling implementation. - PE("sampling"); - time_type cell_max_time = lowered_.max_time(); + if (first_sample_time) { + PE("sampling"); + time_type cell_max_time = lowered_.max_time(); - requeue_sample_events.clear(); - while (auto m = sample_events_.pop_if_before(cell_max_time)) { - auto& s = samplers_[m->sampler_index]; - EXPECTS((bool)s.sampler); + requeue_sample_events.clear(); + while (auto m = sample_events_.pop_if_before(cell_max_time)) { + auto& s = samplers_[m->sampler_index]; + EXPECTS((bool)s.sampler); - time_type cell_time = lowered_.time(s.cell_gid-gid_base_); - if (cell_time<m->time) { - // This cell hasn't reached this sample time yet. - requeue_sample_events.push_back(*m); - } - else { - auto next = s.sampler(cell_time, lowered_.probe(s.handle)); - if (next) { - m->time = std::max(*next, cell_time); + time_type cell_time = lowered_.time(s.cell_gid-gid_base_); + if (cell_time<m->time) { + // This cell hasn't reached this sample time yet. requeue_sample_events.push_back(*m); } + else { + auto next = s.sampler(cell_time, lowered_.probe(s.handle)); + if (next) { + m->time = std::max(*next, cell_time); + requeue_sample_events.push_back(*m); + } + } } + for (auto& ev: requeue_sample_events) { + sample_events_.push(std::move(ev)); + } + first_sample_time = sample_events_.time_if_before(tfinal); + PL(); } - for (auto& ev: requeue_sample_events) { - sample_events_.push(std::move(ev)); - } - PL(); // Ask lowered_ cell to integrate 'one step', delivering any // events accordingly. diff --git a/src/mechanism.hpp b/src/mechanism.hpp index 8cde22d2f1103006de589904c23ff1bc9f2c0b65..b0bc86e6b7f81d973f452dffa5f33a6781466746 100644 --- a/src/mechanism.hpp +++ b/src/mechanism.hpp @@ -3,11 +3,11 @@ #include <algorithm> #include <memory> #include <string> -#include <util/meta.hpp> #include <ion.hpp> #include <parameter_list.hpp> #include <util/indirect.hpp> +#include <util/meta.hpp> #include <util/make_unique.hpp> namespace nest { @@ -39,7 +39,14 @@ public: using ion_type = ion<backend>; - mechanism(view vec_v, view vec_i, iarray&& node_index): + using multi_event_stream = typename backend::multi_event_stream; + + mechanism(size_type mech_id, const_iview vec_ci, const_view vec_t, const_view vec_t_to, const_view vec_dt, view vec_v, view vec_i, iarray&& node_index): + mech_id_(mech_id), + vec_ci_(vec_ci), + vec_t_(vec_t), + vec_t_to_(vec_t_to), + vec_dt_(vec_dt), vec_v_(vec_v), vec_i_(vec_i), node_index_(std::move(node_index)) @@ -53,22 +60,39 @@ public: return node_index_; } - virtual void set_params(value_type t_, value_type dt_) = 0; + virtual void set_params() = 0; virtual std::string name() const = 0; virtual std::size_t memory() const = 0; virtual void nrn_init() = 0; virtual void nrn_state() = 0; virtual void nrn_current() = 0; - virtual void net_receive(int, value_type) {}; + virtual void deliver_events(multi_event_stream& events) {}; virtual bool uses_ion(ionKind) const = 0; virtual void set_ion(ionKind k, ion_type& i, const std::vector<size_type>& index) = 0; - virtual mechanismKind kind() const = 0; + // net_receive() is used internally by deliver_events(), but + // is exposed primarily for unit testing. + virtual void net_receive(int, value_type) {}; + virtual ~mechanism() = default; + // Mechanism identifier: index into list of mechanisms on cell group. 
+ size_type mech_id_; + + // Maps compartment index to cell index. + const_iview vec_ci_; + // Maps cell index to integration start time. + const_view vec_t_; + // Maps cell index to integration stop time. + const_view vec_t_to_; + // Maps compartment index to (stop time) - (start time). + const_view vec_dt_; + // Maps compartment index to voltage. view vec_v_; + // Maps compartment index to current. view vec_i_; + // Maps mechanism instance index to compartment index. iarray node_index_; }; @@ -77,14 +101,17 @@ using mechanism_ptr = std::unique_ptr<mechanism<Backend>>; template <typename M> auto make_mechanism( - typename M::view vec_v, - typename M::view vec_i, - typename M::array&& weights, - typename M::iarray&& node_indices) --> decltype(util::make_unique<M>(vec_v, vec_i, std::move(weights), std::move(node_indices))) -{ - return util::make_unique<M>(vec_v, vec_i, std::move(weights), std::move(node_indices)); -} + typename M::size_type mech_id, + typename M::const_iview vec_ci, + typename M::const_view vec_t, + typename M::const_view vec_t_to, + typename M::const_view vec_dt, + typename M::view vec_v, + typename M::view vec_i, + typename M::array&& weights, + typename M::iarray&& node_indices +) +DEDUCED_RETURN_TYPE(util::make_unique<M>(mech_id, vec_ci, vec_t, vec_t_to, vec_dt, vec_v, vec_i, std::move(weights), std::move(node_indices))) } // namespace mechanisms } // namespace mc diff --git a/src/util/compat.hpp b/src/util/compat.hpp index 6c12be4e3e06e2ed92042bb410652f40fb4d644d..4f1c2d34aa24a3b3d277f5a6115123fb7ca9ada7 100644 --- a/src/util/compat.hpp +++ b/src/util/compat.hpp @@ -44,5 +44,4 @@ inline void compiler_barrier_if_icc_leq(unsigned ver) { template <typename X> inline constexpr bool isinf(X x) { return std::isinf(x); } - } // namespace compat diff --git a/src/util/debug.hpp b/src/util/debug.hpp index 3c3eaa853d70f4f93e0ab71eb3e81efd8424d9c9..08c6c0dc9e4ad366af0034316772cb3de2787cc2 100644 --- a/src/util/debug.hpp +++ b/src/util/debug.hpp @@ -3,6 +3,7 @@ #include <iostream> #include <sstream> #include <mutex> +#include <utility> #include <threading/threading.hpp> #include "unwind.hpp" @@ -50,6 +51,7 @@ template <typename... Args> void debug_emit_trace(const char* file, int line, const char* varlist, const Args&... args) { if (nest::mc::threading::multithreaded()) { std::stringstream buffer; + buffer.precision(17); debug_emit_trace_leader(buffer, file, line, varlist); debug_emit(buffer, args...); @@ -65,6 +67,34 @@ void debug_emit_trace(const char* file, int line, const char* varlist, const Arg } } +namespace impl { + template <typename Seq, typename Separator> + struct sepval { + const Seq& seq; + Separator sep; + + sepval(const Seq& seq, Separator sep): seq(seq), sep(std::move(sep)) {} + + friend std::ostream& operator<<(std::ostream& out, const sepval& sv) { + bool emitsep = false; + for (const auto& v: sv.seq) { + if (emitsep) out << sv.sep; + emitsep = true; + out << v; + } + return out; + } + }; +} + +// Wrap a sequence or container of values so that they can be printed +// to an `std::ostream` with the elements separated by the supplied +// separator. 
+template <typename Seq, typename Separator> +impl::sepval<Seq, Separator> sepval(const Seq& seq, Separator sep) { + return impl::sepval<Seq, Separator>(seq, std::move(sep)); +} + } // namespace util } // namespace mc } // namespace nest diff --git a/src/util/meta.hpp b/src/util/meta.hpp index b1ce5f7540407844e60832b03714f93fd86b0b1b..35b1b3e3363c14b6fcb9f0162c0845b748667ea3 100644 --- a/src/util/meta.hpp +++ b/src/util/meta.hpp @@ -43,9 +43,38 @@ constexpr auto cend(const T& c) -> decltype(compat::end(c)) { return compat::end(c); } -template <typename T> -constexpr bool empty(const T& c) { - return c.empty(); +// Use sequence `empty() const` method if exists, otherwise +// compare begin and end. + +namespace impl { + template <typename C> + struct has_const_empty_method { + template <typename T> + static decltype(std::declval<const T>().empty(), std::true_type{}) test(int); + template <typename T> + static std::false_type test(...); + + using type = decltype(test<C>(0)); + }; + + // For correct ADL on begin and end: + using std::begin; + using std::end; + + template <typename Seq> + constexpr bool empty(const Seq& seq, std::false_type) { + return begin(seq)==end(seq); + } + + template <typename Seq> + constexpr bool empty(const Seq& seq, std::true_type) { + return seq.empty(); + } +} + +template <typename Seq> +constexpr bool empty(const Seq& seq) { + return impl::empty(seq, typename impl::has_const_empty_method<Seq>::type{}); } template <typename T, std::size_t N> diff --git a/src/util/path.hpp b/src/util/path.hpp index e3114518cf597611f101597b7f4146b5b12c98ed..fa206a27c3b84ca48d67355ee8355f418ed7323c 100644 --- a/src/util/path.hpp +++ b/src/util/path.hpp @@ -26,6 +26,10 @@ #include <util/meta.hpp> #include <util/rangeutil.hpp> +#ifndef _POSIX_C_SOURCE +#error "no non-POSIX implementations for filesystem classes or methods" +#endif + namespace nest { namespace mc { namespace util { diff --git a/src/util/rangeutil.hpp b/src/util/rangeutil.hpp index 7dee14b911f9e5bb81d8778d12ec141a0e258ffe..8d03be492d52ade3b90c5007e1bd83919cc2636c 100644 --- a/src/util/rangeutil.hpp +++ b/src/util/rangeutil.hpp @@ -41,26 +41,43 @@ range_view(Seq& seq) { template < typename Seq, - typename Iter = typename sequence_traits<Seq>::iterator, - typename Size = typename sequence_traits<Seq>::size_type + typename Offset1, + typename Offset2, + typename Iter = typename sequence_traits<Seq>::iterator > enable_if_t<is_forward_iterator<Iter>::value, range<Iter>> -subrange_view(Seq& seq, Size bi, Size ei) { - Iter b = std::next(std::begin(seq), bi); - Iter e = std::next(b, ei-bi); +subrange_view(Seq& seq, Offset1 bi, Offset2 ei) { + Iter b = std::begin(seq); + std::advance(b, bi); + + Iter e = b; + std::advance(e, ei-bi); return make_range(b, e); } template < typename Seq, - typename Iter = typename sequence_traits<Seq>::iterator, - typename Size = typename sequence_traits<Seq>::size_type + typename Offset1, + typename Offset2, + typename Iter = typename sequence_traits<Seq>::iterator > enable_if_t<is_forward_iterator<Iter>::value, range<Iter>> -subrange_view(Seq& seq, std::pair<Size, Size> index) { - Iter b = std::next(std::begin(seq), index.first); - Iter e = std::next(b, index.second-index.first); - return make_range(b, e); +subrange_view(Seq& seq, std::pair<Offset1, Offset2> index) { + return subrange_view(seq, index.first, index.second); +} + +// Fill container or range. 
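// Usage sketch for the sepval helper added to util/debug.hpp above; this
// small program is illustrative and not part of the patch.
#include <iostream>
#include <vector>

#include <util/debug.hpp>   // declares nest::mc::util::sepval in this tree

int main() {
    std::vector<int> xs = {1, 2, 3};
    // Prints "1, 2, 3": elements separated by the supplied separator,
    // with no trailing separator.
    std::cout << nest::mc::util::sepval(xs, ", ") << "\n";
}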
+ +template <typename Seq, typename V> +void fill(Seq& seq, const V& value) { + auto canon = canonical_view(seq); + std::fill(canon.begin(), canon.end(), value); +} + +template <typename Range, typename V> +void fill(const Range& seq, const V& value) { + auto canon = canonical_view(seq); + std::fill(canon.begin(), canon.end(), value); } // Append sequence to a container @@ -238,7 +255,7 @@ max_element_by(const Seq& seq, const Proj& proj) { }); } -// Maximum value +// Maximum value. // // Value semantics instead of iterator semantics means it will operate // with input iterators. Will return default-constructed value if sequence @@ -259,7 +276,7 @@ Value max_value(const Seq& seq, Compare cmp = Compare{}) { auto i = std::begin(seq); auto e = std::end(seq); - auto m = *i; + Value m = *i; while (++i!=e) { Value x = *i; if (cmp(m, x)) { @@ -269,6 +286,34 @@ Value max_value(const Seq& seq, Compare cmp = Compare{}) { return m; } +// Minimum and maximum value. + +template < + typename Seq, + typename Value = typename sequence_traits<Seq>::value_type, + typename Compare = std::less<Value> +> +std::pair<Value, Value> minmax_value(const Seq& seq, Compare cmp = Compare{}) { + if (util::empty(seq)) { + return {Value{}, Value{}}; + } + + auto i = std::begin(seq); + auto e = std::end(seq); + Value lower = *i; + Value upper = *i; + while (++i!=e) { + Value x = *i; + if (cmp(upper, x)) { + upper = std::move(x); + } + else if (cmp(x, lower)) { + lower = std::move(x); + } + } + return {lower, upper}; +} + template <typename C, typename Seq> C make_copy(Seq const& seq) { return C{std::begin(seq), std::end(seq)}; diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 06899d2d9498959dbf44116ec2d75700c86d39bb..789969bd47d4dbb968d5c0a771fcfd7587f23b6a 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -21,6 +21,7 @@ set(TEST_CUDA_SOURCES test_mc_cell_group.cu test_gpu_stack.cu test_matrix.cu + test_multi_event_stream.cu test_spikes.cu test_vector.cu @@ -50,6 +51,7 @@ set(TEST_SOURCES test_math.cpp test_matrix.cpp test_mechanisms.cpp + test_multi_event_stream.cpp test_nop.cpp test_optional.cpp test_parameters.cpp @@ -89,6 +91,8 @@ endif() target_include_directories(test.exe PRIVATE "${mech_proto_dir}/..") if(NMC_WITH_CUDA) + # Omit -DDATADIR for cuda target because of CMake 3.7/3.8 FindCUDA quoting bug. 
+ set(TARGETS ${TARGETS} test_cuda.exe) cuda_add_executable(test_cuda.exe ${TEST_CUDA_SOURCES} ${HEADERS}) endif() @@ -108,3 +112,9 @@ foreach(target ${TARGETS}) RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/tests" ) endforeach() + +# Temporary hack (pending PR #266) +if(NMC_WITH_CUDA) + target_link_libraries(test_cuda.exe LINK_PUBLIC nestmc) +endif() + diff --git a/tests/unit/test_backend.cpp b/tests/unit/test_backend.cpp index 08679d0d0b6e750e1611cbf0a9cbfc87ca68975f..109d75962f6772a90503ea8589ff617d926c11e0 100644 --- a/tests/unit/test_backend.cpp +++ b/tests/unit/test_backend.cpp @@ -17,7 +17,7 @@ TEST(backends, gpu_is_null) { EXPECT_FALSE(backend::has_mechanism("hh")); EXPECT_THROW( - backend::make_mechanism("hh", backend::view(), backend::view(), {}, {}), + backend::make_mechanism("hh", 0, backend::const_iview(), backend::const_view(), backend::const_view(), backend::const_view(), backend::view(), backend::view(), {}, {}), std::runtime_error); } } diff --git a/tests/unit/test_fvm_multi.cpp b/tests/unit/test_fvm_multi.cpp index 7f491f9fb7d7b748e3743b081a5351bf4dbe364b..6a9863191e2f71d4bc1733caabb15fc4311cda55 100644 --- a/tests/unit/test_fvm_multi.cpp +++ b/tests/unit/test_fvm_multi.cpp @@ -209,14 +209,17 @@ TEST(fvm_multi, stimulus) // test 1: Test that no current is injected at t=0 memory::fill(I, 0.); - stims->set_params(0, 0.1); + fvcell.set_time_global(0.); + fvcell.set_time_to_global(0.1); + stims->set_params(); stims->nrn_current(); for (auto i: I) { EXPECT_EQ(i, 0.); } // test 2: Test that current is injected at soma at t=1 - stims->set_params(1, 0.1); + fvcell.set_time_global(1.); + fvcell.set_time_to_global(1.1); stims->nrn_current(); EXPECT_EQ(I[soma_idx], -0.1); @@ -224,13 +227,16 @@ TEST(fvm_multi, stimulus) // Note that we test for injection of -0.2, because the // current contributions are accumulative, and the current // values have not been cleared since the last update. - stims->set_params(1.5, 0.1); + fvcell.set_time_global(1.5); + fvcell.set_time_to_global(1.6); + stims->set_params(); stims->nrn_current(); EXPECT_EQ(I[soma_idx], -0.2); // test 4: test at t=10ms, when the the soma stim is not active, and // dendrite stimulus is injecting a current of 0.3 nA - stims->set_params(10, 0.1); + fvcell.set_time_global(10.); + fvcell.set_time_to_global(10.1); stims->nrn_current(); EXPECT_EQ(I[soma_idx], -0.2); EXPECT_EQ(I[dend_idx], -0.3); @@ -385,10 +391,11 @@ void run_target_handle_test(std::vector<handle_info> all_handles) { for (unsigned ci = 0; ci<=1; ++ci) { for (auto h: handles[ci]) { // targets are represented by a pair of mechanism index and instance index - const auto& mech = fvcell.mechanisms()[targets[i].first]; + const auto& mech = fvcell.mechanisms()[targets[i].mech_id]; const auto& cvidx = mech->node_index(); EXPECT_EQ(h.mech, mech->name()); - EXPECT_EQ(h.cv, cvidx[targets[i].second]); + EXPECT_EQ(h.cv, cvidx[targets[i].index]); + EXPECT_EQ(h.cell, targets[i].cell_index); ++i; } } diff --git a/tests/unit/test_matrix.cpp b/tests/unit/test_matrix.cpp index f74a6760047119678ef8192d1bca26c1ab15350c..7cc1e959acbc17dba8620d467122958219c4d457 100644 --- a/tests/unit/test_matrix.cpp +++ b/tests/unit/test_matrix.cpp @@ -8,6 +8,8 @@ #include <backends/multicore/fvm.hpp> #include <util/span.hpp> +#include "common.hpp" + using namespace nest::mc; using matrix_type = matrix<nest::mc::multicore::backend>; @@ -77,3 +79,96 @@ TEST(matrix, solve_host) } } } + +TEST(matrix, zero_diagonal) +{ + // Combined matrix may have zero-blocks, corresponding to a zero dt. 
+ // Zero-blocks are indicated by zero value in the diagonal (the off-diagonal + // elements should be ignored). + // These submatrices should leave the rhs as-is when solved. + + // Three matrices, sizes 3, 3 and 2, with no branching. + std::vector<size_type> p = {0, 0, 1, 3, 3, 5, 5}; + std::vector<size_type> c = {0, 3, 5, 7}; + matrix_type m(p, c, vvec(7), vvec(7)); + + EXPECT_EQ(7u, m.size()); + EXPECT_EQ(3u, m.num_cells()); + + auto& A = m.state_; + A.d = vvec({2, 3, 2, 0, 0, 4, 5}); + A.u = vvec({0, -1, -1, 0, -1, 0, -2}); + A.rhs = vvec({3, 5, 7, 7, 8, 16, 32}); + + // Expected solution: + std::vector<value_type> expected = {4, 5, 6, 7, 8, 9, 10}; + + m.solve(); + auto x = m.solution(); + + EXPECT_TRUE(testing::seq_almost_eq<double>(expected, x)); +} + +TEST(matrix, zero_diagonal_assembled) +{ + // Use assemble method to construct same zero-diagonal + // test case from CV data. + + using util::assign; + using memory::make_view; + + // Combined matrix may have zero-blocks, corresponding to a zero dt. + // Zero-blocks are indicated by zero value in the diagonal (the off-diagonal + // elements should be ignored). + // These submatrices should leave the rhs as-is when solved. + + // Three matrices, sizes 3, 3 and 2, with no branching. + std::vector<size_type> p = {0, 0, 1, 3, 3, 5, 5}; + std::vector<size_type> c = {0, 3, 5, 7}; + + // Face conductances. + vvec g = {0, 1, 1, 0, 1, 0, 2}; + + // dt of 1e-3. + vvec dt(3, 1.0e-3); + + // Capacitances. + vvec Cm = {1, 1, 1, 1, 1, 2, 3}; + + // Intial voltage of zero; currents alone determine rhs. + vvec v(7, 0.0); + vvec i = {-3, -5, -7, -6, -9, -16, -32}; + + // Expected matrix and rhs: + // u = [ 0 -1 -1 0 -1 0 -2] + // d = [ 2 3 2 2 2 4 5] + // b = [ 3 5 7 2 4 16 32] + // + // Expected solution: + // x = [ 4 5 6 7 8 9 10] + + matrix_type m(p, c, Cm, g); + m.assemble(make_view(dt), make_view(v), make_view(i)); + m.solve(); + + vvec x; + assign(x, on_host(m.solution())); + std::vector<value_type> expected = {4, 5, 6, 7, 8, 9, 10}; + + EXPECT_TRUE(testing::seq_almost_eq<double>(expected, x)); + + // Set dt of 2nd (middle) submatrix to zero. Solution + // should then return voltage values for that submatrix. + + dt[1] = 0; + v[3] = 20; + v[4] = 30; + m.assemble(make_view(dt), make_view(v), make_view(i)); + m.solve(); + + assign(x, m.solution()); + expected = {4, 5, 6, 20, 30, 9, 10}; + + EXPECT_TRUE(testing::seq_almost_eq<double>(expected, x)); +} + diff --git a/tests/unit/test_matrix.cu b/tests/unit/test_matrix.cu index dac64cdf78e45b1bf0f53aaaf435e596f776c788..3e7a730d82f1cc1e8683c2a42f860d482cbc9f27 100644 --- a/tests/unit/test_matrix.cu +++ b/tests/unit/test_matrix.cu @@ -12,6 +12,8 @@ #include <memory/memory.hpp> #include <util/span.hpp> +#include <cuda.h> + using namespace nest::mc; using gpu::impl::npos; @@ -265,7 +267,6 @@ TEST(matrix, assemble) // Build the capacitance and conductance vectors and // populate with nonzero random values. - auto gen = std::mt19937(); auto dist = std::uniform_real_distribution<T>(1, 2); @@ -275,20 +276,26 @@ TEST(matrix, assemble) std::vector<T> g(group_size); std::generate(g.begin(), g.end(), [&](){return dist(gen);}); - // Make the referenace matrix and the gpu matrix + // Make the reference matrix and the gpu matrix auto m_mc = mc_state( p, cell_index, Cm, g); // on host auto m_gpu = gpu_state(p, cell_index, Cm, g); // on gpu + // Set the integration times for the cells to be between 0.1 and 0.2 ms. 
+ std::vector<T> dt(num_mtx); + + auto dt_dist = std::uniform_real_distribution<T>(0.1, 0.2); + std::generate(dt.begin(), dt.end(), [&](){return dt_dist(gen);}); + // Voltage and current values - m_mc.assemble( 0.2, host_array(group_size, -64), host_array(group_size, 10)); + m_mc.assemble(host_array(dt), host_array(group_size, -64), host_array(group_size, 10)); m_mc.solve(); - m_gpu.assemble(0.2, gpu_array(group_size, -64), gpu_array(group_size, 10)); + m_gpu.assemble(on_gpu(dt), gpu_array(group_size, -64), gpu_array(group_size, 10)); m_gpu.solve(); // Compare the GPU and CPU results. // Cast result to float, because we are happy to ignore small differencs - std::vector<float> result_h = util::assign_from(m_mc.solution); - std::vector<float> result_g = util::assign_from(on_host(m_gpu.solution)); + std::vector<float> result_h = util::assign_from(m_mc.solution()); + std::vector<float> result_g = util::assign_from(on_host(m_gpu.solution())); EXPECT_TRUE(seq_almost_eq<float>(result_h, result_g)); } @@ -339,22 +346,21 @@ TEST(matrix, backends) const int num_mtx = 200; std::vector<I> p; - std::vector<I> cell_index; + std::vector<I> cell_cv_divs; for (auto m=0; m<num_mtx; ++m) { auto &p_ref = p_base[m%2]; auto first = p.size(); for (auto i: p_ref) { p.push_back(i + first); } - cell_index.push_back(first); + cell_cv_divs.push_back(first); } - cell_index.push_back(p.size()); + cell_cv_divs.push_back(p.size()); - auto group_size = cell_index.back(); + auto group_size = cell_cv_divs.back(); // Build the capacitance and conductance vectors and // populate with nonzero random values - auto gen = std::mt19937(); gen.seed(100); auto dist = std::uniform_real_distribution<T>(1, 200); @@ -369,13 +375,23 @@ TEST(matrix, backends) std::generate(v.begin(), v.end(), [&](){return dist(gen);}); std::generate(i.begin(), i.end(), [&](){return dist(gen);}); - // Make the referenace matrix and the gpu matrix - auto flat = state_flat(p, cell_index, Cm, g); // flat - auto intl = state_intl(p, cell_index, Cm, g); // interleaved + // Make the reference matrix and the gpu matrix + auto flat = state_flat(p, cell_cv_divs, Cm, g); // flat + auto intl = state_intl(p, cell_cv_divs, Cm, g); // interleaved - // voltage and current values - flat.assemble(0.02, on_gpu(v), on_gpu(i)); - intl.assemble(0.02, on_gpu(v), on_gpu(i)); + // Set the integration times for the cells to be between 0.01 and 0.02 ms. + std::vector<T> dt(num_mtx, 0); + + auto dt_dist = std::uniform_real_distribution<T>(0.01, 0.02); + std::generate(dt.begin(), dt.end(), [&](){return dt_dist(gen);}); + + // Voltage and current values. + auto gpu_dt = on_gpu(dt); + auto gpu_v = on_gpu(v); + auto gpu_i = on_gpu(i); + + flat.assemble(gpu_dt, gpu_v, gpu_i); + intl.assemble(gpu_dt, gpu_v, gpu_i); flat.solve(); intl.solve(); @@ -383,7 +399,80 @@ TEST(matrix, backends) // Compare the results. // We expect exact equality for the two gpu matrix implementations because both // perform the same operations in the same order on the same inputs. - std::vector<double> x_flat = assign_from(on_host(flat.solution)); - std::vector<double> x_intl = assign_from(on_host(intl.solution)); + std::vector<double> x_flat = assign_from(on_host(flat.solution())); + std::vector<double> x_intl = assign_from(on_host(intl.solution())); EXPECT_EQ(x_flat, x_intl); } + +// Test for special zero diagonal behaviour. (see `test_matrix.cpp`.) 
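// Worked check (illustrative, outside the patch) of the expected values shared
// by the zero-diagonal tests above and below. The numbers are consistent with
// an assembly of the form d[i] = Cm[i]*1e-3/dt + (face conductances touching
// CV i), u[i] = -g[i], and, with zero initial voltage, a right-hand side of
// -I[i]; the scaling constant here is read off the test data, not a statement
// of the backend implementation.
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
    std::vector<std::size_t> p = {0, 0, 1, 3, 3, 5, 5};
    std::vector<double> g  = {0, 1, 1, 0, 1, 0, 2};
    std::vector<double> Cm = {1, 1, 1, 1, 1, 2, 3};
    std::vector<double> I  = {-3, -5, -7, -6, -9, -16, -32};
    std::vector<double> x  = {4, 5, 6, 7, 8, 9, 10};   // expected solution
    double dt = 1e-3;

    std::size_t n = p.size();
    std::vector<double> d(n), u(n);
    for (std::size_t i = 0; i < n; ++i) {
        d[i] = Cm[i]*1e-3/dt;
        u[i] = -g[i];
    }
    for (std::size_t i = 0; i < n; ++i) {
        if (p[i] != i) {           // each face adds its conductance to both CVs
            d[i]    += g[i];
            d[p[i]] += g[i];
        }
    }
    assert((d == std::vector<double>{2, 3, 2, 2, 2, 4, 5}));

    // The expected solution satisfies A*x = -I row by row.
    std::vector<double> r(n);
    for (std::size_t i = 0; i < n; ++i) {
        r[i] = d[i]*x[i];
    }
    for (std::size_t i = 0; i < n; ++i) {
        if (p[i] != i) {
            r[i]    += u[i]*x[p[i]];
            r[p[i]] += u[i]*x[i];
        }
    }
    for (std::size_t i = 0; i < n; ++i) {
        assert(r[i] == -I[i]);
    }
}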
+TEST(matrix, zero_diagonal) +{ + using util::assign; + + using value_type = gpu::backend::value_type; + using size_type = gpu::backend::size_type; + using matrix_type = gpu::backend::matrix_state; + using vvec = std::vector<value_type>; + + // Combined matrix may have zero-blocks, corresponding to a zero dt. + // Zero-blocks are indicated by zero value in the diagonal (the off-diagonal + // elements should be ignored). + // These submatrices should leave the rhs as-is when solved. + + // Three matrices, sizes 3, 3 and 2, with no branching. + std::vector<size_type> p = {0, 0, 1, 3, 3, 5, 5}; + std::vector<size_type> c = {0, 3, 5, 7}; + + // Face conductances. + std::vector<value_type> g = {0, 1, 1, 0, 1, 0, 2}; + + // dt of 1e-3. + std::vector<value_type> dt(3, 1.0e-3); + + // Capacitances. + std::vector<value_type> Cm = {1, 1, 1, 1, 1, 2, 3}; + + // Intial voltage of zero; currents alone determine rhs. + std::vector<value_type> v(7, 0.0); + std::vector<value_type> i = {-3, -5, -7, -6, -9, -16, -32}; + + // Expected matrix and rhs: + // u = [ 0 -1 -1 0 -1 0 -2] + // d = [ 2 3 2 2 2 4 5] + // b = [ 3 5 7 2 4 16 32] + // + // Expected solution: + // x = [ 4 5 6 7 8 9 10] + + matrix_type m(p, c, Cm, g); + auto gpu_dt = on_gpu(dt); + auto gpu_v = on_gpu(v); + auto gpu_i = on_gpu(i); + m.assemble(gpu_dt, gpu_v, gpu_i); + m.solve(); + + vvec x; + assign(x, on_host(m.solution())); + std::vector<value_type> expected = {4, 5, 6, 7, 8, 9, 10}; + + EXPECT_TRUE(testing::seq_almost_eq<double>(expected, x)); + + // Set dt of 2nd (middle) submatrix to zero. Solution + // should then return voltage values for that submatrix. + + dt[1] = 0; + gpu_dt = on_gpu(dt); + + v[3] = 20; + v[4] = 30; + gpu_v = on_gpu(v); + + m.assemble(gpu_dt, gpu_v, gpu_i); + m.solve(); + + assign(x, on_host(m.solution())); + expected = {4, 5, 6, 20, 30, 9, 10}; + + EXPECT_TRUE(testing::seq_almost_eq<double>(expected, x)); +} + diff --git a/tests/unit/test_mechanisms.cpp b/tests/unit/test_mechanisms.cpp index f91d3bf592a518d8c64c5dfbdb6d893ae0386e0e..0f53af29f86132c75a3b5706a2582eb8fd3a3c99 100644 --- a/tests/unit/test_mechanisms.cpp +++ b/tests/unit/test_mechanisms.cpp @@ -34,22 +34,32 @@ TEST(mechanisms, helpers) { EXPECT_TRUE(multicore::backend::has_mechanism("pas")); std::vector<size_type> parent_index = {0,0,1,2,3,4,0,6,7,8}; - auto node_indices = std::vector<size_type>{0,6,7,8,9}; - auto weights = std::vector<value_type>(node_indices.size(), 1.0); - auto n = node_indices.size(); + auto node_index = std::vector<size_type>{0,6,7,8,9}; + auto weights = std::vector<value_type>(node_index.size(), 1.0); + auto n = node_index.size(); + + // one cell + size_type ncell = 1; + std::vector<size_type> cell_index(n, 0u); multicore::backend::array vec_i(n, 0.); multicore::backend::array vec_v(n, 0.); + multicore::backend::array vec_t(ncell, 0.); + multicore::backend::array vec_t_to(ncell, 0.); + multicore::backend::array vec_dt(n, 0.); - auto mech = multicore::backend::make_mechanism( - "hh", memory::make_view(vec_v), memory::make_view(vec_i), weights, node_indices); + auto mech = multicore::backend::make_mechanism("hh", 0, + memory::make_view(cell_index), vec_t, vec_t_to, vec_dt, + vec_v, vec_i, weights, node_index); EXPECT_EQ(mech->name(), "hh"); EXPECT_EQ(mech->size(), 5u); // check that an out_of_range exception is thrown if an invalid mechanism is requested ASSERT_THROW( - multicore::backend::make_mechanism("dachshund", vec_v, vec_i, weights, node_indices), + multicore::backend::make_mechanism("dachshund", 0, + 
memory::make_view(cell_index), vec_t, vec_t_to, vec_dt, + vec_v, vec_i, weights, node_index), std::out_of_range ); } @@ -61,7 +71,7 @@ void mech_update(T* mech, unsigned num_iters) { using namespace nest::mc; std::map<mechanisms::ionKind, mechanisms::ion<typename T::backend>> ions; - mech->set_params(2., 0.1); + mech->set_params(); mech->nrn_init(); for (auto ion_kind : mechanisms::ion_kinds()) { auto ion_indexes = util::make_copy<std::vector<typename T::size_type>>( @@ -124,13 +134,21 @@ TYPED_TEST_P(mechanisms, update) { EXPECT_TRUE((std::is_same<typename proto_mechanism_type::array, typename mechanism_type::array>::value)); - auto num_syn = 32; + int num_cell = 1; + int num_syn = 32; + int num_comp = num_syn; + + typename mechanism_type::iarray node_index(num_syn); + typename mechanism_type::array voltage(num_comp, -65.0); + typename mechanism_type::array current(num_comp, 1.0); - typename mechanism_type::iarray indexes(num_syn); - typename mechanism_type::array voltage(num_syn, -65.0); - typename mechanism_type::array current(num_syn, 1.0); typename mechanism_type::array weights(num_syn, 1.0); + typename mechanism_type::iarray cell_index(num_comp, 0); + typename mechanism_type::array time(num_cell, 2.); + typename mechanism_type::array time_to(num_cell, 2.1); + typename mechanism_type::array dt(num_comp, 2.1-2.); + array_init(voltage, nest::mc::util::cyclic_view({ -65.0, -61.0, -63.0 })); array_init(current, nest::mc::util::cyclic_view({ 1.0, 0.9, 1.1 })); array_init(weights, nest::mc::util::cyclic_view({ 1.0 })); @@ -146,29 +164,30 @@ TYPED_TEST_P(mechanisms, update) { auto freq_begin = nest::mc::util::cyclic_view(index_freq).cbegin(); auto freq = freq_begin; - auto index = indexes.begin(); - while (index != indexes.end()) { - for (auto i = 0; i < *freq && index != indexes.end(); ++i) { + auto index = node_index.begin(); + while (index != node_index.end()) { + for (auto i = 0; i < *freq && index != node_index.end(); ++i) { *index++ = freq - freq_begin; } ++freq; } - // Copy indexes, voltage and current to use for the prototype mechanism - typename mechanism_type::iarray indexes_copy(indexes); + typename mechanism_type::iarray node_index_copy(node_index); typename mechanism_type::array voltage_copy(voltage); typename mechanism_type::array current_copy(current); typename mechanism_type::array weights_copy(weights); // Create mechanisms auto mech = nest::mc::mechanisms::make_mechanism<mechanism_type>( - voltage, current, std::move(weights), std::move(indexes) + 0, cell_index, time, time_to, dt, + voltage, current, std::move(weights), std::move(node_index) ); auto mech_proto = nest::mc::mechanisms::make_mechanism<proto_mechanism_type>( + 0, cell_index, time, time_to, dt, voltage_copy, current_copy, - std::move(weights_copy), std::move(indexes_copy) + std::move(weights_copy), std::move(node_index_copy) ); mech_update(dynamic_cast<mechanism_type*>(mech.get()), 10); diff --git a/tests/unit/test_multi_event_stream.cpp b/tests/unit/test_multi_event_stream.cpp new file mode 100644 index 0000000000000000000000000000000000000000..754b9f7071fb9d7a6a28ecce9e101627689626ed --- /dev/null +++ b/tests/unit/test_multi_event_stream.cpp @@ -0,0 +1,168 @@ +#include <vector> +#include "../gtest.h" + +#include <backends/event.hpp> +#include <backends/multicore/multi_event_stream.hpp> + +using namespace nest::mc; + +namespace common_events { + // set up four targets across three streams and two mech ids. 
+ + constexpr cell_local_size_type mech_1 = 10u; + constexpr cell_local_size_type mech_2 = 13u; + constexpr cell_size_type cell_1 = 20u; + constexpr cell_size_type cell_2 = 77u; + constexpr cell_size_type cell_3 = 33u; + constexpr cell_size_type n_cell = 100u; + + target_handle handle[4] = { + target_handle(mech_1, 0u, cell_1), + target_handle(mech_2, 1u, cell_2), + target_handle(mech_1, 4u, cell_2), + target_handle(mech_2, 2u, cell_3) + }; + + // cell_1 (handle 0) has one event at t=3 + // cell_2 (handle 1 and 2) has two events at t=2 and t=5 + // cell_3 (handle 3) has one event at t=3 + + std::vector<deliverable_event> events = { + deliverable_event(2.f, handle[1], 2.f), + deliverable_event(3.f, handle[0], 1.f), + deliverable_event(3.f, handle[3], 4.f), + deliverable_event(5.f, handle[2], 3.f) + }; +} + +TEST(multi_event_stream, init) { + using multi_event_stream = multicore::multi_event_stream; + using namespace common_events; + + multi_event_stream m(n_cell); + EXPECT_EQ(n_cell, m.n_streams()); + + m.init(events); + EXPECT_FALSE(m.empty()); + + m.clear(); + EXPECT_TRUE(m.empty()); +} + +TEST(multi_event_stream, mark) { + using multi_event_stream = multicore::multi_event_stream; + using namespace common_events; + + multi_event_stream m(n_cell); + ASSERT_EQ(n_cell, m.n_streams()); + + m.init(events); + + for (cell_size_type i = 0; i<n_cell; ++i) { + EXPECT_TRUE(m.marked_events(i).empty()); + } + + std::vector<time_type> t_until(n_cell); + t_until[cell_1] = 2.f; + t_until[cell_2] = 2.5f; + t_until[cell_3] = 4.f; + m.mark_until_after(t_until); + + // Only two events should be marked: + // events[0] (with handle 1) at t=2.f on cell_2 + // events[2] (with handle 3) at t=3.f on cell_3 + + for (cell_size_type i = 0; i<n_cell; ++i) { + auto evs = m.marked_events(i); + auto n_marked = evs.size(); + switch (i) { + case cell_2: + EXPECT_EQ(1u, n_marked); + EXPECT_EQ(handle[1].mech_id, evs.front().handle.mech_id); + EXPECT_EQ(handle[1].index, evs.front().handle.index); + break; + case cell_3: + EXPECT_EQ(1u, n_marked); + EXPECT_EQ(handle[3].mech_id, evs.front().handle.mech_id); + EXPECT_EQ(handle[3].index, evs.front().handle.index); + break; + default: + EXPECT_EQ(0u, n_marked); + break; + } + } + + // Drop these events and mark all events up to t=5.f. + // cell_1 should have one marked event (events[1], handle 0) + // cell_2 should have one marked event (events[3], handle 2) + + m.drop_marked_events(); + t_until.assign(n_cell, 5.f); + m.mark_until_after(t_until); + + for (cell_size_type i = 0; i<n_cell; ++i) { + auto evs = m.marked_events(i); + auto n_marked = evs.size(); + switch (i) { + case cell_1: + EXPECT_EQ(1u, n_marked); + EXPECT_EQ(handle[0].mech_id, evs.front().handle.mech_id); + EXPECT_EQ(handle[0].index, evs.front().handle.index); + break; + case cell_2: + EXPECT_EQ(1u, n_marked); + EXPECT_EQ(handle[2].mech_id, evs.front().handle.mech_id); + EXPECT_EQ(handle[2].index, evs.front().handle.index); + break; + default: + EXPECT_EQ(0u, n_marked); + break; + } + } + + // No more events after these. + EXPECT_FALSE(m.empty()); + m.drop_marked_events(); + EXPECT_TRUE(m.empty()); +} + +TEST(multi_event_stream, time_if_before) { + using multi_event_stream = multicore::multi_event_stream; + using namespace common_events; + + multi_event_stream m(n_cell); + ASSERT_EQ(n_cell, m.n_streams()); + + m.init(events); + + // Test times less than all event times (first event at t=2). 
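// Illustrative single-stream model (outside the patch) of the marking protocol
// exercised by the multi_event_stream tests above and by the GPU variants
// later in this patch: events are kept sorted by time, mark_until_after
// advances a mark past every event no later than t_until, marked_events()
// exposes [begin, mark), and drop_marked_events() discards them. The type and
// member names here are hypothetical stand-ins, not the library interface.
#include <cassert>
#include <cstddef>
#include <vector>

struct one_stream {
    std::vector<double> ev_time;   // event times, sorted ascending
    std::size_t begin = 0, mark = 0;

    void mark_until_after(double t_until) {
        mark = begin;
        while (mark < ev_time.size() && ev_time[mark] <= t_until) ++mark;
    }
    std::size_t n_marked() const { return mark - begin; }
    void drop_marked_events() { begin = mark; }
    bool empty() const { return begin == ev_time.size(); }
};

int main() {
    one_stream s{{2.0, 5.0}};      // two events, as on cell_2 in the tests
    s.mark_until_after(2.5);
    assert(s.n_marked() == 1);     // only the t=2 event is marked
    s.drop_marked_events();
    s.mark_until_after(5.0);
    assert(s.n_marked() == 1);     // now the t=5 event
    s.drop_marked_events();
    assert(s.empty());
}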
+ std::vector<double> before(n_cell); + std::vector<double> after; + + for (unsigned i = 0; i<n_cell; ++i) { + before[i] = 0.1+i/(double)n_cell; + } + + std::vector<double> t(before); + m.event_time_if_before(t); + + EXPECT_EQ(before, t); + + // With times between 2 and 3, expect the event at time t=2 + // on cell_2 to restrict corresponding element of t. + + for (unsigned i = 0; i<n_cell; ++i) { + before[i] = 2.1+0.5*i/(double)n_cell; + } + t = before; + m.event_time_if_before(t); + + for (unsigned i = 0; i<n_cell; ++i) { + if (i==cell_2) { + EXPECT_EQ(2., t[i]); + } + else { + EXPECT_EQ(before[i], t[i]); + } + } +} diff --git a/tests/unit/test_multi_event_stream.cu b/tests/unit/test_multi_event_stream.cu new file mode 100644 index 0000000000000000000000000000000000000000..a6d5d3a5a5ec440b9f16a6fcd5efeff6f939eaa7 --- /dev/null +++ b/tests/unit/test_multi_event_stream.cu @@ -0,0 +1,242 @@ +#include <cstdio> +#include <random> +#include <vector> + +#include <cuda.h> +#include "../gtest.h" + +#include <backends/event.hpp> +#include <backends/gpu/multi_event_stream.hpp> +#include <backends/gpu/kernels/time_ops.hpp> +#include <memory/wrappers.hpp> +#include <util/rangeutil.hpp> + +using namespace nest::mc; + +namespace common_events { + // set up four targets across three streams and two mech ids. + + constexpr cell_local_size_type mech_1 = 10u; + constexpr cell_local_size_type mech_2 = 13u; + constexpr cell_size_type cell_1 = 20u; + constexpr cell_size_type cell_2 = 77u; + constexpr cell_size_type cell_3 = 33u; + constexpr cell_size_type n_cell = 100u; + + target_handle handle[4] = { + target_handle(mech_1, 0u, cell_1), + target_handle(mech_2, 1u, cell_2), + target_handle(mech_1, 4u, cell_2), + target_handle(mech_2, 2u, cell_3) + }; + + // cell_1 (handle 0) has one event at t=3 + // cell_2 (handle 1 and 2) has two events at t=2 and t=5 + // cell_3 (handle 3) has one event at t=3 + + std::vector<deliverable_event> events = { + deliverable_event(2.f, handle[1], 2.f), + deliverable_event(3.f, handle[0], 1.f), + deliverable_event(3.f, handle[3], 4.f), + deliverable_event(5.f, handle[2], 3.f) + }; +} + +TEST(multi_event_stream, init) { + using multi_event_stream = gpu::multi_event_stream; + using namespace common_events; + + multi_event_stream m(n_cell); + EXPECT_EQ(n_cell, m.n_streams()); + + m.init(events); + EXPECT_FALSE(m.empty()); + + m.clear(); + EXPECT_TRUE(m.empty()); +} + +struct ev_info { + unsigned mech_id; + unsigned index; + double weight; +}; + +__global__ +void copy_marked_events_kernel( + unsigned ci, + gpu::multi_event_stream::span_state state, + ev_info* store, + unsigned& count, + unsigned max_ev) +{ + // use only one thread here + if (threadIdx.x || blockIdx.x) return; + + unsigned k = 0; + for (auto j = state.span_begin[ci]; j<state.mark[ci]; ++j) { + if (k>=max_ev) break; + store[k++] = {state.ev_mech_id[j], state.ev_index[j], state.ev_weight[j]}; + } + count = k; +} + +std::vector<ev_info> copy_marked_events(int ci, gpu::multi_event_stream& m) { + unsigned max_ev = 1000; + memory::device_vector<ev_info> store(max_ev); + memory::device_vector<unsigned> counter(1); + + copy_marked_events_kernel<<<1,1>>>(ci, m.delivery_data(), store.data(), *counter.data(), max_ev); + unsigned n_ev = counter[0]; + std::vector<ev_info> ev(n_ev); + memory::copy(store(0, n_ev), ev); + return ev; +} + +TEST(multi_event_stream, mark) { + using multi_event_stream = gpu::multi_event_stream; + using namespace common_events; + + multi_event_stream m(n_cell); + ASSERT_EQ(n_cell, m.n_streams()); + + 
m.init(events); + + for (cell_size_type i = 0; i<n_cell; ++i) { + EXPECT_TRUE(copy_marked_events(i, m).empty()); + } + + memory::device_vector<double> t_until(n_cell); + t_until[cell_1] = 2.; + t_until[cell_2] = 2.5; + t_until[cell_3] = 4.; + + m.mark_until_after(t_until); + + // Only two events should be marked: + // events[0] (with handle 1) at t=2 on cell_2 + // events[2] (with handle 3) at t=3 on cell_3 + + for (cell_size_type i = 0; i<n_cell; ++i) { + auto evs = copy_marked_events(i, m); + auto n_marked = evs.size(); + switch (i) { + case cell_2: + ASSERT_EQ(1u, n_marked); + EXPECT_EQ(handle[1].mech_id, evs.front().mech_id); + EXPECT_EQ(handle[1].index, evs.front().index); + break; + case cell_3: + ASSERT_EQ(1u, n_marked); + EXPECT_EQ(handle[3].mech_id, evs.front().mech_id); + EXPECT_EQ(handle[3].index, evs.front().index); + break; + default: + EXPECT_EQ(0u, n_marked); + break; + } + } + + // Drop these events and mark all events up to t=5. + // cell_1 should have one marked event (events[1], handle 0) + // cell_2 should have one marked event (events[3], handle 2) + + m.drop_marked_events(); + memory::fill(t_until, 5.); + m.mark_until_after(memory::make_view(t_until)); + + for (cell_size_type i = 0; i<n_cell; ++i) { + auto evs = copy_marked_events(i, m); + auto n_marked = evs.size(); + switch (i) { + case cell_1: + ASSERT_EQ(1u, n_marked); + EXPECT_EQ(handle[0].mech_id, evs.front().mech_id); + EXPECT_EQ(handle[0].index, evs.front().index); + break; + case cell_2: + ASSERT_EQ(1u, n_marked); + EXPECT_EQ(handle[2].mech_id, evs.front().mech_id); + EXPECT_EQ(handle[2].index, evs.front().index); + break; + default: + EXPECT_EQ(0u, n_marked); + break; + } + } + + // No more events after these. + EXPECT_FALSE(m.empty()); + m.drop_marked_events(); + EXPECT_TRUE(m.empty()); +} + +TEST(multi_event_stream, time_if_before) { + using multi_event_stream = gpu::multi_event_stream; + using namespace common_events; + + multi_event_stream m(n_cell); + ASSERT_EQ(n_cell, m.n_streams()); + + m.init(events); + + // Test times less than all event times (first event at t=2). + std::vector<double> before(n_cell); + std::vector<double> after; + + for (unsigned i = 0; i<n_cell; ++i) { + before[i] = 0.1+i/(double)n_cell; + } + + memory::device_vector<double> t = memory::on_gpu(before); + m.event_time_if_before(t); + util::assign(after, memory::on_host(t)); + + EXPECT_EQ(before, after); + + // With times between 2 and 3, expect the event at time t=2 + // on cell_2 to restrict corresponding element of t. 
+ + for (unsigned i = 0; i<n_cell; ++i) { + before[i] = 2.1+0.5*i/(double)n_cell; + } + t = memory::make_view(before); + m.event_time_if_before(t); + util::assign(after, memory::on_host(t)); + + for (unsigned i = 0; i<n_cell; ++i) { + if (i==cell_2) { + EXPECT_EQ(2., after[i]); + } + else { + EXPECT_EQ(before[i], after[i]); + } + } +} + +TEST(multi_event_stream, any_time_before) { + constexpr std::size_t n = 10000; + std::minstd_rand R; + std::uniform_real_distribution<float> g(0, 10); + + std::vector<double> t(n); + std::generate(t.begin(), t.end(), [&]{ return g(R); }); + + memory::device_vector<double> t0 = memory::on_gpu(t); + + double tmin = *std::min_element(t.begin(), t.end()); + EXPECT_TRUE(gpu::any_time_before(n, t0.data(), tmin+0.01)); + EXPECT_FALSE(gpu::any_time_before(n, t0.data(), tmin)); + + memory::device_vector<double> t1 = memory::on_gpu(t); + EXPECT_FALSE(gpu::any_time_before(n, t0.data(), t1.data())); + + t[2*n/3] += 20; + t1 = memory::on_gpu(t); + EXPECT_TRUE(gpu::any_time_before(n, t0.data(), t1.data())); + + t[2*n/3] -= 30; + t1 = memory::on_gpu(t); + EXPECT_FALSE(gpu::any_time_before(n, t0.data(), t1.data())); +} + diff --git a/tests/unit/test_range.cpp b/tests/unit/test_range.cpp index fa2458faab208aa9677f295b267fd720be5c8331..e058604f7b31d1341328f42ee06efba97eaf550e 100644 --- a/tests/unit/test_range.cpp +++ b/tests/unit/test_range.cpp @@ -190,6 +190,31 @@ TEST(range, strictify) { EXPECT_EQ(cstr+11, ptr_range.right); } +TEST(range, subrange) { + int values[] = {10, 11, 12, 13, 14, 15, 16}; + + // `subrange_view` should handle offsets of different integral types sanely. + auto sub1 = util::subrange_view(values, 1, 6u); + EXPECT_EQ(11, sub1[0]); + EXPECT_EQ(15, sub1.back()); + + // Should be able to take subranges of subranges, and modify underlying + // sequence. + auto sub2 = util::subrange_view(sub1, 3ull, (short)4); + EXPECT_EQ(1u, sub2.size()); + + sub2[0] = 23; + EXPECT_EQ(23, values[4]); + + // Taking a subrange view of a const range over non-const iterators + // should still allow modification of underlying sequence. 
+ const util::range<int*> const_view(values, values+4); + auto sub3 = util::subrange_view(const_view, std::make_pair(1, 3u)); + sub3[1] = 42; + EXPECT_EQ(42, values[2]); +} + + TEST(range, max_element_by) { const char *cstr = "helloworld"; auto cstr_range = util::make_range(cstr, null_terminated); @@ -223,6 +248,25 @@ TEST(range, max_value) { EXPECT_EQ('x', i); } +TEST(range, minmax_value) { + auto cstr_empty_range = util::make_range((const char*)"", null_terminated); + auto p1 = util::minmax_value(cstr_empty_range); + EXPECT_EQ('\0', p1.first); + EXPECT_EQ('\0', p1.second); + + const char *cstr = "hello world"; + auto cstr_range = util::make_range(cstr, null_terminated); + auto p2 = util::minmax_value(cstr_range); + EXPECT_EQ(' ', p2.first); + EXPECT_EQ('w', p2.second); + + auto p3 = util::minmax_value( + util::transform_view(cstr_range, [](char c) { return -(int)c; })); + + EXPECT_EQ('w', -p3.first); + EXPECT_EQ(' ', -p3.second); +} + template <typename V> class counter_range: public ::testing::Test {}; @@ -325,6 +369,16 @@ TEST(range, assign) { EXPECT_EQ("00110", text); } +TEST(range, fill) { + std::vector<char> aaaa(4); + util::fill(aaaa, 'a'); + EXPECT_EQ("aaaa", std::string(aaaa.begin(), aaaa.end())); + + char cstr[] = "howdy"; + util::fill(util::make_range((char *)cstr, null_terminated), 'q'); + EXPECT_EQ("qqqqq", std::string(cstr)); +} + TEST(range, assign_from) { int in[] = {0,1,2}; diff --git a/tests/unit/test_spikes.cpp b/tests/unit/test_spikes.cpp index 35ef18e7d7b2574b79097f06d2a87aff7c429451..5e759025138a493ebdc449b3ce7ac9c69c87e248 100644 --- a/tests/unit/test_spikes.cpp +++ b/tests/unit/test_spikes.cpp @@ -2,14 +2,27 @@ #include <spike.hpp> #include <backends/multicore/fvm.hpp> +#include <memory/memory.hpp> +#include <util/rangeutil.hpp> using namespace nest::mc; +// This source is included in `test_spikes.cu`, which defines +// USE_BACKEND to override the default `multicore::backend` +// used for CPU tests. 
+ +#ifndef USE_BACKEND +using backend = multicore::backend; +#else +using backend = USE_BACKEND; +#endif + TEST(spikes, threshold_watcher) { using backend = multicore::backend; using size_type = backend::size_type; using value_type = backend::value_type; using array = backend::array; + using iarray = backend::iarray; using list = backend::threshold_watcher::crossing_list; // the test creates a watch on 3 values in the array values (which has 10 @@ -24,11 +37,21 @@ TEST(spikes, threshold_watcher) { array values(n, 0); values[5] = 3.; + // the values are tied to two 'cells' with independent times: + // compartments [0, 5] -> cell 0 + // compartments [6, 9] -> cell 1 + iarray cell_index(n, 0); + for (unsigned i = 6; i<n; ++i) { + cell_index[i] = 1; + } + array time_before(2, 0.); + array time_after(2, 0.); + // list for storing expected crossings for validation at the end list expected; // create the watch - backend::threshold_watcher watch(values, index, thresh, 0.f); + backend::threshold_watcher watch(cell_index, time_before, time_after, values, index, thresh); // initially the first and third watch should not be spiking // the second is spiking @@ -38,7 +61,8 @@ TEST(spikes, threshold_watcher) { // test again at t=1, with unchanged values // - nothing should change - watch.test(1.); + util::fill(time_after, 1.); + watch.test(); EXPECT_FALSE(watch.is_crossed(0)); EXPECT_TRUE(watch.is_crossed(1)); EXPECT_FALSE(watch.is_crossed(2)); @@ -47,30 +71,37 @@ TEST(spikes, threshold_watcher) { // test at t=2, with all values set to zero // - 2nd watch should now stop spiking memory::fill(values, 0.); - watch.test(2.); + memory::copy(time_after, time_before); + util::fill(time_after, 2.); + watch.test(); EXPECT_FALSE(watch.is_crossed(0)); EXPECT_FALSE(watch.is_crossed(1)); EXPECT_FALSE(watch.is_crossed(2)); EXPECT_EQ(watch.crossings().size(), 0u); - // test at t=3, with all values set to 4. + // test at t=(2.5, 3), with all values set to 4. // - all watches should now be spiking memory::fill(values, 4.); - watch.test(3.); + memory::copy(time_after, time_before); + time_after[0] = 2.5; + time_after[1] = 3.0; + watch.test(); EXPECT_TRUE(watch.is_crossed(0)); EXPECT_TRUE(watch.is_crossed(1)); EXPECT_TRUE(watch.is_crossed(2)); EXPECT_EQ(watch.crossings().size(), 3u); // record the expected spikes - expected.push_back({0u, 2.25f}); - expected.push_back({1u, 2.50f}); - expected.push_back({2u, 2.75f}); + expected.push_back({0u, 2.125f}); // 2. + (2.5-2)*(1./4.) + expected.push_back({1u, 2.250f}); // 2. + (2.5-2)*(2./4.) + expected.push_back({2u, 2.750f}); // 2. + (3.0-2)*(3./4.) // test at t=4, with all values set to 0. 
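// Worked check (illustrative, outside the patch) of the interpolated crossing
// times recorded above: with a cell integrating from t0 to t1 and its watched
// value moving linearly from v0 to v1, a threshold thr is crossed at
//   t0 + (t1 - t0) * (thr - v0) / (v1 - v0),
// which reproduces the 2.125, 2.250 and 2.750 ms values in the test comments.
#include <cassert>
#include <cmath>

static double crossing_time(double t0, double t1, double v0, double v1, double thr) {
    return t0 + (t1 - t0)*(thr - v0)/(v1 - v0);
}

int main() {
    // Cell 0 advances 2 -> 2.5 ms, cell 1 advances 2 -> 3 ms; values step 0 -> 4.
    assert(std::abs(crossing_time(2.0, 2.5, 0.0, 4.0, 1.0) - 2.125) < 1e-12);
    assert(std::abs(crossing_time(2.0, 2.5, 0.0, 4.0, 2.0) - 2.250) < 1e-12);
    assert(std::abs(crossing_time(2.0, 3.0, 0.0, 4.0, 3.0) - 2.750) < 1e-12);
}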
// - all watches should stop spiking memory::fill(values, 0.); - watch.test(4.); + memory::copy(time_after, time_before); + util::fill(time_after, 4.); + watch.test(); EXPECT_FALSE(watch.is_crossed(0)); EXPECT_FALSE(watch.is_crossed(1)); EXPECT_FALSE(watch.is_crossed(2)); @@ -79,7 +110,9 @@ TEST(spikes, threshold_watcher) { // test at t=5, with value on 3rd watch set to 6 // - watch 3 should be spiking values[index[2]] = 6.; - watch.test(5.); + memory::copy(time_after, time_before); + util::fill(time_after, 5.); + watch.test(); EXPECT_FALSE(watch.is_crossed(0)); EXPECT_FALSE(watch.is_crossed(1)); EXPECT_TRUE(watch.is_crossed(2)); @@ -109,11 +142,10 @@ TEST(spikes, threshold_watcher) { // // test that resetting works // - EXPECT_EQ(watch.last_test_time(), 5); memory::fill(values, 0); values[index[0]] = 10.; // first watch should be intialized to spiking state - watch.reset(0); - EXPECT_EQ(watch.last_test_time(), 0); + util::fill(time_before, 0.); + watch.reset(); EXPECT_EQ(watch.crossings().size(), 0u); EXPECT_TRUE(watch.is_crossed(0)); EXPECT_FALSE(watch.is_crossed(1)); diff --git a/tests/unit/test_spikes.cu b/tests/unit/test_spikes.cu index 12bb89ee975b90020337b91b3acf4a4e5d482780..52d245a20b27f6e5d0e08d9998b0d6a24d4c4aec 100644 --- a/tests/unit/test_spikes.cu +++ b/tests/unit/test_spikes.cu @@ -1,122 +1,6 @@ -#include "../gtest.h" +// Override backend class in `test_spikes.cpp` -#include <spike.hpp> #include <backends/gpu/fvm.hpp> +#define USE_BACKEND gpu::backend -using namespace nest::mc; - -TEST(spikes, threshold_watcher) { - using backend = gpu::backend; - using size_type = backend::size_type; - using value_type = backend::value_type; - using array = backend::array; - using list = backend::threshold_watcher::crossing_list; - - // the test creates a watch on 3 values in the array values (which has 10 - // elements in total). - const auto n = 10; - - const std::vector<size_type> index{0, 5, 7}; - const std::vector<value_type> thresh{1., 2., 3.}; - - // all values are initially 0, except for values[5] which we set - // to exceed the threshold of 2. for the second watch - array values(n, 0); - values[5] = 3.; - - // list for storing expected crossings for validation at the end - list expected; - - // create the watch - backend::threshold_watcher watch(values, index, thresh, 0.f); - - // initially the first and third watch should not be spiking - // the second is spiking - EXPECT_FALSE(watch.is_crossed(0)); - EXPECT_TRUE(watch.is_crossed(1)); - EXPECT_FALSE(watch.is_crossed(2)); - - // test again at t=1, with unchanged values - // - nothing should change - watch.test(1.); - EXPECT_FALSE(watch.is_crossed(0)); - EXPECT_TRUE(watch.is_crossed(1)); - EXPECT_FALSE(watch.is_crossed(2)); - EXPECT_EQ(watch.crossings().size(), 0u); - - // test at t=2, with all values set to zero - // - 2nd watch should now stop spiking - memory::fill(values, 0.); - watch.test(2.); - EXPECT_FALSE(watch.is_crossed(0)); - EXPECT_FALSE(watch.is_crossed(1)); - EXPECT_FALSE(watch.is_crossed(2)); - EXPECT_EQ(watch.crossings().size(), 0u); - - // test at t=3, with all values set to 4. - // - all watches should now be spiking - memory::fill(values, 4.); - watch.test(3.); - EXPECT_TRUE(watch.is_crossed(0)); - EXPECT_TRUE(watch.is_crossed(1)); - EXPECT_TRUE(watch.is_crossed(2)); - EXPECT_EQ(watch.crossings().size(), 3u); - - // record the expected spikes - expected.push_back({0u, 2.25f}); - expected.push_back({1u, 2.50f}); - expected.push_back({2u, 2.75f}); - - // test at t=4, with all values set to 0. 
- // - all watches should stop spiking - memory::fill(values, 0.); - watch.test(4.); - EXPECT_FALSE(watch.is_crossed(0)); - EXPECT_FALSE(watch.is_crossed(1)); - EXPECT_FALSE(watch.is_crossed(2)); - EXPECT_EQ(watch.crossings().size(), 3u); - - // test at t=5, with value on 3rd watch set to 6 - // - watch 3 should be spiking - values[index[2]] = 6.; - watch.test(5.); - EXPECT_FALSE(watch.is_crossed(0)); - EXPECT_FALSE(watch.is_crossed(1)); - EXPECT_TRUE(watch.is_crossed(2)); - EXPECT_EQ(watch.crossings().size(), 4u); - expected.push_back({2u, 4.5f}); - - // - // test that all generated spikes matched the expected values - // - if (expected.size()!=watch.crossings().size()) { - FAIL() << "count of recorded crosssings did not match expected count"; - } - auto const& spikes = watch.crossings(); - for (auto i=0u; i<expected.size(); ++i) { - EXPECT_EQ(expected[i], spikes[i]); - } - - // - // test that clearing works - // - watch.clear_crossings(); - EXPECT_EQ(watch.crossings().size(), 0u); - EXPECT_FALSE(watch.is_crossed(0)); - EXPECT_FALSE(watch.is_crossed(1)); - EXPECT_TRUE(watch.is_crossed(2)); - - // - // test that resetting works - // - EXPECT_EQ(watch.last_test_time(), 5); - memory::fill(values, 0); - values[index[0]] = 10.; // first watch should be intialized to spiking state - watch.reset(0); - EXPECT_EQ(watch.last_test_time(), 0); - EXPECT_EQ(watch.crossings().size(), 0u); - EXPECT_TRUE(watch.is_crossed(0)); - EXPECT_FALSE(watch.is_crossed(1)); - EXPECT_FALSE(watch.is_crossed(2)); -} - +#include "./test_spikes.cpp" diff --git a/tests/unit/test_synapses.cpp b/tests/unit/test_synapses.cpp index 3e9b8ef4aa5b903fbf494ab5e79d4a12581d5927..6a45685d501ba1e8405c75f88f1146f335b1f060 100644 --- a/tests/unit/test_synapses.cpp +++ b/tests/unit/test_synapses.cpp @@ -41,7 +41,6 @@ TEST(synapses, add_to_cell) EXPECT_EQ(syns[2].mechanism.name(), "expsyn"); } -// compares results with those generated by nrn/ball_and_stick.py TEST(synapses, expsyn_basic_state) { using namespace nest::mc; @@ -49,14 +48,21 @@ TEST(synapses, expsyn_basic_state) using value_type = multicore::backend::value_type; using synapse_type = mechanisms::expsyn::mechanism_expsyn<multicore::backend>; - auto num_syn = 4; + int num_syn = 4; + int num_comp = 4; + int num_cell = 1; + + synapse_type::iarray cell_index(num_comp, 0); + synapse_type::array time(num_cell, 0); + synapse_type::array time_to(num_cell, 0.1); + synapse_type::array dt(num_comp, 0.1); - std::vector<size_type> indexes(num_syn); - std::vector<value_type> weights(indexes.size(), 1.0); - synapse_type::array voltage(num_syn, -65.0); - synapse_type::array current(num_syn, 1.0); - auto mech = mechanisms::make_mechanism<synapse_type>(voltage, current, weights, indexes); + std::vector<size_type> node_index(num_syn, 0); + std::vector<value_type> weights(num_syn, 1.0); + synapse_type::array voltage(num_comp, -65.0); + synapse_type::array current(num_comp, 1.0); + auto mech = mechanisms::make_mechanism<synapse_type>(0, cell_index, time, time_to, dt, voltage, current, weights, node_index); auto ptr = dynamic_cast<synapse_type*>(mech.get()); auto n = ptr->size(); @@ -99,19 +105,25 @@ TEST(synapses, expsyn_basic_state) TEST(synapses, exp2syn_basic_state) { using namespace nest::mc; + using size_type = multicore::backend::size_type; + using value_type = multicore::backend::value_type; using synapse_type = mechanisms::exp2syn::mechanism_exp2syn<multicore::backend>; - auto num_syn = 4; + int num_syn = 4; + int num_comp = 4; + int num_cell = 1; - using size_type = 
multicore::backend::size_type; - using value_type = multicore::backend::value_type; + synapse_type::iarray cell_index(num_comp, 0); + synapse_type::array time(num_cell, 0); + synapse_type::array time_to(num_cell, 0.1); + synapse_type::array dt(num_comp, 0.1); - std::vector<size_type> indexes(num_syn); - std::vector<value_type> weights(indexes.size(), 1.0); - synapse_type::array voltage(num_syn, -65.0); - synapse_type::array current(num_syn, 1.0); - auto mech = mechanisms::make_mechanism<synapse_type>(voltage, current, weights, indexes); + std::vector<size_type> node_index(num_syn, 0); + std::vector<value_type> weights(num_syn, 1.0); + synapse_type::array voltage(num_comp, -65.0); + synapse_type::array current(num_comp, 1.0); + auto mech = mechanisms::make_mechanism<synapse_type>(0, cell_index, time, time_to, dt, voltage, current, weights, node_index); auto ptr = dynamic_cast<synapse_type*>(mech.get()); auto n = ptr->size();