diff --git a/CMakeLists.txt b/CMakeLists.txt index ba243a643eeff1db995de7283fc579e6c10add86..48aba8e102b368b9cd2afd4ff3c1a15c2aa6063b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -197,12 +197,6 @@ elseif(NMC_VECTORIZE_TARGET STREQUAL "AVX2") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXXOPT_AVX2}") endif() -#---------------------------------------------------------- -# whether to generate optimized kernels from NMODL -#---------------------------------------------------------- -option(NMC_USE_OPTIMIZED_KERNELS - "generate optimized code that vectorizes with the Intel compiler" OFF) - #---------------------------------------------------------- # Only build modcc if it has not already been installed. # This is useful if cross compiling for KNL, when it is not desirable to compile diff --git a/mechanisms/CMakeLists.txt b/mechanisms/CMakeLists.txt index 0fef1d41d505e9ab5d1f29c0192728eb05ff079b..01e08e46ef7379de76a78a43e372fb701bc1ab6d 100644 --- a/mechanisms/CMakeLists.txt +++ b/mechanisms/CMakeLists.txt @@ -4,19 +4,25 @@ include(BuildModules.cmake) set(mechanisms pas hh expsyn exp2syn test_kin1 test_kinlva) set(modcc_opt) -if(NMC_USE_OPTIMIZED_KERNELS) # generate optimized kernels - set(modcc_opt "-O") -endif() set(mod_srcdir "${CMAKE_CURRENT_SOURCE_DIR}/mod") set(mech_dir "${CMAKE_CURRENT_SOURCE_DIR}/multicore") file(MAKE_DIRECTORY "${mech_dir}") +if(NMC_VECTORIZE_TARGET STREQUAL "KNL") + set(modcc_target "avx512") +elseif(NMC_VECTORIZE_TARGET STREQUAL "AVX") + set(modcc_opt "-O") +elseif(NMC_VECTORIZE_TARGET STREQUAL "AVX2") + set(modcc_opt "-O") +else() + set(modcc_target "cpu") +endif() build_modules( ${mechanisms} SOURCE_DIR "${mod_srcdir}" DEST_DIR "${mech_dir}" - MODCC_FLAGS -t cpu ${modcc_opt} + MODCC_FLAGS -t ${modcc_target} ${modcc_opt} TARGET build_all_mods ) diff --git a/modcc/CMakeLists.txt b/modcc/CMakeLists.txt index 29dffa3c309634e9366d6c7359fa5ff888f1c38f..2398b12fe4b2f722f870afcf241500e9887d47fc 100644 --- a/modcc/CMakeLists.txt +++ b/modcc/CMakeLists.txt @@ -29,4 +29,3 @@ set_target_properties(modcc PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/modcc" ) - diff --git a/modcc/backends/avx512.hpp b/modcc/backends/avx512.hpp new file mode 100644 index 0000000000000000000000000000000000000000..153e329e1c25ebc331b695d0212e57267d013ad0 --- /dev/null +++ b/modcc/backends/avx512.hpp @@ -0,0 +1,137 @@ +// +// AVX512 backend +// + +#pragma once + +#include "backends/base.hpp" + + +namespace nest { +namespace mc { +namespace modcc { + +// Specialize for the different architectures +template<> +struct simd_intrinsics<targetKind::avx512> { + + static bool has_gather_scatter() { + return true; + } + + static std::string emit_headers() { + return "#include <immintrin.h>"; + }; + + static std::string emit_simd_width() { + return "512"; + } + + static std::string emit_value_type() { + return "__m512d"; + } + + static std::string emit_index_type() { + return "__m256i"; + } + + template<typename T1, typename T2> + static void emit_binary_op(TextBuffer& tb, tok op, + const T1& arg1, const T2& arg2) { + switch (op) { + case tok::plus: + tb << "_mm512_add_pd("; + break; + case tok::minus: + tb << "_mm512_sub_pd("; + break; + case tok::times: + tb << "_mm512_mul_pd("; + break; + case tok::divide: + tb << "_mm512_div_pd("; + break; + default: + throw std::invalid_argument("Unknown binary operator"); + } + + emit_operands(tb, arg_emitter(arg1), arg_emitter(arg2)); + tb << ")"; + } + + template<typename T> + static void emit_unary_op(TextBuffer& tb, tok op, const T& arg) { + switch (op) { + case tok::minus: + tb << "_mm512_sub_pd(_mm512_set1_pd(0), "; + break; + case tok::exp: + tb << "_mm512_exp_pd("; + break; + case tok::log: + tb << "_mm512_log_pd("; + break; + default: + throw std::invalid_argument("Unknown unary operator"); + } + + emit_operands(tb, arg_emitter(arg)); + tb << ")"; + } + + template<typename B, typename E> + static void emit_pow(TextBuffer& tb, const B& base, const E& exp) { + tb << "_mm512_pow_pd("; + emit_operands(tb, arg_emitter(base), arg_emitter(exp)); + tb << ")"; + } + + template<typename A, typename V> + static void emit_store_unaligned(TextBuffer& tb, const A& addr, + const V& value) { + tb << "_mm512_storeu_pd("; + emit_operands(tb, arg_emitter(addr), arg_emitter(value)); + tb << ")"; + } + + template<typename A> + static void emit_load_unaligned(TextBuffer& tb, const A& addr) { + tb << "_mm512_loadu_pd("; + emit_operands(tb, arg_emitter(addr)); + tb << ")"; + } + + template<typename A> + static void emit_load_index(TextBuffer& tb, const A& addr) { + tb << "_mm256_lddqu_si256("; + emit_operands(tb, arg_emitter(addr)); + tb << ")"; + } + + template<typename A, typename I, typename V, typename S> + static void emit_scatter(TextBuffer& tb, const A& addr, + const I& index, const V& value, const S& scale) { + tb << "_mm512_i32scatter_pd("; + emit_operands(tb, arg_emitter(addr), arg_emitter(index), + arg_emitter(value), arg_emitter(scale)); + tb << ")"; + } + + template<typename A, typename I, typename S> + static void emit_gather(TextBuffer& tb, const A& addr, + const I& index, const S& scale) { + tb << "_mm512_i32gather_pd("; + emit_operands(tb, arg_emitter(addr), arg_emitter(index), + arg_emitter(scale)); + tb << ")"; + } + + template<typename T> + static void emit_set_value(TextBuffer& tb, const T& arg) { + tb << "_mm512_set1_pd("; + emit_operands(tb, arg_emitter(arg)); + tb << ")"; + } +}; + +}}} // closing namespaces diff --git a/modcc/backends/base.hpp b/modcc/backends/base.hpp new file mode 100644 index 0000000000000000000000000000000000000000..487c0722b775610cc64db29ee5f08f6b62b17888 --- /dev/null +++ b/modcc/backends/base.hpp @@ -0,0 +1,85 @@ +// +// Base SIMD backend functionality +// + +#pragma once + +#include <functional> +#include <stdexcept> +#include <string> + +#include "options.hpp" +#include "token.hpp" +#include "textbuffer.hpp" +#include "util/meta.hpp" + +namespace nest { +namespace mc { +namespace modcc { + +using nest::mc::util::enable_if_t; +using operand_fn_t = std::function<void(TextBuffer&)>; + +static void emit_operands(TextBuffer& tb, operand_fn_t emitter) { + emitter(tb); +} + +template<typename ... Args> +static void emit_operands(TextBuffer& tb, operand_fn_t emitter, Args ... args) { + emitter(tb); + tb << ", "; + emit_operands(tb, args ...); +} + +template<typename T> +static enable_if_t<!std::is_convertible<T, operand_fn_t>::value, operand_fn_t> +arg_emitter(const T& arg) { + return [arg](TextBuffer& tb) { tb << arg; }; +} + +static operand_fn_t arg_emitter(const operand_fn_t& arg) { + return arg; +} + + +template<targetKind Arch> +struct simd_intrinsics { + static std::string emit_headers(); + static std::string emit_simd_width(); + static std::string emit_simd_value_type(); + static std::string emit_simd_index_type(); + + template<typename T1, typename T2> + static void emit_binary_op(TextBuffer& tb, tok op, + const T1& arg1, const T2& arg2); + + template<typename T> + static void emit_unary_op(TextBuffer& tb, tok op, const T& arg); + + template<typename B, typename E> + static void emit_pow(TextBuffer& tb, const B& base, const E& exp); + + template<typename A, typename V> + static void emit_store_unaligned(TextBuffer& tb, const A& addr, const V& value); + + template<typename A> + static void emit_load_unaligned(TextBuffer& tb, const A& addr); + + template<typename A> + static void emit_load_index(TextBuffer& tb, const A& addr); + + template<typename A, typename I, typename V, typename S> + static void emit_scatter(TextBuffer& tb, const A& addr, + const I& index, const V& value, const S& scale); + + template<typename A, typename I, typename S> + static void emit_gather(TextBuffer& tb, const A& addr, + const I& index, const S& scale); + + template<typename T> + static void emit_set_value(TextBuffer& tb, const T& arg); + + static bool has_gather_scatter(); +}; + +}}} // closing namespaces diff --git a/modcc/backends/simd.hpp b/modcc/backends/simd.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1f7d3485c9c2254d1b1a53591a72febe9f521dc2 --- /dev/null +++ b/modcc/backends/simd.hpp @@ -0,0 +1,3 @@ +#pragma once + +#include "backends/avx512.hpp" diff --git a/modcc/cprinter.cpp b/modcc/cprinter.cpp index f4c9de899782ae0f9fca2aa5e4ac9f406669759b..a30e8253718bc3921c983e6f112b69b4e6c5f0fe 100644 --- a/modcc/cprinter.cpp +++ b/modcc/cprinter.cpp @@ -9,14 +9,16 @@ ******************************************************************************/ CPrinter::CPrinter(Module &m, bool o) -: module_(&m), - optimize_(o) -{ + : module_(&m), + optimize_(o) +{ } + +std::string CPrinter::emit_source() { // make a list of vector types, both parameters and assigned // and a list of all scalar types std::vector<VariableExpression*> scalar_variables; std::vector<VariableExpression*> array_variables; - for(auto& sym: m.symbols()) { + for(auto& sym: module_->symbols()) { if(auto var = sym.second->is_variable()) { if(var->is_range()) { array_variables.push_back(var); @@ -29,20 +31,12 @@ CPrinter::CPrinter(Module &m, bool o) std::string module_name = Options::instance().modulename; if (module_name == "") { - module_name = m.name(); + module_name = module_->name(); } ////////////////////////////////////////////// ////////////////////////////////////////////// - text_.add_line("#pragma once"); - text_.add_line(); - text_.add_line("#include <cmath>"); - text_.add_line("#include <limits>"); - text_.add_line(); - text_.add_line("#include <mechanism.hpp>"); - text_.add_line("#include <algorithms.hpp>"); - text_.add_line("#include <util/pprintf.hpp>"); - text_.add_line(); + emit_headers(); ////////////////////////////////////////////// ////////////////////////////////////////////// @@ -69,7 +63,7 @@ CPrinter::CPrinter(Module &m, bool o) ////////////////////////////////////////////// ////////////////////////////////////////////// - for(auto& ion: m.neuron_block().ions) { + for(auto& ion: module_->neuron_block().ions) { auto tname = "Ion" + ion.name; text_.add_line("struct " + tname + " {"); text_.increase_indentation(); @@ -133,7 +127,7 @@ CPrinter::CPrinter(Module &m, bool o) text_.add_line(); // copy in the weights if this is a density mechanism - if (m.kind() == moduleKind::density) { + if (module_->kind() == moduleKind::density) { text_.add_line("// add the user-supplied weights for converting from current density"); text_.add_line("// to per-compartment current in nA"); if(optimize_) { @@ -175,7 +169,7 @@ CPrinter::CPrinter(Module &m, bool o) text_.increase_indentation(); text_.add_line("auto s = std::size_t{0};"); text_.add_line("s += data_.size()*sizeof(value_type);"); - for(auto& ion: m.neuron_block().ions) { + for(auto& ion: module_->neuron_block().ions) { text_.add_line("s += ion_" + ion.name + ".memory();"); } text_.add_line("return s;"); @@ -198,7 +192,7 @@ CPrinter::CPrinter(Module &m, bool o) text_.add_line("}"); text_.add_line(); - std::string kind_str = m.kind() == moduleKind::density + std::string kind_str = module_->kind() == moduleKind::density ? "mechanismKind::density" : "mechanismKind::point"; text_.add_line("mechanismKind kind() const override {"); @@ -209,7 +203,7 @@ CPrinter::CPrinter(Module &m, bool o) text_.add_line(); // return true/false indicating if cell has dependency on k - auto const& ions = m.neuron_block().ions; + auto const& ions = module_->neuron_block().ions; auto find_ion = [&ions] (ionKind k) { return std::find_if( ions.begin(), ions.end(), @@ -326,7 +320,7 @@ CPrinter::CPrinter(Module &m, bool o) auto proctest = [] (procedureKind k) { return is_in(k, {procedureKind::normal, procedureKind::api, procedureKind::net_receive}); }; - for(auto const& var: m.symbols()) { + for(auto const& var: module_->symbols()) { auto isproc = var.second->kind()==symbolKind::procedure; if(isproc ) { @@ -376,9 +370,23 @@ CPrinter::CPrinter(Module &m, bool o) text_.add_line(); text_.add_line("}}}} // namespaces"); + return text_.str(); } + +void CPrinter::emit_headers() { + text_.add_line("#pragma once"); + text_.add_line(); + text_.add_line("#include <cmath>"); + text_.add_line("#include <limits>"); + text_.add_line(); + text_.add_line("#include <mechanism.hpp>"); + text_.add_line("#include <algorithms.hpp>"); + text_.add_line("#include <util/pprintf.hpp>"); + text_.add_line(); +} + /****************************************************************************** CPrinter ******************************************************************************/ @@ -612,12 +620,13 @@ void CPrinter::visit(APIMethod *e) { text_.add_line(); } -void CPrinter::print_APIMethod_unoptimized(APIMethod* e) { - // there can not be more than 1 instance of a density channel per grid point, - // so we can assert that aliasing will not occur. - if(optimize_) text_.add_line("#pragma ivdep"); - - text_.add_line("for(int i_=0; i_<n_; ++i_) {"); +void CPrinter::emit_api_loop(APIMethod* e, + const std::string& start, + const std::string& end, + const std::string& inc) { + text_.add_gutter(); + text_ << "for (" << start << "; " << end << "; " << inc << ") {"; + text_.end_line(); text_.increase_indentation(); // loads from external indexed arrays @@ -651,6 +660,14 @@ void CPrinter::print_APIMethod_unoptimized(APIMethod* e) { text_.decrease_indentation(); text_.add_line("}"); +} + +void CPrinter::print_APIMethod_unoptimized(APIMethod* e) { + // there can not be more than 1 instance of a density channel per grid point, + // so we can assert that aliasing will not occur. + if(optimize_) text_.add_line("#pragma ivdep"); + + emit_api_loop(e, "int i_ = 0", "i_ < n_", "++i_"); //text_.add_line("STOP_PROFILE"); decrease_indentation(); @@ -678,6 +695,7 @@ void CPrinter::print_APIMethod_optimized(APIMethod* e) { } } } + aliased_output_ = aliased_variables.size()>0; // only proceed with optimized output if the ouputs are aliased diff --git a/modcc/cprinter.hpp b/modcc/cprinter.hpp index ef47977c05fe13e1b6f3206bcffecf344855d355..2b58a9fc85d7b891eb2453e84b86520430f25fec 100644 --- a/modcc/cprinter.hpp +++ b/modcc/cprinter.hpp @@ -11,25 +11,23 @@ public: CPrinter() {} CPrinter(Module &m, bool o=false); - void visit(Expression *e) override; - void visit(UnaryExpression *e) override; - void visit(BinaryExpression *e) override; - void visit(AssignmentExpression *e) override; - void visit(PowBinaryExpression *e) override; - void visit(NumberExpression *e) override; - void visit(VariableExpression *e) override; - - void visit(Symbol *e) override; - void visit(LocalVariable *e) override; - void visit(IndexedVariable *e) override; - - void visit(IdentifierExpression *e) override; - void visit(CallExpression *e) override; - void visit(ProcedureExpression *e) override; - void visit(APIMethod *e) override; - void visit(LocalDeclaration *e) override; - void visit(BlockExpression *e) override; - void visit(IfExpression *e) override; + virtual void visit(Expression *e) override; + virtual void visit(UnaryExpression *e) override; + virtual void visit(BinaryExpression *e) override; + virtual void visit(AssignmentExpression *e) override; + virtual void visit(PowBinaryExpression *e) override; + virtual void visit(NumberExpression *e) override; + virtual void visit(VariableExpression *e) override; + virtual void visit(Symbol *e) override; + virtual void visit(LocalVariable *e) override; + virtual void visit(IndexedVariable *e) override; + virtual void visit(IdentifierExpression *e) override; + virtual void visit(CallExpression *e) override; + virtual void visit(ProcedureExpression *e) override; + virtual void visit(APIMethod *e) override; + virtual void visit(LocalDeclaration *e) override; + virtual void visit(BlockExpression *e) override; + virtual void visit(IfExpression *e) override; std::string text() const { return text_.str(); @@ -47,8 +45,18 @@ public: void clear_text() { text_.clear(); } -private: + virtual ~CPrinter() { } + + virtual std::string emit_source(); + virtual void emit_headers(); + virtual void emit_api_loop(APIMethod* e, + const std::string& start, + const std::string& end, + const std::string& inc); + +protected: + void print_mechanism(Visitor *backend); void print_APIMethod_optimized(APIMethod* e); void print_APIMethod_unoptimized(APIMethod* e); @@ -114,5 +122,6 @@ private: bool is_point_process() { return module_->kind() == moduleKind::point; } -}; + std::vector<LocalVariable*> aliased_vars(APIMethod* e); +}; diff --git a/modcc/expression.hpp b/modcc/expression.hpp index a44bdc5bdea229a98ec4529c0a0e305f571009d4..0da1dac24f9926cfef31d6c78aa597b9a0ef7a74 100644 --- a/modcc/expression.hpp +++ b/modcc/expression.hpp @@ -555,6 +555,7 @@ compiler_error: accessKind access() const { return access_; } + ionKind ion_channel() const { return ion_channel_; } @@ -1048,7 +1049,7 @@ public: std::string name, std::vector<expression_ptr>&& args, expression_ptr&& body) - : ProcedureExpression(loc, std::move(name), std::move(args), std::move(body), procedureKind::api) + : ProcedureExpression(loc, std::move(name), std::move(args), std::move(body), procedureKind::api) {} APIMethod* is_api_method() override {return this;} diff --git a/modcc/modcc.cpp b/modcc/modcc.cpp index c1c210ebce0c098278bad2ae0a1a1e0b80dbf6e3..2954c31dbb061e33ce1444e189804f0820191987 100644 --- a/modcc/modcc.cpp +++ b/modcc/modcc.cpp @@ -13,6 +13,10 @@ #include "modccutil.hpp" #include "options.hpp" +#include "simd_printer.hpp" + +using namespace nest::mc; + //#define VERBOSE int main(int argc, char **argv) { @@ -61,6 +65,9 @@ int main(int argc, char **argv) { else if(targstr == "gpu") { Options::instance().target = targetKind::gpu; } + else if(targstr == "avx512") { + Options::instance().target = targetKind::avx512; + } else { std::cerr << red("error") << " target must be one in {cpu, gpu}\n"; return 1; @@ -137,11 +144,15 @@ int main(int argc, char **argv) { std::string text; switch(Options::instance().target) { case targetKind::cpu : - text = CPrinter(m, Options::instance().optimize).text(); + text = CPrinter(m, Options::instance().optimize).emit_source(); break; case targetKind::gpu : text = CUDAPrinter(m, Options::instance().optimize).text(); break; + case targetKind::avx512: + text = SimdPrinter<targetKind::avx512>( + m, Options::instance().optimize).emit_source(); + break; default : std::cerr << red("error") << ": unknown printer" << std::endl; exit(1); diff --git a/modcc/options.hpp b/modcc/options.hpp index d994740e79e73c06030b774b710b56dc805da7a4..1ca12eea929469a1e0d9d5189ff6c8b3d54298a4 100644 --- a/modcc/options.hpp +++ b/modcc/options.hpp @@ -1,8 +1,14 @@ #pragma once #include <iostream> - -enum class targetKind { cpu, gpu }; +#include "modccutil.hpp" + +enum class targetKind { + cpu, + gpu, + // Vectorisation targets + avx512, + }; struct Options { std::string filename; diff --git a/modcc/simd_printer.hpp b/modcc/simd_printer.hpp new file mode 100644 index 0000000000000000000000000000000000000000..53b072221e388333215ce98e3ec87928764bce22 --- /dev/null +++ b/modcc/simd_printer.hpp @@ -0,0 +1,565 @@ +#pragma once + +#include <set> +#include <sstream> + +#include "backends/simd.hpp" +#include "cprinter.hpp" +#include "modccutil.hpp" +#include "options.hpp" +#include "textbuffer.hpp" + + +using namespace nest::mc; + +template<targetKind Arch> +class SimdPrinter : public CPrinter { +public: + SimdPrinter() + : cprinter_(make_unique<CPrinter>()) + {} + + // Initialize our base CPrinter in default unoptimized mode; we handle the + // vectorization ourselves + SimdPrinter(Module& m, bool optimize = false) + : CPrinter(m), + cprinter_(make_unique<CPrinter>(m)) + { } + + void visit(NumberExpression *e) override { + simd_backend::emit_set_value(text_, e->value()); + } + + void visit(UnaryExpression *e) override; + void visit(BinaryExpression *e) override; + void visit(PowBinaryExpression *e) override; + void visit(ProcedureExpression *e) override; + void visit(AssignmentExpression *e) override; + void visit(VariableExpression *e) override; + void visit(LocalVariable *e) override { + const std::string& name = e->name(); + text_ << name; + } + + void visit(IndexedVariable *e) override; + void visit(APIMethod *e) override; + void visit(BlockExpression *e) override; + void visit(CallExpression *e) override; + + void emit_headers() override { + CPrinter::emit_headers(); + text_.add_line("#include <climits>"); + text_ << simd_backend::emit_headers(); + text_.add_line(); + } + + void emit_api_loop(APIMethod* e, + const std::string& start, + const std::string& end, + const std::string& inc) override; + +private: + using simd_backend = modcc::simd_intrinsics<Arch>; + + void emit_indexed_view(LocalVariable* var, std::set<std::string>& decls); + void emit_indexed_view_simd(LocalVariable* var, std::set<std::string>& decls); + + // variable naming conventions + std::string emit_member_name(const std::string& varname) { + return varname + "_"; + } + + + std::string emit_rawptr_name(const std::string& varname) { + return "r_" + varname; + } + + std::pair<std::string, std::string> + emit_rawptr_ion(const std::string& iname, const std::string& ifield) { + return std::make_pair(emit_rawptr_name(iname), + emit_rawptr_name(iname + "_" + ifield)); + } + + std::string emit_vindex_name(const std::string& varname) { + return "v_" + varname + "_index"; + } + + std::string emit_vtmp_name(const std::string& varname) { + return "v_" + varname; + } + + // CPrinter to delegate generation of unvectorised code + std::unique_ptr<CPrinter> cprinter_; + + // Treat range access as loads + bool range_load_ = true; +}; + +template<targetKind Arch> +void SimdPrinter<Arch>::visit(APIMethod *e) { + text_.add_gutter() << "void " << e->name() << "() override {\n"; + if (!e->scope()) { // error: semantic analysis has not been performed + throw compiler_exception( + "SimdPrinter attempt to print APIMethod " + e->name() + + " for which semantic analysis has not been performed", + e->location()); + } + + // only print the body if it has contents + if (e->is_api_method()->body()->statements().size()) { + text_.increase_indentation(); + + // First emit the raw pointer of node_index_ + text_.add_line("constexpr size_t simd_width = " + + simd_backend::emit_simd_width() + + " / (CHAR_BIT*sizeof(value_type));"); + text_.add_line("const size_type* " + emit_rawptr_name("node_index_") + + " = node_index_.data();"); + text_.add_line(); + + // create local indexed views + std::set<std::string> index_decls; + for (auto const& symbol : e->scope()->locals()) { + auto var = symbol.second->is_local_variable(); + if (var->is_indexed()) { + emit_indexed_view(var, index_decls); + emit_indexed_view_simd(var, index_decls); + text_.add_line(); + } + } + + // get loop dimensions + text_.add_line("int n_ = node_index_.size();"); + + // print the vectorized version of the loop + emit_api_loop(e, "int i_ = 0", "i_ < n_/simd_width", "++i_"); + text_.add_line(); + + // delegate the printing of the remainder unvectorized loop + auto cprinter = cprinter_.get(); + cprinter->clear_text(); + cprinter->set_gutter(text_.get_gutter()); + cprinter->emit_api_loop(e, "int i_ = n_ - n_ % simd_width", "i_ < n_", "++i_"); + text_ << cprinter->text(); + + text_.decrease_indentation(); + } + + text_.add_line("}\n"); +} + +template<targetKind Arch> +void SimdPrinter<Arch>::emit_indexed_view(LocalVariable* var, + std::set<std::string>& decls) { + auto const& name = var->name(); + auto const& index_name = var->external_variable()->index_name(); + text_.add_gutter(); + + if (var->is_read()) + text_ << "const "; + + if (decls.find(index_name) == decls.cend()) { + text_ << "indexed_view_type "; + decls.insert(index_name); + } + + text_ << index_name; + auto channel = var->external_variable()->ion_channel(); + if (channel == ionKind::none) { + text_ << "(" + emit_member_name(index_name) + ", node_index_);\n"; + } + else { + auto iname = ion_store(channel); + text_ << "(" << iname << "." << name << ", " + << ion_store(channel) << ".index);\n"; + } +} + +template<targetKind Arch> +void SimdPrinter<Arch>::emit_indexed_view_simd(LocalVariable* var, + std::set<std::string>& decls) { + auto const& name = var->name(); + auto const& index_name = var->external_variable()->index_name(); + + // We need to work with with raw pointers in the vectorized version + auto channel = var->external_variable()->ion_channel(); + if (channel==ionKind::none) { + auto raw_index_name = emit_rawptr_name(index_name); + if (decls.find(raw_index_name) == decls.cend()) { + text_.add_gutter(); + if (var->is_read()) + text_ << "const "; + + text_ << "value_type* "; + decls.insert(raw_index_name); + text_ << raw_index_name << " = " + << emit_member_name(index_name) << ".data()"; + } + } + else { + auto iname = ion_store(channel); + auto ion_var_names = emit_rawptr_ion(iname, name); + if (decls.find(ion_var_names.first) == decls.cend()) { + text_.add_gutter(); + text_ << "size_type* "; + decls.insert(ion_var_names.first); + text_ << ion_var_names.first << " = " << iname << ".index.data()"; + text_.end_line(";"); + } + + if (decls.find(ion_var_names.second) == decls.cend()) { + text_.add_gutter(); + if (var->is_read()) + text_ << "const "; + + text_ << "value_type* "; + decls.insert(ion_var_names.second); + text_ << ion_var_names.second << " = " << iname << "." + << name << ".data()"; + } + } + + text_.end_line(";"); +} + +template<targetKind Arch> +void SimdPrinter<Arch>::emit_api_loop(APIMethod* e, + const std::string& start, + const std::string& end, + const std::string& inc) { + text_.add_gutter(); + text_ << "for (" << start << "; " << end << "; " << inc << ") {"; + text_.end_line(); + text_.increase_indentation(); + text_.add_line("int off_ = i_*simd_width;"); + + // First load the index vectors of all involved ions + std::set<std::string> declared_ion_vars; + for (auto& symbol : e->scope()->locals()) { + auto var = symbol.second->is_local_variable(); + if (var->is_indexed()) { + auto channel = var->external_variable()->ion_channel(); + std::string cast_type = + "(const " + simd_backend::emit_index_type() + " *) "; + + + std::string vindex_name, index_ptr_name; + if (channel == ionKind::none) { + vindex_name = emit_vtmp_name("node_index_"); + index_ptr_name = emit_rawptr_name("node_index_"); + } + else { + auto iname = ion_store(channel); + vindex_name = emit_vindex_name(iname); + index_ptr_name = emit_rawptr_name(iname); + + } + + + if (declared_ion_vars.find(vindex_name) == declared_ion_vars.cend()) { + declared_ion_vars.insert(vindex_name); + text_.add_gutter(); + text_ << simd_backend::emit_index_type() << " " + << vindex_name << " = "; + simd_backend::emit_load_index( + text_, cast_type + "&" + index_ptr_name + "[off_]"); + text_.end_line(";"); + } + } + } + + text_.add_line(); + for (auto& symbol : e->scope()->locals()) { + auto var = symbol.second->is_local_variable(); + if (is_input(var)) { + auto ext = var->external_variable(); + text_.add_gutter() << simd_backend::emit_value_type() << " "; + var->accept(this); + text_ << " = "; + ext->accept(this); + text_.end_line(";"); + } + } + + text_.add_line(); + e->body()->accept(this); + + std::vector<LocalVariable*> aliased_variables; + + // perform update of external variables (currents etc) + for (auto &symbol : e->scope()->locals()) { + auto var = symbol.second->is_local_variable(); + if (is_output(var) && + !is_point_process() && + simd_backend::has_gather_scatter()) { + // We can safely use scatter, but we need to fetch the variable + // first + text_.add_line(); + auto ext = var->external_variable(); + auto ext_tmpname = "_" + ext->index_name(); + text_.add_gutter() << simd_backend::emit_value_type() << " " + << ext_tmpname << " = "; + ext->accept(this); + text_.end_line(";"); + text_.add_gutter(); + text_ << ext_tmpname << " = "; + simd_backend::emit_binary_op(text_, ext->op(), ext_tmpname, + [this,var](TextBuffer& tb) { + var->accept(this); + }); + text_.end_line(";"); + text_.add_gutter(); + + // Build up the index name + std::string vindex_name, raw_index_name; + auto channel = ext->ion_channel(); + if (channel != ionKind::none) { + auto iname = ion_store(channel); + vindex_name = emit_vindex_name(iname); + raw_index_name = emit_rawptr_ion(iname, ext->name()).second; + } + else { + vindex_name = emit_vtmp_name("node_index_"); + raw_index_name = emit_rawptr_name(ext->index_name()); + } + + simd_backend::emit_scatter(text_, raw_index_name, vindex_name, + ext_tmpname, "sizeof(value_type)"); + text_.end_line(";"); + } + else if (is_output(var)) { + // var is aliased; collect all the aliased variables and we will + // update them later in a fused loop all at once + aliased_variables.push_back(var); + } + } + + // Emit update code for the aliased variables + // First, define their scalar equivalents + constexpr auto scalar_var_prefix = "s_"; + for (auto& v: aliased_variables) { + text_.add_gutter(); + text_ << "value_type* " << scalar_var_prefix << v->name() + << " = (value_type*) &" << v->name(); + text_.end_line(";"); + } + + if (aliased_variables.size() > 0) { + // Update them all in a single loop + text_.add_line("for (int k_ = 0; k_ < simd_width; ++k_) {"); + text_.increase_indentation(); + for (auto& v: aliased_variables) { + auto ext = v->external_variable(); + text_.add_gutter(); + text_ << ext->index_name() << "[off_+k_]"; + text_ << (ext->op() == tok::plus ? " += " : " -= "); + text_ << scalar_var_prefix << v->name() << "[k_]"; + text_.end_line(";"); + } + text_.decrease_indentation(); + text_.add_line("}"); + } + + text_.decrease_indentation(); + text_.add_line("}"); +} + +template<targetKind Arch> +void SimdPrinter<Arch>::visit(IndexedVariable *e) { + std::string vindex_name, value_name; + + auto channel = e->ion_channel(); + if (channel != ionKind::none) { + auto iname = ion_store(channel); + vindex_name = emit_vindex_name(iname); + value_name = emit_rawptr_ion(iname, e->name()).second; + } + else { + vindex_name = emit_vtmp_name("node_index_"); + value_name = emit_rawptr_name(e->index_name()); + } + + simd_backend::emit_gather(text_, vindex_name, value_name, "sizeof(value_type)"); +} + +template<targetKind Arch> +void SimdPrinter<Arch>::visit(BlockExpression *e) { + if (!e->is_nested()) { + std::vector<std::string> names; + for(auto& symbol : e->scope()->locals()) { + auto sym = symbol.second.get(); + // input variables are declared earlier, before the + // block body is printed + if (is_stack_local(sym) && !is_input(sym)) { + names.push_back(sym->name()); + } + } + + if (names.size() > 0) { + text_.add_gutter() << simd_backend::emit_value_type() << " " + << *(names.begin()); + for(auto it=names.begin()+1; it!=names.end(); ++it) { + text_ << ", " << *it; + } + text_.end_line(";"); + } + } + + for (auto& stmt : e->statements()) { + if (stmt->is_local_declaration()) + continue; + + // these all must be handled + text_.add_gutter(); + stmt->accept(this); + if (not stmt->is_if()) { + text_.end_line(";"); + } + } +} + +template<targetKind Arch> +void SimdPrinter<Arch>::visit(BinaryExpression *e) { + auto lhs = e->lhs(); + auto rhs = e->rhs(); + + auto emit_lhs = [this, lhs](TextBuffer& tb) { + lhs->accept(this); + }; + auto emit_rhs = [this, rhs](TextBuffer& tb) { + rhs->accept(this); + }; + + try { + simd_backend::emit_binary_op(text_, e->op(), emit_lhs, emit_rhs); + } catch (const std::exception& exc) { + // Rethrow it as a compiler_exception + throw compiler_exception( + "SimdPrinter: " + std::string(exc.what()) + ": " + + yellow(token_string(e->op())), e->location()); + } +} + +template<targetKind Arch> +void SimdPrinter<Arch>::visit(AssignmentExpression *e) { + auto is_memop = [](Expression *e) { + auto ident = e->is_identifier(); + auto var = (ident) ? ident->symbol()->is_variable() : nullptr; + return var != nullptr && var->is_range(); + }; + + auto lhs = e->lhs(); + auto rhs = e->rhs(); + if (is_memop(lhs)) { + // that's a store; change printer's state so as not to emit a load + // instruction for the lhs visit + simd_backend::emit_store_unaligned(text_, + [this, lhs](TextBuffer&) { + auto range_load_save = range_load_; + range_load_ = false; + lhs->accept(this); + range_load_ = range_load_save; + }, + [this, rhs](TextBuffer&) { + rhs->accept(this); + }); + } + else { + // that's an ordinary assignment; use base printer + CPrinter::visit(e); + } +} + + +template<targetKind Arch> +void SimdPrinter<Arch>::visit(VariableExpression *e) { + if (e->is_range() && range_load_) { + simd_backend::emit_load_unaligned(text_, "&" + e->name() + "[off_]"); + } + else if (e->is_range()) { + text_ << "&" << e->name() << "[off_]"; + } + else { + simd_backend::emit_set_value(text_, e->name()); + } +} + +template<targetKind Arch> +void SimdPrinter<Arch>::visit(UnaryExpression *e) { + + auto arg = e->expression(); + auto emit_arg = [this, arg](TextBuffer& tb) { arg->accept(this); }; + + try { + simd_backend::emit_unary_op(text_, e->op(), emit_arg); + } catch (std::exception& exc) { + throw compiler_exception( + "SimdPrinter: " + std::string(exc.what()) + ": " + + yellow(token_string(e->op())), e->location()); + } +} + +template<targetKind Arch> +void SimdPrinter<Arch>::visit(PowBinaryExpression *e) { + auto lhs = e->lhs(); + auto rhs = e->rhs(); + auto emit_lhs = [this, lhs](TextBuffer&) { lhs->accept(this); }; + auto emit_rhs = [this, rhs](TextBuffer&) { rhs->accept(this); }; + simd_backend::emit_pow(text_, emit_lhs, emit_rhs); +} + +template<targetKind Arch> +void SimdPrinter<Arch>::visit(CallExpression *e) { + text_ << e->name() << "(off_"; + for (auto& arg: e->args()) { + text_ << ", "; + arg->accept(this); + } + text_ << ")"; +} + +template<targetKind Arch> +void SimdPrinter<Arch>::visit(ProcedureExpression *e) { + auto emit_procedure_unoptimized = [this](ProcedureExpression* e) { + auto cprinter = cprinter_.get(); + cprinter->clear_text(); + cprinter->set_gutter(text_.get_gutter()); + cprinter->visit(e); + text_ << cprinter->text(); + }; + + if (e->kind() == procedureKind::net_receive) { + // Use non-vectorized printer for printing net_receive + emit_procedure_unoptimized(e); + return; + } + + // Two versions of each procedure are needed: vectorized and unvectorized + text_.add_gutter() << "void " << e->name() << "(int off_"; + for(auto& arg : e->args()) { + text_ << ", " << simd_backend::emit_value_type() << " " + << arg->is_argument()->name(); + } + text_ << ") {\n"; + + if (!e->scope()) { + // error: semantic analysis has not been performed + throw compiler_exception( + "SimdPrinter attempt to print Procedure " + e->name() + + " for which semantic analysis has not been performed", + e->location()); + } + + // print body + increase_indentation(); + e->body()->accept(this); + + // close the function body + decrease_indentation(); + + text_.add_line("}"); + text_.add_line(); + + // Emit also the unvectorised version of the procedure + emit_procedure_unoptimized(e); +} diff --git a/modcc/textbuffer.hpp b/modcc/textbuffer.hpp index 5ab11396c813e943ee1b9b35d705bbf8ad3f55e0..ade2f38c14165ff5c6a51b002b6e8d867349dfb6 100644 --- a/modcc/textbuffer.hpp +++ b/modcc/textbuffer.hpp @@ -9,6 +9,7 @@ public: TextBuffer() { text_.precision(std::numeric_limits<double>::max_digits10); } + TextBuffer(const TextBuffer& other): indent_(other.indent_), indentation_width_(other.indentation_width_), @@ -29,12 +30,11 @@ public: void increase_indentation(); void decrease_indentation(); - std::stringstream &text(); + std::stringstream& text(); void clear(); private: - int indent_ = 0; const int indentation_width_=4; std::string gutter_ = ""; @@ -42,9 +42,7 @@ private: }; template <typename T> -TextBuffer& operator<< (TextBuffer& buffer, T const& v) { +TextBuffer& operator<<(TextBuffer& buffer, T const& v) { buffer.text() << v; - return buffer; } - diff --git a/tests/modcc/CMakeLists.txt b/tests/modcc/CMakeLists.txt index 34f87ed79d325d6954f50a3c0473e9aa2d31ae14..681839de853f21a71c8534e1eb520c7f96d83d7c 100644 --- a/tests/modcc/CMakeLists.txt +++ b/tests/modcc/CMakeLists.txt @@ -17,6 +17,8 @@ set(MODCC_TEST_SOURCES # utility expr_expand.cpp + + test_simd_backend.cpp test.cpp ) diff --git a/tests/modcc/test.cpp b/tests/modcc/test.cpp index 72e0db777b525350a6e6ddfa5591df5bc372f955..cc337703929a31d97739f79c01082419cf34048e 100644 --- a/tests/modcc/test.cpp +++ b/tests/modcc/test.cpp @@ -7,7 +7,11 @@ bool g_verbose_flag = false; std::string plain_text(Expression* expr) { static std::regex csi_code(R"_(\x1B\[.*?[\x40-\x7E])_"); - return !expr? "null": regex_replace(expr->to_string(), csi_code, ""); + + // We need to pass a std::string as a third argument; intel compiler + // complains otherwise + return !expr? "null": regex_replace( + expr->to_string(), csi_code, std::string("")); } ::testing::AssertionResult assert_expr_eq(const char *arg1, const char *arg2, Expression* expected, Expression* value) { diff --git a/tests/modcc/test_simd_backend.cpp b/tests/modcc/test_simd_backend.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2e379ae6aa4bfa29d2fa151c10c996039ffcc057 --- /dev/null +++ b/tests/modcc/test_simd_backend.cpp @@ -0,0 +1,66 @@ +#include "backends/simd.hpp" +#include "options.hpp" +#include "textbuffer.hpp" +#include "token.hpp" +#include "test.hpp" + +using namespace nest::mc; + + +TEST(avx512, emit_binary_op) { + TextBuffer tb; + + using simd_backend = modcc::simd_intrinsics<targetKind::avx512>; + + simd_backend::emit_binary_op(tb, tok::plus, "a", "b"); + EXPECT_EQ("_mm512_add_pd(a, b)", tb.str()); + + // Test also lambdas + std::string lhs = "a"; + std::string rhs = "b"; + tb.clear(); + simd_backend::emit_binary_op(tb, tok::minus, + [lhs](TextBuffer& tb) { tb << lhs; }, + [rhs](TextBuffer& tb) { tb << rhs; }); + EXPECT_EQ("_mm512_sub_pd(a, b)", tb.str()); + + + // Test mixed: lambdas + strings + tb.clear(); + simd_backend::emit_binary_op(tb, tok::times, + [lhs](TextBuffer& tb) { tb << lhs; }, "b"); + EXPECT_EQ("_mm512_mul_pd(a, b)", tb.str()); + + tb.clear(); + simd_backend::emit_binary_op(tb, tok::divide, "a", "b"); + EXPECT_EQ("_mm512_div_pd(a, b)", tb.str()); + + + tb.clear(); + simd_backend::emit_pow(tb, "a", "b"); + EXPECT_EQ("_mm512_pow_pd(a, b)", tb.str()); +} + +TEST(avx512, emit_unary_op) { + TextBuffer tb; + + using simd_backend = modcc::simd_intrinsics<targetKind::avx512>; + + // Test lambdas for generating the argument + std::string arg = "a"; + simd_backend::emit_unary_op(tb, tok::minus, + [arg](TextBuffer& tb) { tb << arg; }); + EXPECT_EQ("_mm512_sub_pd(_mm512_set1_pd(0), a)", tb.str()); + + tb.clear(); + simd_backend::emit_unary_op(tb, tok::exp, "a"); + EXPECT_EQ("_mm512_exp_pd(a)", tb.str()); + + tb.clear(); + simd_backend::emit_unary_op(tb, tok::log, "a"); + EXPECT_EQ("_mm512_log_pd(a)", tb.str()); + + tb.clear(); + simd_backend::emit_load_index(tb, "&a"); + EXPECT_EQ("_mm256_lddqu_si256(&a)", tb.str()); +} diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 4e51c3720ec1089a88dc8c88bad3d68ec9bdec86..50548d733e16f30f0374478e49dc23f2da9fa5ac 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -1,7 +1,7 @@ include(${PROJECT_SOURCE_DIR}/mechanisms/BuildModules.cmake) # Build prototype mechanisms for testing in test_mechanisms. -set(proto_mechanisms pas hh expsyn exp2syn) +set(proto_mechanisms pas hh expsyn exp2syn test_kin1 test_kinlva) set(mech_proto_dir "${CMAKE_CURRENT_BINARY_DIR}/mech_proto") file(MAKE_DIRECTORY "${mech_proto_dir}") diff --git a/tests/unit/test_mechanisms.cpp b/tests/unit/test_mechanisms.cpp index eaf24d7b2fe91ccb441feb17325221e807316534..7a4f57da152afcea0649254cb1456491efece61f 100644 --- a/tests/unit/test_mechanisms.cpp +++ b/tests/unit/test_mechanisms.cpp @@ -5,12 +5,16 @@ #include "mech_proto/exp2syn.hpp" #include "mech_proto/hh.hpp" #include "mech_proto/pas.hpp" +#include "mech_proto/test_kin1.hpp" +#include "mech_proto/test_kinlva.hpp" // modcc generated mechanisms #include "mechanisms/multicore/expsyn.hpp" #include "mechanisms/multicore/exp2syn.hpp" #include "mechanisms/multicore/hh.hpp" #include "mechanisms/multicore/pas.hpp" +#include "mechanisms/multicore/test_kin1.hpp" +#include "mechanisms/multicore/test_kinlva.hpp" #include <initializer_list> #include <backends/fvm_multicore.hpp> @@ -182,7 +186,7 @@ using mechanism_types = ::testing::Types< mechanism_info< nest::mc::mechanisms::hh::mechanism_hh<nest::mc::multicore::backend>, nest::mc::mechanisms::hh_proto::mechanism_hh_proto<nest::mc::multicore::backend> - >, + >, mechanism_info< nest::mc::mechanisms::pas::mechanism_pas<nest::mc::multicore::backend>, nest::mc::mechanisms::pas_proto::mechanism_pas_proto<nest::mc::multicore::backend> @@ -196,6 +200,14 @@ using mechanism_types = ::testing::Types< nest::mc::mechanisms::exp2syn::mechanism_exp2syn<nest::mc::multicore::backend>, nest::mc::mechanisms::exp2syn_proto::mechanism_exp2syn_proto<nest::mc::multicore::backend>, true + >, + mechanism_info< + nest::mc::mechanisms::test_kin1::mechanism_test_kin1<nest::mc::multicore::backend>, + nest::mc::mechanisms::test_kin1_proto::mechanism_test_kin1_proto<nest::mc::multicore::backend> + >, + mechanism_info< + nest::mc::mechanisms::test_kinlva::mechanism_test_kinlva<nest::mc::multicore::backend>, + nest::mc::mechanisms::test_kinlva_proto::mechanism_test_kinlva_proto<nest::mc::multicore::backend> > >;