diff --git a/CMakeLists.txt b/CMakeLists.txt
index 11efe3df7d5417689a6d7e84ca52d6a466a9ccbc..f69ee6367180fcf7e8f505aba4827c8c4315fcdc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -201,13 +201,11 @@ endif()
 #----------------------------------------------------------
 # vectorization target
 #----------------------------------------------------------
-set(ARB_VECTORIZE_TARGET "none" CACHE STRING "CPU target for vectorization {KNL,AVX,AVX2,AVX512}")
-set_property(CACHE ARB_VECTORIZE_TARGET PROPERTY STRINGS none KNL AVX AVX2 AVX512)
+set(ARB_VECTORIZE_TARGET "none" CACHE STRING "CPU target for vectorization {KNL,AVX2,AVX512}")
+set_property(CACHE ARB_VECTORIZE_TARGET PROPERTY STRINGS none KNL AVX2 AVX512)
 
 if(ARB_VECTORIZE_TARGET STREQUAL "KNL")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXXOPT_KNL}")
-elseif(ARB_VECTORIZE_TARGET STREQUAL "AVX")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXXOPT_AVX}")
 elseif(ARB_VECTORIZE_TARGET STREQUAL "AVX2")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXXOPT_AVX2}")
 elseif(ARB_VECTORIZE_TARGET STREQUAL "AVX512")
diff --git a/mechanisms/CMakeLists.txt b/mechanisms/CMakeLists.txt
index ba6e30623285036e735ceb5519a4dc05cc3c97b0..f5df463219cdf7e0ae586d4b2c47c5bf4ce4eb26 100644
--- a/mechanisms/CMakeLists.txt
+++ b/mechanisms/CMakeLists.txt
@@ -7,27 +7,27 @@ set(mod_srcdir "${CMAKE_CURRENT_SOURCE_DIR}/mod")
 
 # Generate mechanism implementations for host/cpu environment
 
-set(modcc_opt)
 set(mech_dir "${CMAKE_CURRENT_SOURCE_DIR}/multicore")
 file(MAKE_DIRECTORY "${mech_dir}")
-if(ARB_VECTORIZE_TARGET STREQUAL "KNL")
-    set(modcc_target "avx512")
-elseif(ARB_VECTORIZE_TARGET STREQUAL "AVX")
-    set(modcc_opt "-O")
-    set(modcc_target "cpu")
-elseif(ARB_VECTORIZE_TARGET STREQUAL "AVX2")
-    set(modcc_target "avx2")
+
+if(ARB_VECTORIZE_TARGET STREQUAL "none")
+    set(modcc_simd "")  # no SIMD flag: expands to zero arguments when used unquoted
+elseif(ARB_VECTORIZE_TARGET STREQUAL "KNL")
+    set(modcc_simd -s avx512)  # stored as a list so -s and its value stay separate arguments
 elseif(ARB_VECTORIZE_TARGET STREQUAL "AVX512")
-    set(modcc_target "avx512")
+    set(modcc_simd -s avx512)
+elseif(ARB_VECTORIZE_TARGET STREQUAL "AVX2")
+    set(modcc_simd -s avx2)
 else()
-    set(modcc_target "cpu")
+    message(SEND_ERROR "Unrecognized architecture for ARB_VECTORIZE_TARGET: '${ARB_VECTORIZE_TARGET}'")
+    set(modcc_simd "")
 endif()
 build_modules(
     ${mechanisms}
     SOURCE_DIR "${mod_srcdir}"
     DEST_DIR "${mech_dir}"
-    MODCC_FLAGS -t ${modcc_target} ${modcc_opt}
-    GENERATES .hpp
+    MODCC_FLAGS -t cpu ${modcc_simd}
+    GENERATES _cpu.hpp
     TARGET build_all_mods
 )
 
@@ -40,7 +40,7 @@ build_modules(
     SOURCE_DIR "${mod_srcdir}"
     DEST_DIR "${mech_dir}"
     MODCC_FLAGS -t gpu
-    GENERATES _impl.cu .hpp _impl.hpp
+    GENERATES _gpu_impl.cu _gpu.hpp _gpu_impl.hpp
     TARGET build_all_gpu_mods
 )
 
@@ -49,7 +49,7 @@ build_modules(
 if(ARB_WITH_CUDA)
     # make list of the .cu files that implement the mechanism kernels
     foreach(mech ${mechanisms})
-        list(APPEND cuda_mech_sources ${mech_dir}/${mech}_impl.cu)
+        list(APPEND cuda_mech_sources ${mech_dir}/${mech}_gpu_impl.cu)
     endforeach()
 
     # compile the .cu files into a library
diff --git a/modcc/CMakeLists.txt b/modcc/CMakeLists.txt
index 612488e8f2a0449d7584e5ab3b9df832949ccd6c..286c6729c01ad5dd985c034e792020362336a4b2 100644
--- a/modcc/CMakeLists.txt
+++ b/modcc/CMakeLists.txt
@@ -1,12 +1,10 @@
 set(MODCC_SOURCES
     astmanip.cpp
     cexpr_emit.cpp
-    constantfolder.cpp
     cprinter.cpp
     cudaprinter.cpp
     errorvisitor.cpp
     expression.cpp
-    expressionclassifier.cpp
     functionexpander.cpp
     functioninliner.cpp
     lexer.cpp
diff --git a/modcc/backends/avx2.hpp b/modcc/backends/avx2.hpp
index ff8709cc9f5294a10b58eec1dc4660e0b9365f2a..803890a33378e7b9e174c092ad790b1f44d14e2f 100644
--- a/modcc/backends/avx2.hpp
+++ b/modcc/backends/avx2.hpp
@@ -7,12 +7,11 @@
 #include "backends/base.hpp"
 #include "util/compat.hpp"
 
-namespace arb {
 namespace modcc {
 
 // Specialize for the different architectures
 template<>
-struct simd_intrinsics<targetKind::avx2> {
+struct simd_intrinsics<simdKind::avx2> {
     static bool has_scatter() {
         return false;
     }
@@ -198,7 +197,7 @@ private:
     const static std::string varprefix;
 };
 
-int simd_intrinsics<targetKind::avx2>::varcnt = 0;
-const std::string simd_intrinsics<targetKind::avx2>::varprefix = "_r";
+int simd_intrinsics<simdKind::avx2>::varcnt = 0;
+const std::string simd_intrinsics<simdKind::avx2>::varprefix = "_r";
 
-}} // closing namespaces
+} // namespace modcc
diff --git a/modcc/backends/avx512.hpp b/modcc/backends/avx512.hpp
index 024c220a1a53898bfdf0703c0fe8a1e761c8b09e..16b05ae1dc53345b5dd177720753f5ea231d7f07 100644
--- a/modcc/backends/avx512.hpp
+++ b/modcc/backends/avx512.hpp
@@ -6,13 +6,11 @@
 
 #include "backends/base.hpp"
 
-
-namespace arb {
 namespace modcc {
 
 // Specialize for the different architectures
 template<>
-struct simd_intrinsics<targetKind::avx512> {
+struct simd_intrinsics<simdKind::avx512> {
 
     static bool has_scatter() {
         return true;
@@ -147,4 +145,4 @@ struct simd_intrinsics<targetKind::avx512> {
     }
 };
 
-}} // closing namespaces
+} // namespace modcc
diff --git a/modcc/backends/base.hpp b/modcc/backends/base.hpp
index cee812e84953152d847738b447f42e32646a022a..5149fe7904661693dc2d7b230cb595a2be8971d7 100644
--- a/modcc/backends/base.hpp
+++ b/modcc/backends/base.hpp
@@ -8,15 +8,18 @@
 #include <stdexcept>
 #include <string>
 
-#include "options.hpp"
 #include "token.hpp"
 #include "textbuffer.hpp"
-#include "util/meta.hpp"
 
-namespace arb {
+enum class simdKind {
+    none, avx2, avx512
+};
+
 namespace modcc {
 
-using arb::util::enable_if_t;
+template <bool V, typename R = void>
+using enable_if_t = typename std::enable_if<V, R>::type;
+
 using operand_fn_t = std::function<void(TextBuffer&)>;
 
 static void emit_operands(TextBuffer& tb, operand_fn_t emitter) {
@@ -41,7 +44,7 @@ static operand_fn_t arg_emitter(const operand_fn_t& arg) {
 }
 
 
-template<targetKind Arch>
+template<simdKind Arch>
 struct simd_intrinsics {
     static std::string emit_headers();
     static std::string emit_simd_width();
@@ -87,4 +90,4 @@ struct simd_intrinsics {
     static bool has_scatter();
 };
 
-}} // closing namespaces
+} // namespace modcc
diff --git a/modcc/constantfolder.cpp b/modcc/constantfolder.cpp
deleted file mode 100644
index 8f04714fd2670f82a32459e8b18bc8f24df8c52d..0000000000000000000000000000000000000000
--- a/modcc/constantfolder.cpp
+++ /dev/null
@@ -1,176 +0,0 @@
-#include <iostream>
-#include <cmath>
-
-#include "constantfolder.hpp"
-
-/*
-   perform a walk of the AST
-   - pre-order : mark node as not a number
-   - in-order  : convert all children that marked themselves as numbers into NumberExpressions
-   - post-order: mark the current node as a constant if all of its children
-                 were converted to NumberExpressions
-
-   all calculations and intermediate results use 80 bit floating point precision (long double)
-*/
-
-// default is to do nothing and return
-void ConstantFolderVisitor::visit(Expression *e) {
-    is_number = false;
-}
-
-// number expresssion
-void ConstantFolderVisitor::visit(NumberExpression *e) {
-    // set constant number and return
-    is_number = true;
-    value = e->value();
-}
-
-/// unary expresssion
-void ConstantFolderVisitor::visit(UnaryExpression *e) {
-    is_number = false;
-    e->expression()->accept(this);
-    if(is_number) {
-        if(!e->is_number()) {
-            e->replace_expression(
-                make_expression<NumberExpression>(e->location(), value));
-        }
-        switch(e->op()) {
-            case tok::minus :
-                value = -value;
-                return;
-            case tok::exp :
-                value = std::exp(value);
-                return;
-            case tok::cos :
-                value = std::cos(value);
-                return;
-            case tok::sin :
-                value = std::sin(value);
-                return;
-            case tok::log :
-                value = std::log(value);
-                return;
-            default :
-                throw compiler_exception(
-                    "attempting constant folding on unsuported unary operator "
-                    + yellow(token_string(e->op())),
-                    e->location());
-        }
-    }
-}
-
-// binary expresssion
-// handle all binary expressions with one routine, because the
-// pre-order and in-order code is the same for all cases
-void ConstantFolderVisitor::visit(BinaryExpression *e) {
-    bool lhs_is_number = false;
-    long double lhs_value  = 0;
-
-    // check the lhs
-    is_number = false;
-    e->lhs()->accept(this);
-    if(is_number) {
-        lhs_value = value;
-        lhs_is_number = true;
-        // replace lhs with a number node, if it is not already one
-        if(!e->lhs()->is_number()) {
-            e->replace_lhs( make_expression<NumberExpression>(e->location(), value) );
-        }
-    }
-    //std::cout << "lhs : " << e->lhs()->to_string() << std::endl;
-
-    // check the rhs
-    is_number = false;
-    e->rhs()->accept(this);
-    if(is_number) {
-        // replace rhs with a number node, if it is not already one
-        if(!e->rhs()->is_number()) {
-            //std::cout << "rhs : " << e->rhs()->to_string() << " -> ";
-            e->replace_rhs( make_expression<NumberExpression>(e->location(), value) );
-            //std::cout << e->rhs()->to_string() << std::endl;
-        }
-    }
-    //std::cout << "rhs : " << e->rhs()->to_string() << std::endl;
-
-    auto rhs_is_number = is_number;
-    is_number = rhs_is_number && lhs_is_number;
-
-    // check to see if both lhs and rhs are numbers
-    // mark this node as a number if so
-    if(is_number) {
-        // be careful to get the order of operation right for
-        // non-computative operators
-        switch(e->op()) {
-            case tok::plus :
-                value = lhs_value + value;
-                return;
-            case tok::minus :
-                value = lhs_value - value;
-                return;
-            case tok::times :
-                value = lhs_value * value;
-                return;
-            case tok::divide :
-                value = lhs_value / value;
-                return;
-            case tok::pow :
-                value = std::pow(lhs_value, value);
-                return;
-            // don't fold comparison operators (we have no internal support
-            // for boolean values in nodes). leave for the back end compiler.
-            // not a big deal, because these are not counted when estimating
-            // flops with the FLOP visitor
-            case tok::lt     :
-            case tok::lte    :
-            case tok::gt     :
-            case tok::gte    :
-            case tok::equality :
-                is_number = false;
-                return;
-            default         :
-                throw compiler_exception(
-                    "attempting constant folding on unsuported binary operator "
-                    + yellow(token_string(e->op())),
-                    e->location());
-        }
-    }
-}
-
-void ConstantFolderVisitor::visit(CallExpression *e) {
-    is_number = false;
-    for(auto& a : e->args()) {
-        a->accept(this);
-        if(is_number) {
-            // replace rhs with a number node, if it is not already one
-            if(!a->is_number()) {
-                a.reset(new NumberExpression(a->location(), value));
-            }
-        }
-    }
-}
-
-void ConstantFolderVisitor::visit(BlockExpression *e) {
-    is_number = false;
-    for(auto &expression : e->statements()) {
-        expression->accept(this);
-    }
-}
-
-void ConstantFolderVisitor::visit(FunctionExpression *e) {
-    is_number = false;
-    e->body()->accept(this);
-}
-
-void ConstantFolderVisitor::visit(ProcedureExpression *e) {
-    is_number = false;
-    e->body()->accept(this);
-}
-
-void ConstantFolderVisitor::visit(IfExpression *e) {
-    is_number = false;
-    e->condition()->accept(this);
-    e->true_branch()->accept(this);
-    if(e->false_branch()) {
-        e->false_branch()->accept(this);
-    }
-}
diff --git a/modcc/constantfolder.hpp b/modcc/constantfolder.hpp
deleted file mode 100644
index ede3fefd68d3f732f4f804c88fb06b704d3d201d..0000000000000000000000000000000000000000
--- a/modcc/constantfolder.hpp
+++ /dev/null
@@ -1,26 +0,0 @@
-#pragma once
-
-#include "visitor.hpp"
-
-class ConstantFolderVisitor : public Visitor {
-public:
-    ConstantFolderVisitor() {}
-
-    void visit(Expression *e)           override;
-    // reduce child
-    void visit(UnaryExpression *e)      override;
-    // reduce left and right children
-    void visit(BinaryExpression *e)     override;
-    // reduce expressions in arguments
-    void visit(NumberExpression *e)     override;
-
-    void visit(CallExpression *e)       override;
-    void visit(ProcedureExpression *e)  override;
-    void visit(FunctionExpression *e)   override;
-    void visit(BlockExpression *e)      override;
-    void visit(IfExpression *e)         override;
-
-    // store intermediate results as long double, i.e. 80-bit precision
-    long double value = 0.;
-    bool is_number = false;
-};
diff --git a/modcc/cprinter.cpp b/modcc/cprinter.cpp
index 4163403216e0cc57effcf50d90626e65cc4dbeba..641efe7002a8d060ebc9f4ae63a546b8bb80aae0 100644
--- a/modcc/cprinter.cpp
+++ b/modcc/cprinter.cpp
@@ -5,17 +5,11 @@
 #include "cexpr_emit.hpp"
 #include "cprinter.hpp"
 #include "lexer.hpp"
-#include "options.hpp"
 
 /******************************************************************************
                               CPrinter driver
 ******************************************************************************/
 
-CPrinter::CPrinter(Module &m, bool o)
-    : module_(&m),
-      optimize_(o)
-{ }
-
 std::string CPrinter::emit_source() {
     // make a list of vector types, both parameters and assigned
     // and a list of all scalar types
@@ -33,10 +27,7 @@ std::string CPrinter::emit_source() {
         }
     }
 
-    std::string module_name = Options::instance().modulename;
-    if (module_name == "") {
-        module_name = module_->name();
-    }
+    std::string module_name = module_->module_name();
 
     //////////////////////////////////////////////
     //////////////////////////////////////////////
@@ -119,14 +110,8 @@ std::string CPrinter::emit_source() {
     for(int i=0; i<num_vars; ++i) {
         char namestr[128];
         sprintf(namestr, "%-15s", array_variables[i]->name().c_str());
-        if(optimize_) {
-            text_.add_gutter() << namestr << " = data_.data() + "
-                               << i << "*field_size;";
-        }
-        else {
-            text_.add_gutter() << namestr << " = data_("
-                               << i << "*field_size, " << i+1 << "*size());";
-        }
+        text_.add_gutter() << namestr << " = data_("
+                           << i << "*field_size, " << i+1 << "*size());";
         text_.end_line();
     }
     text_.add_line();
@@ -136,12 +121,7 @@ std::string CPrinter::emit_source() {
     text_.add_line("// to per-compartment current in nA");
     text_.add_line("if (weights.size()) {");
     text_.increase_indentation();
-    if(optimize_) {
-        text_.add_line("memory::copy(weights, view(weights_, size()));");
-    }
-    else {
-        text_.add_line("memory::copy(weights, weights_(0, size()));");
-    }
+    text_.add_line("memory::copy(weights, weights_(0, size()));");
     text_.decrease_indentation();
     text_.add_line("}");
     text_.add_line("else {");
@@ -156,8 +136,7 @@ std::string CPrinter::emit_source() {
         double val = var->value();
         // only non-NaN fields need to be initialized, because data_
         // is NaN by default
-        std::string pointer_name = var->name();
-        if(!optimize_) pointer_name += ".data()";
+        std::string pointer_name = var->name()+".data()";
         if(val == val) {
             text_.add_gutter() << "std::fill(" << pointer_name << ", "
                                                << pointer_name << "+size(), "
@@ -461,12 +440,7 @@ std::string CPrinter::emit_source() {
 
     text_.add_line("array data_;");
     for(auto var: array_variables) {
-        if(optimize_) {
-            text_.add_line("value_type *" + var->name() + ";");
-        }
-        else {
-            text_.add_line("view " + var->name() + ";");
-        }
+        text_.add_line("view " + var->name() + ";");
     }
 
     for(auto var: scalar_variables) {
@@ -711,13 +685,7 @@ void CPrinter::visit(APIMethod *e) {
         // get loop dimensions
         text_.add_line("int n_ = node_index_.size();");
 
-        // hand off printing of loops to optimized or unoptimized backend
-        if(optimize_) {
-            print_APIMethod_optimized(e);
-        }
-        else {
-            print_APIMethod_unoptimized(e);
-        }
+        print_APIMethod(e);
     }
 
     // close up the loop body
@@ -767,140 +735,13 @@ void CPrinter::emit_api_loop(APIMethod* e,
     text_.add_line("}");
 }
 
-void CPrinter::print_APIMethod_unoptimized(APIMethod* e) {
+void CPrinter::print_APIMethod(APIMethod* e) {
     emit_api_loop(e, "int i_ = 0", "i_ < n_", "++i_");
     decrease_indentation();
 
     return;
 }
 
-void CPrinter::print_APIMethod_optimized(APIMethod* e) {
-    // ------------- get mechanism properties ------------- //
-
-    // make a list of all the local variables that have to be
-    // written out to global memory via an index
-    auto is_aliased = [this] (Symbol* s) -> LocalVariable* {
-        if(is_output(s)) {
-            return s->is_local_variable();
-        }
-        return nullptr;
-    };
-
-    std::vector<LocalVariable*> aliased_variables;
-    if(is_point_process()) {
-        for(auto &l : e->scope()->locals()) {
-            if(auto var = is_aliased(l.second.get())) {
-                aliased_variables.push_back(var);
-            }
-        }
-    }
-
-    aliased_output_ = aliased_variables.size()>0;
-
-    // only proceed with optimized output if the ouputs are aliased
-    // because all optimizations are for using ghost buffers to avoid
-    // race conditions in vectorized code
-    if(!aliased_output_) {
-        print_APIMethod_unoptimized(e);
-        return;
-    }
-
-    // ------------- block loop ------------- //
-
-    text_.add_line("constexpr int BSIZE = 4;");
-    text_.add_line("int NB = n_/BSIZE;");
-    for(auto out: aliased_variables) {
-        text_.add_line("value_type " + out->name() +  "[BSIZE];");
-    }
-
-    text_.add_line("for(int b_=0; b_<NB; ++b_) {");
-    text_.increase_indentation();
-    text_.add_line("int BSTART = BSIZE*b_;");
-    text_.add_line("int i_ = BSTART;");
-
-
-    text_.add_line("for(int j_=0; j_<BSIZE; ++j_, ++i_) {");
-    text_.increase_indentation();
-
-    // loads from external indexed arrays
-    for(auto &symbol : e->scope()->locals()) {
-        auto var = symbol.second->is_local_variable();
-        if(is_input(var)) {
-            auto ext = var->external_variable();
-            text_.add_gutter() << "value_type ";
-            var->accept(this);
-            text_ << " = ";
-            ext->accept(this);
-            text_.end_line(";");
-        }
-    }
-
-    e->body()->accept(this);
-
-    text_.decrease_indentation();
-    text_.add_line("}"); // end inner compute loop
-
-    text_.add_line("i_ = BSTART;");
-    text_.add_line("for(int j_=0; j_<BSIZE; ++j_, ++i_) {");
-    text_.increase_indentation();
-
-    for(auto out: aliased_variables) {
-        text_.add_gutter();
-        auto ext = out->external_variable();
-        ext->accept(this);
-        text_ << (ext->op() == tok::plus ? " += " : " -= ");
-        out->accept(this);
-        text_.end_line(";");
-    }
-
-    text_.decrease_indentation();
-    text_.add_line("}"); // end inner write loop
-    text_.decrease_indentation();
-    text_.add_line("}"); // end outer block loop
-
-    // ------------- block tail loop ------------- //
-
-    text_.add_line("int j_ = 0;");
-    text_.add_line("for(int i_=NB*BSIZE; i_<n_; ++j_, ++i_) {");
-    text_.increase_indentation();
-
-    for(auto &symbol : e->scope()->locals()) {
-        auto var = symbol.second->is_local_variable();
-        if(is_input(var)) {
-            auto ext = var->external_variable();
-            text_.add_gutter() << "value_type ";
-            var->accept(this);
-            text_ << " = ";
-            ext->accept(this);
-            text_.end_line(";");
-        }
-    }
-
-    e->body()->accept(this);
-
-    text_.decrease_indentation();
-    text_.add_line("}"); // end inner compute loop
-    text_.add_line("j_ = 0;");
-    text_.add_line("for(int i_=NB*BSIZE; i_<n_; ++j_, ++i_) {");
-    text_.increase_indentation();
-
-    for(auto out: aliased_variables) {
-        text_.add_gutter();
-        auto ext = out->external_variable();
-        ext->accept(this);
-        text_ << (ext->op() == tok::plus ? " += " : " -= ");
-        out->accept(this);
-        text_.end_line(";");
-    }
-
-    text_.decrease_indentation();
-    text_.add_line("}"); // end block tail loop
-
-    decrease_indentation();
-
-    aliased_output_ = false;
-}
-
 void CPrinter::visit(CallExpression *e) {
     text_ << e->name() << "(i_";
     for(auto& arg: e->args()) {
diff --git a/modcc/cprinter.hpp b/modcc/cprinter.hpp
index 8c8a5721757a770ec8fc749b619cb33f490c7d48..2da740fb38b80f0e23b60eb67df8fd3cd387d41a 100644
--- a/modcc/cprinter.hpp
+++ b/modcc/cprinter.hpp
@@ -6,10 +6,10 @@
 #include "textbuffer.hpp"
 #include "visitor.hpp"
 
-class CPrinter : public Visitor {
+class CPrinter: public Visitor {
 public:
     CPrinter() {}
-    CPrinter(Module &m, bool o=false);
+    explicit CPrinter(Module &m): module_(&m) {}
 
     virtual void visit(Expression *e)           override;
     virtual void visit(UnaryExpression *e)      override;
@@ -56,12 +56,10 @@ public:
 
 protected:
     void print_mechanism(Visitor *backend);
-    void print_APIMethod_optimized(APIMethod* e);
-    void print_APIMethod_unoptimized(APIMethod* e);
+    void print_APIMethod(APIMethod* e);
 
     Module *module_ = nullptr;
     TextBuffer text_;
-    bool optimize_ = false;
     bool aliased_output_ = false;
 
     bool is_input(Symbol *s) {
@@ -106,7 +104,6 @@ protected:
 
     bool is_ghost_local(Symbol *s) {
         if(!is_point_process()) return false;
-        if(!optimize_)          return false;
         if(!aliased_output_)    return false;
         if(is_arg_local(s))     return false;
         return is_output(s);
diff --git a/modcc/cudaprinter.cpp b/modcc/cudaprinter.cpp
index cea5f9317cd0a6e3f4730e161c8bf2a08cc3d5a9..8005dd8b45f2396e2c342e4c505faebd61778cbb 100644
--- a/modcc/cudaprinter.cpp
+++ b/modcc/cudaprinter.cpp
@@ -5,7 +5,6 @@
 #include "cexpr_emit.hpp"
 #include "cudaprinter.hpp"
 #include "lexer.hpp"
-#include "options.hpp"
 
 std::string CUDAPrinter::pack_name() {
     return module_name_ + "_ParamPack";
@@ -30,10 +29,7 @@ CUDAPrinter::CUDAPrinter(Module &m, bool o)
         }
     }
 
-    module_name_ = Options::instance().modulename;
-    if (module_name_ == "") {
-        module_name_ = m.name();
-    }
+    module_name_ = module_->module_name();
 
     //
     // Implementation header.
@@ -771,7 +767,7 @@ void CUDAPrinter::visit(IfExpression *e) {
 void CUDAPrinter::print_device_function_prototype(ProcedureExpression *e) {
     buffer().add_line("__device__");
     buffer().add_gutter() << "void " << e->name()
-                     << "(" << module_->name() << "_ParamPack const& params_,"
+                     << "(" << module_name_ << "_ParamPack const& params_,"
                      << "const int tid_";
     for(auto& arg : e->args()) {
         buffer() << ", arb::fvm_value_type " << arg->is_argument()->name();
@@ -811,7 +807,7 @@ void CUDAPrinter::visit(ProcedureExpression *e) {
 
         // Core `net_receive` kernel is called device-side from `kernel::deliver_events`.
         buffer().add_line(       "__device__");
-        buffer().add_gutter() << "void net_receive(const " << module_->name() << "_ParamPack& params_, "
+        buffer().add_gutter() << "void net_receive(const " << module_name_ << "_ParamPack& params_, "
                            << "arb::fvm_size_type i_, arb::fvm_value_type weight) {";
         buffer().add_line();
         buffer().increase_indentation();
@@ -834,7 +830,7 @@ void CUDAPrinter::visit(ProcedureExpression *e) {
         // of event delivery.
         buffer().add_line(       "__global__");
         buffer().add_gutter() << "void net_receive_global("
-                           << module_->name() << "_ParamPack params_, "
+                           << module_name_ << "_ParamPack params_, "
                            << "arb::fvm_size_type i_, arb::fvm_value_type weight) {";
         buffer().add_line();
         buffer().increase_indentation();
@@ -848,7 +844,7 @@ void CUDAPrinter::visit(ProcedureExpression *e) {
 
         buffer().add_line(       "__global__");
         buffer().add_gutter() << "void deliver_events("
-                           << module_->name() << "_ParamPack params_, "
+                           << module_name_ << "_ParamPack params_, "
                            << "arb::fvm_size_type mech_id, deliverable_event_stream_state state) {";
         buffer().add_line();
         buffer().increase_indentation();
@@ -882,7 +878,7 @@ void CUDAPrinter::visit(ProcedureExpression *e) {
 }
 
 std::string CUDAPrinter::APIMethod_prototype(APIMethod *e) {
-    return "void " + e->name() + "_" + module_->name()
+    return "void " + e->name() + "_" + module_name_
         + "(" + pack_name() + " params_)";
 }
 
diff --git a/modcc/expressionclassifier.cpp b/modcc/expressionclassifier.cpp
deleted file mode 100644
index 29008ec7674cf166d20ec2f005c127b7d7c176d4..0000000000000000000000000000000000000000
--- a/modcc/expressionclassifier.cpp
+++ /dev/null
@@ -1,323 +0,0 @@
-#include <iostream>
-#include <cmath>
-
-#include "error.hpp"
-#include "expressionclassifier.hpp"
-#include "modccutil.hpp"
-
-// this turns out to be quite easy, however quite fiddly to do right.
-
-// default is to do nothing and return
-void ExpressionClassifierVisitor::visit(Expression *e) {
-    throw compiler_exception(" attempting to apply linear analysis on " + e->to_string(), e->location());
-}
-
-// number expresssion
-void ExpressionClassifierVisitor::visit(NumberExpression *e) {
-    // save the coefficient as the number
-    coefficient_ = e->clone();
-}
-
-// identifier expresssion
-void ExpressionClassifierVisitor::visit(IdentifierExpression *e) {
-    // check if symbol of identifier matches the identifier
-    if(symbol_ == e->symbol()) {
-        found_symbol_ = true;
-        coefficient_.reset(new NumberExpression(Location(), "1"));
-    }
-    else {
-        coefficient_ = e->clone();
-    }
-}
-
-/// unary expresssion
-void ExpressionClassifierVisitor::visit(UnaryExpression *e) {
-    e->expression()->accept(this);
-    if(found_symbol_) {
-        switch(e->op()) {
-            // plus or minus don't change linearity
-            case tok::minus :
-                coefficient_ = unary_expression(Location(),
-                                                e->op(),
-                                                std::move(coefficient_));
-                return;
-            case tok::plus :
-                return;
-            // one of these applied to the symbol certainly isn't linear
-            case tok::exp :
-            case tok::cos :
-            case tok::sin :
-            case tok::log :
-                is_linear_ = false;
-                return;
-            default :
-                throw compiler_exception(
-                    "attempting to apply linear analysis on unsuported UnaryExpression "
-                    + yellow(token_string(e->op())), e->location());
-        }
-    }
-    else {
-        coefficient_ = e->clone();
-    }
-}
-
-// binary expresssion
-// handle all binary expressions with one routine, because the
-// pre-order and in-order code is the same for all cases
-void ExpressionClassifierVisitor::visit(BinaryExpression *e) {
-    bool lhs_contains_symbol = false;
-    bool rhs_contains_symbol = false;
-    expression_ptr lhs_coefficient;
-    expression_ptr rhs_coefficient;
-    expression_ptr lhs_constant;
-    expression_ptr rhs_constant;
-
-    // check the lhs
-    reset();
-    e->lhs()->accept(this);
-    lhs_contains_symbol = found_symbol_;
-    lhs_coefficient     = std::move(coefficient_);
-    lhs_constant        = std::move(constant_);
-    if(!is_linear_) return; // early return if nonlinear
-
-    // check the rhs
-    reset();
-    e->rhs()->accept(this);
-    rhs_contains_symbol = found_symbol_;
-    rhs_coefficient     = std::move(coefficient_);
-    rhs_constant        = std::move(constant_);
-    if(!is_linear_) return; // early return if nonlinear
-
-    // mark symbol as found if in either lhs or rhs
-    found_symbol_ = rhs_contains_symbol || lhs_contains_symbol;
-
-    if( found_symbol_ ) {
-        // if both lhs and rhs contain symbol check that the binary operator
-        // preserves linearity
-        // note that we don't have to test for linearity, because we abort early
-        // if either lhs or rhs are nonlinear
-        if( rhs_contains_symbol && lhs_contains_symbol ) {
-            // be careful to get the order of operation right for
-            // non-computative operators
-            switch(e->op()) {
-                // addition and subtraction are valid, nothing else is
-                case tok::plus :
-                case tok::minus :
-                    coefficient_ =
-                        binary_expression(Location(),
-                                          e->op(),
-                                          std::move(lhs_coefficient),
-                                          std::move(rhs_coefficient));
-                    return;
-                // multiplying two expressions that depend on symbol is nonlinear
-                case tok::times :
-                case tok::pow   :
-                case tok::divide :
-                default         :
-                    is_linear_ = false;
-                    return;
-            }
-        }
-        // special cases :
-        //      operator    | invalid symbol location
-        //      -------------------------------------
-        //      pow         | lhs OR rhs
-        //      comparisons | lhs OR rhs
-        //      division    | rhs
-        ////////////////////////////////////////////////////////////////////////
-        // only RHS contains the symbol
-        ////////////////////////////////////////////////////////////////////////
-        else if(rhs_contains_symbol) {
-            switch(e->op()) {
-                case tok::times  :
-                    // determine the linear coefficient
-                    if( rhs_coefficient->is_number() &&
-                        rhs_coefficient->is_number()->value()==1) {
-                        coefficient_ = lhs_coefficient->clone();
-                    }
-                    else {
-                        coefficient_ =
-                            binary_expression(Location(),
-                                              tok::times,
-                                              lhs_coefficient->clone(),
-                                              rhs_coefficient->clone());
-                    }
-                    // determine the constant
-                    if(rhs_constant) {
-                        constant_ =
-                            binary_expression(Location(),
-                                              tok::times,
-                                              std::move(lhs_coefficient),
-                                              std::move(rhs_constant));
-                    } else {
-                        constant_ = nullptr;
-                    }
-                    return;
-                case tok::plus :
-                    // constant term
-                    if(lhs_constant && rhs_constant) {
-                        constant_ =
-                            binary_expression(Location(),
-                                              tok::plus,
-                                              std::move(lhs_constant),
-                                              std::move(rhs_constant));
-                    }
-                    else if(rhs_constant) {
-                        constant_ = binary_expression(Location(),
-                                                      tok::plus,
-                                                      std::move(lhs_coefficient),
-                                                      std::move(rhs_constant));
-                    }
-                    else {
-                        constant_ = std::move(lhs_coefficient);
-                    }
-                    // coefficient
-                    coefficient_ = std::move(rhs_coefficient);
-                    return;
-                case tok::minus :
-                    // constant term
-                    if(lhs_constant && rhs_constant) {
-                        constant_ = binary_expression(Location(),
-                                                      tok::minus,
-                                                      std::move(lhs_constant),
-                                                      std::move(rhs_constant));
-                    }
-                    else if(rhs_constant) {
-                        constant_ = binary_expression(Location(),
-                                                      tok::minus,
-                                                      std::move(lhs_coefficient),
-                                                      std::move(rhs_constant));
-                    }
-                    else {
-                        constant_ = std::move(lhs_coefficient);
-                    }
-                    // coefficient
-                    coefficient_ = unary_expression(Location(),
-                                                    e->op(),
-                                                    std::move(rhs_coefficient));
-                    return;
-                case tok::pow    :
-                case tok::divide :
-                case tok::lt     :
-                case tok::lte    :
-                case tok::gt     :
-                case tok::gte    :
-                case tok::equality :
-                    is_linear_ = false;
-                    return;
-                default:
-                    return;
-            }
-        }
-        ////////////////////////////////////////////////////////////////////////
-        // only LHS contains the symbol
-        ////////////////////////////////////////////////////////////////////////
-        else if(lhs_contains_symbol) {
-            switch(e->op()) {
-                case tok::times  :
-                    // check if the lhs is == 1
-                    if( lhs_coefficient->is_number() &&
-                        lhs_coefficient->is_number()->value()==1) {
-                        coefficient_ = rhs_coefficient->clone();
-                    }
-                    else {
-                        coefficient_ =
-                            binary_expression(Location(),
-                                              tok::times,
-                                              std::move(lhs_coefficient),
-                                              std::move(rhs_coefficient));
-                    }
-                    // constant term
-                    if(lhs_constant) {
-                        constant_ = binary_expression(Location(),
-                                                      tok::times,
-                                                      std::move(lhs_constant),
-                                                      std::move(rhs_coefficient));
-                    } else {
-                        constant_ = nullptr;
-                    }
-                    return;
-                case tok::plus  :
-                    coefficient_ = std::move(lhs_coefficient);
-                    // constant term
-                    if(lhs_constant && rhs_constant) {
-                        constant_ = binary_expression(Location(),
-                                                      tok::plus,
-                                                      std::move(lhs_constant),
-                                                      std::move(rhs_constant));
-                    }
-                    else if(lhs_constant) {
-                        constant_ = binary_expression(Location(),
-                                                      tok::plus,
-                                                      std::move(lhs_constant),
-                                                      std::move(rhs_coefficient));
-                    }
-                    else {
-                        constant_ = std::move(rhs_coefficient);
-                    }
-                    return;
-                case tok::minus :
-                    coefficient_ = std::move(lhs_coefficient);
-                    // constant term
-                    if(lhs_constant && rhs_constant) {
-                        constant_ = binary_expression(Location(),
-                                                      tok::minus,
-                                                      std::move(lhs_constant),
-                                                      std::move(rhs_constant));
-                    }
-                    else if(lhs_constant) {
-                        constant_ = binary_expression(Location(),
-                                                      tok::minus,
-                                                      std::move(lhs_constant),
-                                                      std::move(rhs_coefficient));
-                    }
-                    else {
-                        constant_ = unary_expression(Location(),
-                                                     tok::minus,
-                                                     std::move(rhs_coefficient));
-                    }
-                    return;
-                case tok::divide:
-                    coefficient_ = binary_expression(Location(),
-                                                     tok::divide,
-                                                     std::move(lhs_coefficient),
-                                                     rhs_coefficient->clone());
-                    if(lhs_constant) {
-                        constant_ = binary_expression(Location(),
-                                                      tok::divide,
-                                                      std::move(lhs_constant),
-                                                      std::move(rhs_coefficient));
-                    }
-                    return;
-                case tok::pow    :
-                case tok::lt     :
-                case tok::lte    :
-                case tok::gt     :
-                case tok::gte    :
-                case tok::equality :
-                    is_linear_ = false;
-                    return;
-                default:
-                    return;
-            }
-        }
-    }
-    // neither lhs or rhs contains symbol
-    // continue building the coefficient
-    else {
-        coefficient_ = e->clone();
-    }
-}
-
-void ExpressionClassifierVisitor::visit(CallExpression *e) {
-    for(auto& a : e->args()) {
-        a->accept(this);
-        // we assume that the parameter passed into a function
-        // won't be linear
-        if(found_symbol_) {
-            is_linear_ = false;
-            return;
-        }
-    }
-}
-
diff --git a/modcc/expressionclassifier.hpp b/modcc/expressionclassifier.hpp
deleted file mode 100644
index 505bdd2b30fb4d018001e144a7dcf117e92c76db..0000000000000000000000000000000000000000
--- a/modcc/expressionclassifier.hpp
+++ /dev/null
@@ -1,121 +0,0 @@
-#pragma once
-
-#include <mutex>
-
-#include "constantfolder.hpp"
-#include "scope.hpp"
-#include "visitor.hpp"
-
-enum class expressionClassification {
-    constant,
-    linear,
-    nonlinear
-};
-
-class ExpressionClassifierVisitor : public Visitor {
-public:
-    ExpressionClassifierVisitor(Symbol *s)
-    : symbol_(s)
-    {
-        const_folder_ = new ConstantFolderVisitor();
-    }
-
-    void reset(Symbol* s) {
-        reset();
-        symbol_ = s;
-    }
-
-    void reset() {
-        is_linear_    = true;
-        found_symbol_ = false;
-        configured_   = false;
-        coefficient_  = nullptr;
-        constant_     = nullptr;
-    }
-
-    void visit(Expression *e)           override;
-    void visit(UnaryExpression *e)      override;
-    void visit(BinaryExpression *e)     override;
-    void visit(NumberExpression *e)     override;
-    void visit(IdentifierExpression *e) override;
-    void visit(CallExpression *e)       override;
-
-    expressionClassification classify() const {
-        if(!found_symbol_) {
-            return expressionClassification::constant;
-        }
-        if(is_linear_) {
-            return expressionClassification::linear;
-        }
-        return expressionClassification::nonlinear;
-    }
-
-    Expression *linear_coefficient() {
-        set();
-        return coefficient_.get();
-    }
-
-    Expression *constant_term() {
-        set();
-        return constant_.get();
-    }
-
-    ~ExpressionClassifierVisitor() {
-        delete const_folder_;
-    }
-
-private:
-
-    void set() const {
-        // a mutex is required because two threads might attempt to update
-        // the cached constant_/coefficient_ values, which would violate the
-        // condition that set() is const
-        std::lock_guard<std::mutex> g(mutex_);
-
-        // update the constant_ and coefficient_ terms if they have not already
-        // been set
-        if(!configured_) {
-            if(classify() == expressionClassification::linear) {
-                // if constat_ was never set, it must be zero
-                if(!constant_) {
-                    constant_ =
-                        make_expression<NumberExpression>(Location(), 0.);
-                }
-                // perform constant folding on the coefficient term
-                coefficient_->accept(const_folder_);
-                if(const_folder_->is_number) {
-                    // if the folding resulted in a constant, reset coefficient
-                    // to be a NumberExpression
-                    coefficient_.reset(new NumberExpression(
-                                            Location(),
-                                            const_folder_->value)
-                                      );
-                }
-            }
-            else if(classify() == expressionClassification::constant) {
-                coefficient_.reset(new NumberExpression(
-                                        Location(),
-                                        0.)
-                                  );
-            }
-            else { // nonlinear expression
-                coefficient_ = nullptr;
-                constant_    = nullptr;
-            }
-            configured_ = true;
-        }
-    }
-
-    // assume linear until otherwise proven
-    bool is_linear_     = true;
-    bool found_symbol_  = false;
-    mutable bool configured_    = false;
-    mutable expression_ptr coefficient_;
-    mutable expression_ptr constant_;
-    Symbol* symbol_;
-    ConstantFolderVisitor* const_folder_;
-
-    mutable std::mutex mutex_;
-
-};
-
diff --git a/modcc/io/bulkio.hpp b/modcc/io/bulkio.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..e140843d4053bfc45534cb95186e319dfc7a574f
--- /dev/null
+++ b/modcc/io/bulkio.hpp
@@ -0,0 +1,73 @@
+#pragma once
+
+// Read or write the contents of a file in toto.
+//
+// Overloads taking a filename throw std::ios_base::failure if the file cannot
+// be opened. Write failures are reported too: the stream overload of write_all
+// sets badbit on the stream, and the filename overload throws.
+
+#include <algorithm>
+#include <fstream>
+#include <ios>
+#include <iterator>
+#include <string>
+
+namespace io {
+
+// Replace the contents of A with all characters remaining in the stream.
+// A may be any type with an iterator-range assign(), e.g. std::string.
+template <typename HasAssign>
+void read_all(std::istream& in, HasAssign& A) {
+    A.assign(std::istreambuf_iterator<char>(in), std::istreambuf_iterator<char>());
+}
+
+// Replace the contents of A with the contents of the named file.
+// Throws std::ios_base::failure if the file cannot be opened.
+template <typename HasAssign>
+void read_all(const std::string& filename, HasAssign& A) {
+    std::ifstream fs;
+    fs.exceptions(std::ios::failbit);
+    fs.open(filename);
+    read_all(fs, A);
+}
+
+// Return all characters remaining in the stream as a string.
+inline std::string read_all(std::istream& in) {
+    std::string s;
+    read_all(in, s);
+    return s;
+}
+
+// Return the contents of the named file as a string.
+// Throws std::ios_base::failure if the file cannot be opened.
+inline std::string read_all(const std::string& filename) {
+    std::string s;
+    read_all(filename, s);
+    return s;
+}
+
+// Write every element of data, as a char, to the stream.
+// A failed write sets badbit on the stream: ostreambuf_iterator bypasses the
+// stream's own error state, so the failure has to be propagated by hand.
+template <typename Container>
+void write_all(const Container& data, std::ostream& out) {
+    auto sink = std::copy(std::begin(data), std::end(data), std::ostreambuf_iterator<char>(out));
+    if (sink.failed()) {
+        out.setstate(std::ios::badbit);
+    }
+}
+
+// Write every element of data to the named file, replacing its contents.
+// Throws std::ios_base::failure if the file cannot be opened or written.
+template <typename Container>
+void write_all(const Container& data, const std::string& filename) {
+    std::ofstream fs;
+    fs.exceptions(std::ios::failbit | std::ios::badbit);
+    fs.open(filename);
+    write_all(data, fs);
+    // Close explicitly so that an error flushing the last buffered data is
+    // reported here rather than being swallowed by the destructor.
+    fs.close();
+}
+
+}
diff --git a/modcc/modcc.cpp b/modcc/modcc.cpp
index d8caef1ff43195ecf8e5a27cf6da7ab9cd8da89c..ea2dafc1c3c43a3e60625ced65f74cfbfe0fdda8 100644
--- a/modcc/modcc.cpp
+++ b/modcc/modcc.cpp
@@ -1,230 +1,264 @@
-#include <chrono>
+#include <exception>
 #include <iostream>
-#include <fstream>
+#include <unordered_map>
+#include <unordered_set>
 
 #include <tclap/CmdLine.h>
 
 #include "cprinter.hpp"
 #include "cudaprinter.hpp"
-#include "lexer.hpp"
+#include "modccutil.hpp"
 #include "module.hpp"
 #include "parser.hpp"
 #include "perfvisitor.hpp"
-#include "modccutil.hpp"
-#include "options.hpp"
-
 #include "simd_printer.hpp"
 
-using namespace arb;
+#include "io/bulkio.hpp"
+
+using std::cout;
+using std::cerr;
+
+// Options and option parsing:
+
+int report_error(const std::string& message) {  // Write an "error: "-prefixed message line to stderr.
+    cerr << red("error: ") << message << "\n";
+    return 1;  // always 1, so callers can write `return report_error(...)`
+}
+
+int report_ice(const std::string& message) {  // Like report_error, but for internal compiler errors.
+    cerr << red("internal compiler error: ") << message << "\n"
+         << "\nPlease report this error to the modcc developers.\n";
+    return 1;  // always 1; main() returns this value directly
+}
+
+enum class targetKind {  // back ends for which main() can generate sources
+    cpu,
+    gpu,
+};
+
+std::unordered_map<std::string, targetKind> targetKindMap = {  // names accepted by -t/--target
+    {"cpu", targetKind::cpu},
+    {"gpu", targetKind::gpu}
+};
+
+std::unordered_map<std::string, simdKind> simdKindMap = {  // names accepted by -s/--simd
+    {"none", simdKind::none},
+    {"avx2", simdKind::avx2},
+    {"avx512", simdKind::avx512}
+};
+
+template <typename Map, typename V>  // Reverse lookup: the key of an entry in map whose value equals v.
+auto key_by_value(const Map& map, const V& v) -> decltype(map.begin()->first) {
+    for (const auto& kv: map) {
+        if (kv.second==v) return kv.first;  // first match in the map's iteration order
+    }
+    throw std::out_of_range("value not found in map");  // no entry has value v
+}
+
+struct Options {  // settings gathered from the command line in main()
+    std::string outprefix;   // -o: prefix for output file names; if empty, main() uses the module name
+    std::string modfile;     // path of the input .mod file
+    std::string modulename;  // -m: replaces the module name when non-empty
+    bool verbose = true;     // -V (main() overwrites this default with the switch value)
+    bool analysis = false;   // -A: print the performance analysis report
+    simdKind simd_arch = simdKind::none;  // -s: selects the printer used for the cpu target
+    std::unordered_set<targetKind, enum_hash> targets;  // -t: back ends to generate; the flag may be repeated
+};
+
+// Helper for formatting tabulated output (option reporting): writes "| text", then left-aligns and pads the next item written.
+struct table_prefix { std::string text; };
+std::ostream& operator<<(std::ostream& out, const table_prefix& tb) {
+    return out << cyan("| "+tb.text) << std::left << std::setw(61-tb.text.size());  // setw applies to the next insertion only
+}
+
+std::ostream& operator<<(std::ostream& out, const Options& opt) {  // Print the options as a table, one row per setting.
+    static const char* noyes[2] = {"no", "yes"};  // indexed by a bool
+    static const std::string line_end = cyan("|") + "\n";  // closes each row
+    static const std::string tableline = cyan("."+std::string(60, '-')+".")+"\n";  // top and bottom rule
+
+    std::string targets;  // names of the selected targets, each preceded by a space
+    for (targetKind t: opt.targets) {
+        targets += " "+key_by_value(targetKindMap, t);
+    }
+
+    return out <<
+        tableline <<
+        table_prefix{"file"} << opt.modfile << line_end <<
+        table_prefix{"output"} << (opt.outprefix.empty()? "-": opt.outprefix) << line_end <<  // "-" stands for no prefix given
+        table_prefix{"verbose"} << noyes[opt.verbose] << line_end <<
+        table_prefix{"targets"} << targets << line_end <<
+        table_prefix{"simd"} << key_by_value(simdKindMap, opt.simd_arch) << line_end <<
+        table_prefix{"analysis"} << noyes[opt.analysis] << line_end <<
+        tableline;
+}
+
+// Constraints for TCLAP arguments that are names for enumeration values: the allowed values are the keys of a map.
+struct MapConstraint: private std::vector<std::string>, public TCLAP::ValuesConstraint<std::string> {
+    template <typename Map>
+    MapConstraint(const Map& map):
+        std::vector<std::string>(keys(map)),  // the vector base is listed first, so it is built before being passed below
+        TCLAP::ValuesConstraint<std::string>(static_cast<std::vector<std::string>&>(*this)) {}
+
+    template <typename Map>
+    static std::vector<std::string> keys(const Map& map) {  // the keys of map, in its iteration order
+        std::vector<std::string> ks;
+        for (auto& kv: map) ks.push_back(kv.first);
+        return ks;
+    }
+};
 
 int main(int argc, char **argv) {
+    Options opt;
 
-    // parse command line arguments
     try {
-        TCLAP::CmdLine cmd("welcome to mod2c", ' ', "0.1");
+        TCLAP::CmdLine cmd("modcc code generator for arbor", ' ', "0.1");
 
-        // input file name (to load multiple files we have to use UnlabeledMultiArg
         TCLAP::UnlabeledValueArg<std::string>
-            fin_arg("input_file", "the name of the .mod file to compile", true, "", "filename");
-        // output filename
+            fin_arg("input_file", "the name of the .mod file to compile", true, "", "filename", cmd);
+
         TCLAP::ValueArg<std::string>
-            fout_arg("o","output","name of output file", false,"","filename");
-        // output filename
+            fout_arg("o", "output", "prefix for output file names", false, "", "filename", cmd);
+
+        MapConstraint targets_arg_constraint(targetKindMap);
+        TCLAP::MultiArg<std::string>
+            target_arg("t", "target", "backend target={cpu, gpu}", false, &targets_arg_constraint, cmd);
+
+        MapConstraint simd_arg_constraint(simdKindMap);
         TCLAP::ValueArg<std::string>
-            target_arg("t","target","backend target={cpu,gpu}", true,"cpu","cpu/gpu");
-        // verbose mode
+            simd_arg("s", "simd", "use SIMD intrinsics={avx512, avx2}", false, "", &simd_arg_constraint, cmd);
+
         TCLAP::SwitchArg verbose_arg("V","verbose","toggle verbose mode", cmd, false);
-        // analysis mode
+
         TCLAP::SwitchArg analysis_arg("A","analyse","toggle analysis mode", cmd, false);
-        // optimization mode
-        TCLAP::SwitchArg opt_arg("O","optimize","turn optimizations on", cmd, false);
-        // Set module name explicitly
-        TCLAP::ValueArg<std::string>
-            module_arg("m", "module", "module name to use", false, "", "module");
 
-        cmd.add(fin_arg);
-        cmd.add(fout_arg);
-        cmd.add(target_arg);
-        cmd.add(module_arg);
+        TCLAP::ValueArg<std::string>
+            module_arg("m", "module", "module name to use (default taken from input .mod file)", false, "", "module", cmd);
 
         cmd.parse(argc, argv);
 
-        Options::instance().outputname = fout_arg.getValue();
-        Options::instance().has_output = Options::instance().outputname.size()>0;
-        Options::instance().filename = fin_arg.getValue();
-        Options::instance().modulename = module_arg.getValue();
-        Options::instance().verbose = verbose_arg.getValue();
-        Options::instance().optimize = opt_arg.getValue();
-        Options::instance().analysis = analysis_arg.getValue();
-        auto targstr = target_arg.getValue();
-        if(targstr == "cpu") {
-            Options::instance().target = targetKind::cpu;
-        }
-        else if(targstr == "gpu") {
-            Options::instance().target = targetKind::gpu;
-        }
-        else if(targstr == "avx512") {
-            Options::instance().target = targetKind::avx512;
-        }
-        else if(targstr == "avx2") {
-            Options::instance().target = targetKind::avx2;
+        opt.outprefix = fout_arg.getValue();
+        opt.modfile = fin_arg.getValue();
+        opt.modulename = module_arg.getValue();
+        opt.verbose = verbose_arg.getValue();
+        opt.analysis = analysis_arg.getValue();
+
+        if (!simd_arg.getValue().empty()) {
+            opt.simd_arch = simdKindMap.at(simd_arg.getValue());
         }
-        else {
-            std::cerr << red("error")
-                      << " target must be one in {cpu, gpu, avx2, avx512}\n";
-            return 1;
+
+        for (auto& target: target_arg.getValue()) {
+            opt.targets.insert(targetKindMap.at(target));
         }
     }
-    // catch any exceptions in command line handling
     catch(TCLAP::ArgException &e) {
-        std::cerr << "error: "   << e.error()
-                  << " for arg " << e.argId() << "\n";
+        return report_error(e.error()+" for argument "+to_string(e.argId()));
     }
 
     try {
-        // load the module from file passed as first argument
-        Module m(Options::instance().filename.c_str());
+        auto emit_header = [&opt](const char* h) {
+            if (opt.verbose) {
+                cout << green("[") << h << green("]") << "\n";
+            }
+        };
 
-        // check that the module is not empty
-        if(m.buffer().size()==0) {
-            std::cout << red("error: ") << white(argv[1])
-                      << " invalid or empty file" << std::endl;
-            return 1;
+        if (opt.verbose) {
+            cout << opt;
         }
 
-        if(Options::instance().verbose) {
-            Options::instance().print();
-        }
+        // Load module file and initialize Module object.
 
-        //
-        // parsing
-        //
-        if(Options::instance().verbose) std::cout << green("[") + "parsing" + green("]") << std::endl;
+        Module m(io::read_all(opt.modfile), opt.modfile);
 
-        // initialize the parser
-        Parser p(m, false);
+        if (m.empty()) {
+            return report_error("empty file: "+opt.modfile);
+        }
 
-        // parse
-        p.parse();
-        if( p.status()==lexerStatus::error ) {
-            return 1;
+        if (!opt.modulename.empty()) {
+            m.module_name(opt.modulename);
         }
 
-        //
-        // semantic analysis
-        //
-        if(Options::instance().verbose) {
-            std::cout << green("[") + "semantic analysis" + green("]") << "\n";
+        // Perform parsing and semantic analysis passes.
+
+        emit_header("parsing");
+        Parser p(m, false);
+        if (!p.parse()) {
+            // Parser::parse() writes its own errors to stderr.
+            return 1;
         }
 
+        emit_header("semantic analysis");
         m.semantic();
-
-        if( m.has_error() ) {
-            std::cerr << m.error_string() << std::endl;
+        if (m.has_warning()) {
+            cerr << m.warning_string() << "\n";
         }
-        if( m.has_warning() ) {
-            std::cerr << m.warning_string() << std::endl;
+        if (m.has_error()) {
+            return report_error(m.error_string());
         }
 
-        if(m.has_error()) {
-            return 1;
-        }
+        // Generate backend-specific sources for each backend provided.
 
-        //
-        // optimize
-        //
-        if(Options::instance().optimize) {
-            if(Options::instance().verbose) std::cout << green("[") + "optimize" + green("]") << std::endl;
-            m.optimize();
-            if(m.has_error()) {
-                return 1;
-            }
-        }
+        emit_header("code generation");
 
-        //
-        // generate output
-        //
-        if(Options::instance().verbose) {
-            std::cout << green("[") + "code generation"
-                      << green("]") << std::endl;
-        }
+        // If no output prefix given, use the module name.
+        std::string prefix = opt.outprefix.empty()? m.module_name(): opt.outprefix;
 
-        auto txt_to_file = [](std::string const& fname, std::string const& txt) {
-            std::ofstream fid(fname);
-            if (!fid.is_open()) {
-                throw std::runtime_error("Unable to open file "+fname+" for output.");
+        for (targetKind target: opt.targets) {
+            std::string outfile = prefix;
+            switch (target) {
+            case targetKind::gpu:
+                outfile += "_gpu";
+                {
+                    CUDAPrinter printer(m);
+                    io::write_all(printer.interface_text(), outfile+".hpp");
+                    io::write_all(printer.impl_header_text(), outfile+"_impl.hpp");
+                    io::write_all(printer.impl_text(), outfile+"_impl.cu");
+                }
+                break;
+            case targetKind::cpu:
+                outfile += "_cpu.hpp";
+                switch (opt.simd_arch) {
+                case simdKind::none:
+                    io::write_all(CPrinter(m).emit_source(), outfile);
+                    break;
+                case simdKind::avx2:
+                    io::write_all(SimdPrinter<simdKind::avx2>(m).emit_source(), outfile);
+                    break;
+                case simdKind::avx512:
+                    io::write_all(SimdPrinter<simdKind::avx512>(m).emit_source(), outfile);
+                    break;
+                }
             }
-            fid << txt;
-        };
-
-        const auto name = Options::instance().outputname;
-        const auto target = Options::instance().target;
-        if (target==targetKind::cpu) {
-            CPrinter printer(m, Options::instance().optimize);
-            txt_to_file(name+".hpp", printer.emit_source());
-        }
-        else if (target==targetKind::gpu) {
-            CUDAPrinter printer(m, Options::instance().optimize);
-            txt_to_file(name+".hpp",      printer.interface_text());
-            txt_to_file(name+"_impl.hpp", printer.impl_header_text());
-            txt_to_file(name+"_impl.cu",  printer.impl_text());
-        }
-        else if (target==targetKind::avx512) {
-            SimdPrinter<targetKind::avx512>  printer(m, Options::instance().optimize);
-            txt_to_file(name+".hpp", printer.emit_source());
-        }
-        else if (target==targetKind::avx2) {
-            SimdPrinter<targetKind::avx2>  printer(m, Options::instance().optimize);
-            txt_to_file(name+".hpp", printer.emit_source());
-        }
-        else {
-            throw std::runtime_error("Unknown target architecture.");
         }
 
-        //
-        // print module information
-        //
-        if(Options::instance().analysis) {
-            std::cout << green("performance analysis") << std::endl;
-            for(auto &symbol : m.symbols()) {
-                if(auto method = symbol.second->is_api_method()) {
-                    std::cout << white("-------------------------\n");
-                    std::cout << yellow("method " + method->name()) << "\n";
-                    std::cout << white("-------------------------\n");
+        // Optional analysis report.
+
+        if (opt.analysis) {
+            cout << green("performance analysis\n");
+            for (auto &symbol: m.symbols()) {
+                if (auto method = symbol.second->is_api_method()) {
+                    cout << white("-------------------------\n");
+                    cout << yellow("method " + method->name()) << "\n";
+                    cout << white("-------------------------\n");
 
                     FlopVisitor flops;
                     method->accept(&flops);
-                    std::cout << white("FLOPS") << std::endl;
-                    std::cout << flops.print() << std::endl;
+                    cout << white("FLOPS\n") << flops.print() << "\n";
 
-                    std::cout << white("MEMOPS") << std::endl;
                     MemOpVisitor memops;
                     method->accept(&memops);
-                    std::cout << memops.print() << std::endl;;
+                    cout << white("MEMOPS\n") << memops.print() << "\n";
                 }
             }
         }
     }
-
     catch(compiler_exception& e) {
-        std::cerr << red("internal compiler error: ")
-                  << white("this means a bug in the compiler,"
-                           " please report to modcc developers\n")
-                  << e.what() << " @ " << e.location() << "\n";
-        exit(1);
+        return report_ice(e.what()+std::string(" @ ")+to_string(e.location()));
     }
     catch(std::exception& e) {
-        std::cerr << red("internal compiler error: ")
-                  << white("this means a bug in the compiler,"
-                           " please report to modcc developers\n")
-                  << e.what() << "\n";
-        exit(1);
+        return report_ice(e.what());
     }
     catch(...) {
-        std::cerr << red("internal compiler error: ")
-                  << white("this means a bug in the compiler,"
-                           " please report to modcc developers\n");
-        exit(1);
+        return report_ice("");
     }
 
     return 0;
diff --git a/modcc/modccutil.hpp b/modcc/modccutil.hpp
index ad83a03a30400e2bd02c34f4d08cdf104a8364b8..7dd73f199bbf2ff8115e2fa686b455621443ccb1 100644
--- a/modcc/modccutil.hpp
+++ b/modcc/modccutil.hpp
@@ -41,6 +41,13 @@ bool is_in(const X& x, const std::initializer_list<X>& c) {
     return impl::is_in(x, c, std::false_type{});
 }
 
+struct enum_hash { // Hash functor for enumeration types: hashes the enumerator's underlying integral value.
+    template <typename E, typename V = typename std::underlying_type<E>::type> // V: underlying integer type of enum E
+    std::size_t operator()(E e) const noexcept {
+        return std::hash<V>{}(static_cast<V>(e)); // delegate to std::hash of the underlying type
+    }
+};
+
 inline std::string pprintf(const char *s) {
     std::string errstring;
     while(*s) {
diff --git a/modcc/module.cpp b/modcc/module.cpp
index 3c770261b02e0345345724e065d02df4b5350901..ff8e80c182f02b3f6153aa58a1849b604f614e1f 100644
--- a/modcc/module.cpp
+++ b/modcc/module.cpp
@@ -5,7 +5,6 @@
 #include <set>
 
 #include "errorvisitor.hpp"
-#include "expressionclassifier.hpp"
 #include "functionexpander.hpp"
 #include "functioninliner.hpp"
 #include "kineticrewriter.hpp"
@@ -96,77 +95,12 @@ public:
     }
 };
 
-Module::Module(std::string const& fname)
-: fname_(fname)
-{
-    // open the file at the end
-    std::ifstream fid;
-    fid.open(fname.c_str(), std::ios::binary | std::ios::ate);
-    if(!fid.is_open()) { // return if no file opened
-        return;
-    }
-
-    // determine size of file
-    std::size_t size = fid.tellg();
-    fid.seekg(0, std::ios::beg);
-
-    // allocate space for storage and read
-    buffer_.resize(size+1);
-    fid.read(buffer_.data(), size);
-    buffer_[size] = 0; // append \0 to terminate string
-}
-
-Module::Module(std::vector<char> const& buffer) {
-    buffer_ = buffer;
-
-    // add \0 to end of buffer if not already present
-    if (buffer_[buffer_.size()-1] != 0)
-        buffer_.push_back(0);
-}
-
-Module::Module(const char* buffer, size_t count) {
-    auto size = std::distance(buffer, std::find(buffer, buffer+count, '\0'));
-    buffer_.reserve(size+1);
-    buffer_.insert(buffer_.end(), buffer, buffer+size);
-    buffer_.push_back(0);
-}
-
-std::vector<Module::symbol_ptr>&
-Module::procedures() {
-    return procedures_;
-}
-
-std::vector<Module::symbol_ptr>const&
-Module::procedures() const {
-    return procedures_;
-}
-
-std::vector<Module::symbol_ptr>&
-Module::functions() {
-    return functions_;
-}
-
-std::vector<Module::symbol_ptr>const&
-Module::functions() const {
-    return functions_;
-}
-
-Module::symbol_map&
-Module::symbols() {
-    return symbols_;
-}
-
-Module::symbol_map const&
-Module::symbols() const {
-    return symbols_;
-}
-
 std::string Module::error_string() const {
     std::string str;
     for (const error_entry& entry: errors()) {
         if (!str.empty()) str += '\n';
         str += red("error   ");
-        str += white(pprintf("%:% ", file_name(), entry.location));
+        str += white(pprintf("%:% ", source_name(), entry.location));
         str += entry.message;
     }
     return str;
@@ -177,7 +111,7 @@ std::string Module::warning_string() const {
     for (auto& entry: warnings()) {
         if (!str.empty()) str += '\n';
         str += purple("warning   ");
-        str += white(pprintf("%:% ", file_name(), entry.location));
+        str += white(pprintf("%:% ", source_name(), entry.location));
         str += entry.message;
     }
     return str;
@@ -642,53 +576,6 @@ void Module::add_variables_to_symbols() {
     }
 }
 
-bool Module::optimize() {
-    // how to structure the optimizer
-    // loop over APIMethods
-    //      - apply optimization to each in turn
-    ConstantFolderVisitor folder;
-    for(auto &symbol : symbols_) {
-        auto kind = symbol.second->kind();
-        BlockExpression* body;
-        if(kind == symbolKind::procedure) {
-            // we are only interested in true procedures and APIMethods
-            auto proc = symbol.second->is_procedure();
-            auto pkind = proc->kind();
-            if(pkind == procedureKind::normal || pkind == procedureKind::api )
-                body = symbol.second->is_procedure()->body();
-            else
-                continue;
-        }
-        // for now don't look at functions
-        //else if(kind == symbolKind::function) {
-        //    body = symbol.second.expression->is_function()->body();
-        //}
-        else {
-            continue;
-        }
-
-        /////////////////////////////////////////////////////////////////////
-        // loop over folding and propogation steps until there are no changes
-        /////////////////////////////////////////////////////////////////////
-
-        // perform constant folding
-        for(auto& line : *body) {
-            line->accept(&folder);
-        }
-
-        // preform expression simplification
-        // i.e. removing zeros/refactoring reciprocals/etc
-
-        // perform constant propogation
-
-        /////////////////////////////////////////////////////////////////////
-        // remove dead local variables
-        /////////////////////////////////////////////////////////////////////
-    }
-
-    return true;
-}
-
 int Module::semantic_func_proc() {
     ////////////////////////////////////////////////////////////////////////////
     // now iterate over the functions and procedures and perform semantic
@@ -718,7 +605,7 @@ int Module::semantic_func_proc() {
             s->semantic(symbols_);
 
             // then use an error visitor to print out all the semantic errors
-            ErrorVisitor v(file_name());
+            ErrorVisitor v(source_name());
             s->accept(&v);
             errors += v.num_errors();
 
@@ -794,6 +681,11 @@ int Module::semantic_func_proc() {
                 std::cout << "body after inlining\n";
                 for(auto& l : b) std::cout << "  " << l->to_string() << " @ " << l->location() << "\n";
 #endif
+                // Finally, run a constant simplification pass on procedure bodies.
+                if (auto proc = s->is_procedure()) {
+                    proc->body(constant_simplify(proc->body()));
+                    s->semantic(symbols_); // redo semantic analysis on the replaced body
+                }
             }
         }
     }
diff --git a/modcc/module.hpp b/modcc/module.hpp
index 18c362ac0b31cb1882e26ad8ac56f35d7fdc308e..afadac59e17570a1d548a6ac94510db926861383 100644
--- a/modcc/module.hpp
+++ b/modcc/module.hpp
@@ -13,20 +13,37 @@ public:
     using symbol_map = scope_type::symbol_map;
     using symbol_ptr = scope_type::symbol_ptr;
 
-    Module(std::string const& fname);
-    Module(std::vector<char> const& buffer);
-    Module(const char* buffer, size_t count);
+    template <typename Iter> // Construct from the character range [b, e); source_name labels the text in error/warning messages.
+    Module(Iter b, Iter e, std::string source_name):
+        source_name_(std::move(source_name))
+    {
+        buffer_.assign(b, e);
+        buffer_.push_back('\0'); // always NUL-terminate the stored text
+    }
+
+    template <typename Container> // Construct from any container usable with std::begin/std::end; delegates to the range constructor.
+    explicit Module(const Container& text, std::string source_name):
+        Module(std::begin(text), std::end(text), std::move(source_name)) {}
 
     std::vector<char> const& buffer() const {
         return buffer_;
     }
 
-    std::string const& file_name()  const {return fname_;}
-    std::string const& name()  const {return neuron_block_.name;}
+    bool empty() const { // True if no text is held: the buffer is empty or begins with the NUL terminator.
+        return buffer_.empty() || buffer_.front()=='\0';
+    }
+
+    std::string module_name() const { // The explicitly set module name if any, otherwise neuron_block_.name.
+        return module_name_.empty()? neuron_block_.name: module_name_;
+    }
+    void module_name(std::string name) { module_name_ = std::move(name); } // Override the module name.
+
+    const std::string& source_name() const { return source_name_; } // Name supplied at construction; used when reporting errors and warnings.
 
-    void               title(const std::string& t) {title_ = t;}
-    std::string const& title() const          {return title_;}
+    void title(const std::string& t) { title_ = t; }
+    const std::string& title() const { return title_; }
 
+    // TODO: check usage to determine whether both the const and non-const accessors below are necessary.
     NeuronBlock &      neuron_block() {return neuron_block_;}
     NeuronBlock const& neuron_block() const {return neuron_block_;}
 
@@ -42,21 +59,21 @@ public:
     AssignedBlock &       assigned_block()        {return assigned_block_;}
     AssignedBlock const&  assigned_block()  const {return assigned_block_;}
 
-    void neuron_block(NeuronBlock const &n) {neuron_block_ = n;}
-    void state_block (StateBlock  const &s) {state_block_  = s;}
-    void units_block (UnitsBlock  const &u) {units_block_  = u;}
-    void parameter_block (ParameterBlock  const &p) {parameter_block_  = p;}
-    void assigned_block (AssignedBlock  const &a) {assigned_block_  = a;}
+    void neuron_block(const NeuronBlock& n) { neuron_block_ = n; }
+    void state_block(const StateBlock& s) { state_block_ = s; }
+    void units_block(const UnitsBlock& u) { units_block_ = u; }
+    void parameter_block(const ParameterBlock& p) { parameter_block_ = p; }
+    void assigned_block(const AssignedBlock& a) { assigned_block_ = a; }
 
     // access to the AST
-    std::vector<symbol_ptr>&      procedures();
-    std::vector<symbol_ptr>const& procedures() const;
+    std::vector<symbol_ptr>& procedures() { return procedures_; }
+    const std::vector<symbol_ptr>& procedures() const { return procedures_; }
 
-    std::vector<symbol_ptr>&      functions();
-    std::vector<symbol_ptr>const& functions() const;
+    std::vector<symbol_ptr>& functions() { return functions_; }
+    const std::vector<symbol_ptr>& functions() const { return functions_; }
 
-    symbol_map &      symbols();
-    symbol_map const& symbols() const;
+    symbol_map& symbols() { return symbols_; }
+    const symbol_map& symbols() const { return symbols_; }
 
     // error handling
     using error_stack::error;
@@ -80,12 +97,12 @@ public:
     // perform semantic analysis
     void add_variables_to_symbols();
     bool semantic();
-    bool optimize();
 
 private:
     moduleKind kind_;
     std::string title_;
-    std::string fname_;
+    std::string module_name_;
+    std::string source_name_;
     std::vector<char> buffer_; // character buffer loaded from file
 
     bool generate_initial_api();
diff --git a/modcc/options.hpp b/modcc/options.hpp
deleted file mode 100644
index 7de816921af7bb505b185bb59f941c7a7853788a..0000000000000000000000000000000000000000
--- a/modcc/options.hpp
+++ /dev/null
@@ -1,56 +0,0 @@
-#pragma once
-
-#include <iostream>
-#include "modccutil.hpp"
-
-enum class targetKind {
-    cpu,
-    gpu,
-    // Vectorisation targets
-    avx2,
-    avx512
- };
-
-struct Options {
-    std::string filename;
-    std::string outputname;
-    std::string modulename;
-    bool has_output = false;
-    bool verbose = true;
-    bool optimize = false;
-    bool analysis = false;
-    targetKind target = targetKind::cpu;
-
-    void print() {
-        std::cout << cyan("." + std::string(60, '-') + ".") << "\n";
-        std::cout << cyan("| file     ") << filename
-                  << std::string(61-11-filename.size(),' ')
-                  << cyan("|") << "\n";
-
-        std::string outname = (outputname.size() ? outputname : "stdout");
-        std::cout << cyan("| output   ") << outname
-                  << std::string(61-11-outname.size(),' ')
-                  << cyan("|") << "\n";
-        std::cout << cyan("| verbose  ") << (verbose  ? "yes" : "no ")
-                  << std::string(61-11-3,' ') << cyan("|") << "\n";
-        std::cout << cyan("| optimize ") << (optimize ? "yes" : "no ")
-                  << std::string(61-11-3,' ') << cyan("|") << "\n";
-        std::cout << cyan("| target   ")
-                  << (target==targetKind::cpu? "cpu" : "gpu")
-                  << std::string(61-11-3,' ') << cyan("|") << "\n";
-        std::cout << cyan("| analysis ") << (analysis ? "yes" : "no ")
-                  << std::string(61-11-3,' ') << cyan("|") << "\n";
-        std::cout << cyan("." + std::string(60, '-') + ".") << std::endl;
-    }
-
-    Options(const Options& other) = delete;
-    void operator=(const Options& other) = delete;
-
-    static Options& instance() {
-        static Options instance;
-        return instance;
-    }
-
-private:
-    Options() {}
-};
diff --git a/modcc/parser.cpp b/modcc/parser.cpp
index 6c7349e3b3d0c5f7abc075d1d24b5db584be8ae2..cb45d2547d7a3cbdb36c6916f119f7202f979c76 100644
--- a/modcc/parser.cpp
+++ b/modcc/parser.cpp
@@ -2,7 +2,6 @@
 #include <list>
 #include <cstring>
 
-#include "constantfolder.hpp"
 #include "parser.hpp"
 #include "perfvisitor.hpp"
 #include "token.hpp"
@@ -37,7 +36,7 @@ bool Parser::expect(tok tok, std::string const& str) {
 
 void Parser::error(std::string msg) {
     std::string location_info = pprintf(
-            "%:% ", module_ ? module_->file_name() : "", token_.location);
+            "%:% ", module_ ? module_->source_name() : "", token_.location);
     if(status_==lexerStatus::error) {
         // append to current string
         error_string_ += "\n" + white(location_info) + "\n  " +msg;
@@ -50,7 +49,7 @@ void Parser::error(std::string msg) {
 
 void Parser::error(std::string msg, Location loc) {
     std::string location_info = pprintf(
-            "%:% ", module_ ? module_->file_name() : "", loc);
+            "%:% ", module_ ? module_->source_name() : "", loc);
     if(status_==lexerStatus::error) {
         // append to current string
         error_string_ += "\n" + green(location_info) + msg;
diff --git a/modcc/simd_printer.hpp b/modcc/simd_printer.hpp
index 03f2957c0f3a1cb8fd8c6b4c0516268903625517..fa5c768d0bc4ae6836fda88f31c996f82a60fd83 100644
--- a/modcc/simd_printer.hpp
+++ b/modcc/simd_printer.hpp
@@ -6,7 +6,6 @@
 #include "backends/simd.hpp"
 #include "cprinter.hpp"
 #include "modccutil.hpp"
-#include "options.hpp"
 #include "textbuffer.hpp"
 
 #ifdef __GNUC__
@@ -15,22 +14,16 @@
 #   define ANNOT_UNUSED ""
 #endif
 
-
-using namespace arb;
-
-template<targetKind Arch>
-class SimdPrinter : public CPrinter {
+template <simdKind Arch>
+class SimdPrinter: public CPrinter {
 public:
-    SimdPrinter()
-        : cprinter_(make_unique<CPrinter>())
+    SimdPrinter(): cprinter_(make_unique<CPrinter>())
     {}
 
-    // Initialize our base CPrinter in default unoptimized mode; we handle the
-    // vectorization ourselves
-    SimdPrinter(Module& m, bool optimize = false)
-        : CPrinter(m),
-          cprinter_(make_unique<CPrinter>(m))
-    { }
+    explicit SimdPrinter(Module& m):
+        CPrinter(m),
+        cprinter_(make_unique<CPrinter>(m)) // separate plain CPrinter, used to emit unvectorized procedure code
+    {}
 
     void visit(NumberExpression *e) override {
         simd_backend::emit_set_value(text_, e->value());
@@ -102,7 +95,7 @@ private:
     bool range_load_ = true;
 };
 
-template<targetKind Arch>
+template <simdKind Arch>
 void SimdPrinter<Arch>::visit(APIMethod *e) {
     text_.add_gutter() << "void " << e->name() << "() override {\n";
     if (!e->scope()) { // error: semantic analysis has not been performed
@@ -157,7 +150,7 @@ void SimdPrinter<Arch>::visit(APIMethod *e) {
     text_.add_line("}\n");
 }
 
-template<targetKind Arch>
+template <simdKind Arch>
 void SimdPrinter<Arch>::emit_indexed_view(LocalVariable* var,
                                           std::set<std::string>& decls) {
     auto const& name = var->name();
@@ -187,7 +180,7 @@ void SimdPrinter<Arch>::emit_indexed_view(LocalVariable* var,
     }
 }
 
-template<targetKind Arch>
+template <simdKind Arch>
 void SimdPrinter<Arch>::emit_indexed_view_simd(LocalVariable* var,
                                                std::set<std::string>& decls) {
     auto const& name = var->name();
@@ -235,7 +228,7 @@ void SimdPrinter<Arch>::emit_indexed_view_simd(LocalVariable* var,
     text_.end_line(";");
 }
 
-template<targetKind Arch>
+template <simdKind Arch>
 void SimdPrinter<Arch>::emit_api_loop(APIMethod* e,
                                       const std::string& start,
                                       const std::string& end,
@@ -391,7 +384,7 @@ void SimdPrinter<Arch>::emit_api_loop(APIMethod* e,
     text_.add_line("}");
 }
 
-template<targetKind Arch>
+template <simdKind Arch>
 void SimdPrinter<Arch>::visit(IndexedVariable *e) {
     std::string vindex_name, value_name;
 
@@ -409,7 +402,7 @@ void SimdPrinter<Arch>::visit(IndexedVariable *e) {
     simd_backend::emit_gather(text_, value_name, vindex_name, "sizeof(value_type)");
 }
 
-template<targetKind Arch>
+template <simdKind Arch>
 void SimdPrinter<Arch>::visit(CellIndexedVariable *e) {
     std::string vindex_name, value_name;
 
@@ -422,7 +415,7 @@ void SimdPrinter<Arch>::visit(CellIndexedVariable *e) {
     simd_backend::emit_gather(text_, vindex_name, value_name, "sizeof(value_type)");
 }
 
-template<targetKind Arch>
+template <simdKind Arch>
 void SimdPrinter<Arch>::visit(BlockExpression *e) {
     if (!e->is_nested()) {
         std::vector<std::string> names;
@@ -458,7 +451,7 @@ void SimdPrinter<Arch>::visit(BlockExpression *e) {
     }
 }
 
-template<targetKind Arch>
+template <simdKind Arch>
 void SimdPrinter<Arch>::visit(BinaryExpression *e) {
     auto lhs = e->lhs();
     auto rhs = e->rhs();
@@ -480,7 +473,7 @@ void SimdPrinter<Arch>::visit(BinaryExpression *e) {
     }
 }
 
-template<targetKind Arch>
+template <simdKind Arch>
 void SimdPrinter<Arch>::visit(AssignmentExpression *e) {
     auto is_memop = [](Expression *e) {
         auto ident = e->is_identifier();
@@ -511,7 +504,7 @@ void SimdPrinter<Arch>::visit(AssignmentExpression *e) {
 }
 
 
-template<targetKind Arch>
+template <simdKind Arch>
 void SimdPrinter<Arch>::visit(VariableExpression *e) {
     if (e->is_range() && range_load_) {
         simd_backend::emit_load_unaligned(text_, "&" + e->name() + "[off_]");
@@ -524,7 +517,7 @@ void SimdPrinter<Arch>::visit(VariableExpression *e) {
     }
 }
 
-template<targetKind Arch>
+template <simdKind Arch>
 void SimdPrinter<Arch>::visit(UnaryExpression *e) {
 
     auto arg = e->expression();
@@ -539,7 +532,7 @@ void SimdPrinter<Arch>::visit(UnaryExpression *e) {
     }
 }
 
-template<targetKind Arch>
+template <simdKind Arch>
 void SimdPrinter<Arch>::visit(PowBinaryExpression *e) {
     auto lhs = e->lhs();
     auto rhs = e->rhs();
@@ -548,7 +541,7 @@ void SimdPrinter<Arch>::visit(PowBinaryExpression *e) {
     simd_backend::emit_pow(text_, emit_lhs, emit_rhs);
 }
 
-template<targetKind Arch>
+template <simdKind Arch>
 void SimdPrinter<Arch>::visit(CallExpression *e) {
     text_ << e->name() << "(off_";
     for (auto& arg: e->args()) {
@@ -558,9 +551,9 @@ void SimdPrinter<Arch>::visit(CallExpression *e) {
     text_ << ")";
 }
 
-template<targetKind Arch>
+template <simdKind Arch>
 void SimdPrinter<Arch>::visit(ProcedureExpression *e) {
-    auto emit_procedure_unoptimized = [this](ProcedureExpression* e) {
+    auto emit_procedure_unvectorized = [this](ProcedureExpression* e) {
         auto cprinter = cprinter_.get();
         cprinter->clear_text();
         cprinter->set_gutter(text_.get_gutter());
@@ -570,7 +563,7 @@ void SimdPrinter<Arch>::visit(ProcedureExpression *e) {
 
     if (e->kind() == procedureKind::net_receive) {
         // Use non-vectorized printer for printing net_receive
-        emit_procedure_unoptimized(e);
+        emit_procedure_unvectorized(e);
         return;
     }
 
@@ -601,5 +594,5 @@ void SimdPrinter<Arch>::visit(ProcedureExpression *e) {
     text_.add_line();
 
     // Emit also the unvectorised version of the procedure
-    emit_procedure_unoptimized(e);
+    emit_procedure_unvectorized(e);
 }
diff --git a/modcc/symdiff.cpp b/modcc/symdiff.cpp
index 36e98c6943eda5172771acbef44605c46b465ff5..f9f6a4bc16af84e827947beec7980c0fee4027a1 100644
--- a/modcc/symdiff.cpp
+++ b/modcc/symdiff.cpp
@@ -261,10 +261,6 @@ private:
     std::string id_;
 };
 
-// ConstantSimplifyVisitior is not the same as ConstantFolderVisitor, as there is no way for a visitor
-// to modify an expression in place (only its children). This visitor instead builds a new expression
-// from the given one with constant simplifications.
-
 long double expr_value(Expression* e) {
     return e && e->is_number()? e->is_number()->value(): NAN;
 }
diff --git a/src/backends/gpu/fvm.cpp b/src/backends/gpu/fvm.cpp
index c1d98df56065604d6d876bdfcdd524b0e4d30e0b..90b10e2c48248af267ebb14ad874a56bbe53ee4c 100644
--- a/src/backends/gpu/fvm.cpp
+++ b/src/backends/gpu/fvm.cpp
@@ -1,11 +1,11 @@
 #include "fvm.hpp"
 
-#include <mechanisms/gpu/hh.hpp>
-#include <mechanisms/gpu/pas.hpp>
-#include <mechanisms/gpu/expsyn.hpp>
-#include <mechanisms/gpu/exp2syn.hpp>
-#include <mechanisms/gpu/test_kin1.hpp>
-#include <mechanisms/gpu/test_kinlva.hpp>
+#include <mechanisms/gpu/hh_gpu.hpp>
+#include <mechanisms/gpu/pas_gpu.hpp>
+#include <mechanisms/gpu/expsyn_gpu.hpp>
+#include <mechanisms/gpu/exp2syn_gpu.hpp>
+#include <mechanisms/gpu/test_kin1_gpu.hpp>
+#include <mechanisms/gpu/test_kinlva_gpu.hpp>
 
 namespace arb {
 namespace gpu {
@@ -20,5 +20,5 @@ backend::mech_map_ = {
     { "test_kinlva", maker<mechanism_test_kinlva> }
 };
 
-} // namespace multicore
+} // namespace gpu
 } // namespace arb
diff --git a/src/backends/multicore/fvm.cpp b/src/backends/multicore/fvm.cpp
index 25e75e3c3c7526488b14d76f57c09417e2150764..1ebbdb292e22cf2f6f48db381d5572234d0218e8 100644
--- a/src/backends/multicore/fvm.cpp
+++ b/src/backends/multicore/fvm.cpp
@@ -1,11 +1,11 @@
 #include "fvm.hpp"
 
-#include <mechanisms/multicore/hh.hpp>
-#include <mechanisms/multicore/pas.hpp>
-#include <mechanisms/multicore/expsyn.hpp>
-#include <mechanisms/multicore/exp2syn.hpp>
-#include <mechanisms/multicore/test_kin1.hpp>
-#include <mechanisms/multicore/test_kinlva.hpp>
+#include <mechanisms/multicore/hh_cpu.hpp>
+#include <mechanisms/multicore/pas_cpu.hpp>
+#include <mechanisms/multicore/expsyn_cpu.hpp>
+#include <mechanisms/multicore/exp2syn_cpu.hpp>
+#include <mechanisms/multicore/test_kin1_cpu.hpp>
+#include <mechanisms/multicore/test_kinlva_cpu.hpp>
 
 namespace arb {
 namespace multicore {
diff --git a/tests/modcc/CMakeLists.txt b/tests/modcc/CMakeLists.txt
index 0416812aee8bc8abebdd96d04bc2d03a3084fbb9..261e9b2bc6794b2d4e8fb7254b4846a7d5eaae67 100644
--- a/tests/modcc/CMakeLists.txt
+++ b/tests/modcc/CMakeLists.txt
@@ -4,7 +4,6 @@ set(MODCC_TEST_SOURCES
     test_kinetic_rewriter.cpp
     test_module.cpp
     test_msparse.cpp
-    test_optimization.cpp
     test_parser.cpp
     test_printers.cpp
     test_removelocals.cpp
diff --git a/tests/modcc/test_module.cpp b/tests/modcc/test_module.cpp
index 74fa541b71c8911e30f3aa68fd17671549d57195..f1517653f372e37ee0fafa2ef57a8737975007d1 100644
--- a/tests/modcc/test_module.cpp
+++ b/tests/modcc/test_module.cpp
@@ -1,8 +1,9 @@
 #include "test.hpp"
 #include "module.hpp"
+#include "io/bulkio.hpp"
 
 TEST(Module, open) {
-    Module m(DATADIR "/test.mod");
-    if(!m.buffer().size()) {
+    Module m(io::read_all(DATADIR "/test.mod"), "test.mod");
+    if(m.empty()) { // buffer() always ends with an appended '\0', so its size is never zero
         std::cout << "skipping Module.open test because unable to open input file" << std::endl;
         return;
diff --git a/tests/modcc/test_optimization.cpp b/tests/modcc/test_optimization.cpp
deleted file mode 100644
index af289eb65d765b55f1d46c68b4ad4840fd58f61b..0000000000000000000000000000000000000000
--- a/tests/modcc/test_optimization.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
-#include <cmath>
-
-#include "test.hpp"
-
-#include "constantfolder.hpp"
-#include "modccutil.hpp"
-
-TEST(Optimizer, constant_folding) {
-    ConstantFolderVisitor v;
-    {
-        auto e = parse_line_expression("x = 2*3");
-        verbose_print(e);
-        e->accept(&v);
-        EXPECT_EQ(e->is_assignment()->rhs()->is_number()->value(), 6);
-        verbose_print(e);
-        verbose_print();
-    }
-    {
-        auto e = parse_line_expression("x = 1 + 2 + 3");
-        verbose_print(e);
-        e->accept(&v);
-        EXPECT_EQ(e->is_assignment()->rhs()->is_number()->value(), 6);
-        verbose_print(e);
-        verbose_print();
-    }
-    {
-        auto e = parse_line_expression("x = exp(2)");
-        verbose_print(e);
-        e->accept(&v);
-        // The tolerance has to be loosend to 1e-15, because the optimizer performs
-        // all intermediate calculations in 80 bit precision, which disagrees in
-        // the 16 decimal place to the double precision value from std::exp(2.0).
-        // This is a good thing: by using the constant folder we increase accuracy
-        // over the unoptimized code!
-        EXPECT_EQ(std::fabs(e->is_assignment()->rhs()->is_number()->value()-std::exp(2.0))<1e-15, true);
-        verbose_print(e);
-        verbose_print("" );
-    }
-    {
-        auto e = parse_line_expression("x= 2*2 + 3");
-        verbose_print(e);
-        e->accept(&v);
-        EXPECT_EQ(e->is_assignment()->rhs()->is_number()->value(), 7);
-        verbose_print(e);
-        verbose_print();
-    }
-    {
-        auto e = parse_line_expression("x= 3 + 2*2");
-        verbose_print(e);
-        e->accept(&v);
-        EXPECT_EQ(e->is_assignment()->rhs()->is_number()->value(), 7);
-        verbose_print(e);
-        verbose_print();
-    }
-    {
-        // this doesn't work: the (y+2) expression is not a constant, so folding stops.
-        // we need to fold the 2+3, which isn't possible with a simple walk.
-        // one approach would be try sorting communtative operations so that numbers
-        // are adjacent to one another in the tree
-        auto e = parse_line_expression("x= y + 2 + 3");
-        verbose_print(e);
-        e->accept(&v);
-        verbose_print(e);
-        verbose_print();
-    }
-    {
-        auto e = parse_line_expression("x= 2 + 3 + y");
-        verbose_print(e);
-        e->accept(&v);
-        verbose_print(e);
-        verbose_print();;
-    }
-    {
-        auto e = parse_line_expression("foo(2+3, log(32), 2*3 + x)");
-        verbose_print(e);
-        e->accept(&v);
-        verbose_print(e);
-        verbose_print();
-    }
-}
diff --git a/tests/modcc/test_parser.cpp b/tests/modcc/test_parser.cpp
index d2a39b21ba735e889421f1ec317c33e4ecf9b879..f91ce45339adfbee47c0a1604c44b59b5731141f 100644
--- a/tests/modcc/test_parser.cpp
+++ b/tests/modcc/test_parser.cpp
@@ -1,4 +1,5 @@
 #include <cmath>
+#include <cstring>
 #include <memory>
 
 #include "test.hpp"
@@ -6,6 +7,8 @@
 #include "modccutil.hpp"
 #include "parser.hpp"
 
+#include "io/bulkio.hpp"
+
 // overload for parser errors
 template <typename EPtr>
 void verbose_print(const EPtr& e, Parser& p, const char* text) {
@@ -69,7 +72,7 @@ template <typename RetUniqPtr>
 }
 
 TEST(Parser, full_file) {
-    Module m(DATADIR "/test.mod");
-    if (m.buffer().size()==0) {
+    Module m(io::read_all(DATADIR "/test.mod"), "test.mod");
+    if (m.empty()) { // buffer() always ends with an appended '\0', so its size is never zero
         std::cout << "skipping Parser.full_file test because unable to open input file" << std::endl;
         return;
@@ -582,8 +585,8 @@ TEST(Parser, parse_state_block) {
     };
 
     expression_ptr null;
-    for (auto& text: state_blocks) {
-        Module m(text, sizeof(text));
+    for (const auto& text: state_blocks) {
+        Module m(text, text+std::strlen(text), "");
         Parser p(m, false);
         p.parse_state_block();
         EXPECT_EQ(lexerStatus::happy, p.status());
diff --git a/tests/modcc/test_simd_backend.cpp b/tests/modcc/test_simd_backend.cpp
index 03cded6ef2e343a94043c176b254ad3d4b8f9ccb..f01fc2c5d25660c657998815eddde514130a0d28 100644
--- a/tests/modcc/test_simd_backend.cpp
+++ b/tests/modcc/test_simd_backend.cpp
@@ -1,16 +1,13 @@
 #include "backends/simd.hpp"
-#include "options.hpp"
 #include "textbuffer.hpp"
 #include "token.hpp"
 #include "test.hpp"
 
-using namespace arb;
-
 
 TEST(avx512, emit_binary_op) {
     TextBuffer tb;
 
-    using simd_backend = modcc::simd_intrinsics<targetKind::avx512>;
+    using simd_backend = modcc::simd_intrinsics<simdKind::avx512>;
 
     simd_backend::emit_binary_op(tb, tok::plus, "a", "b");
     EXPECT_EQ("_mm512_add_pd(a, b)", tb.str());
@@ -44,7 +41,7 @@ TEST(avx512, emit_binary_op) {
 TEST(avx512, emit_unary_op) {
     TextBuffer tb;
 
-    using simd_backend = modcc::simd_intrinsics<targetKind::avx512>;
+    using simd_backend = modcc::simd_intrinsics<simdKind::avx512>;
 
     // Test lambdas for generating the argument
     std::string arg = "a";
diff --git a/tests/modcc/test_visitors.cpp b/tests/modcc/test_visitors.cpp
index 6f1caf64d3a066d80d940cee3530c52a9f1d114d..786d8064b9495bab2a930a1c468e6b41760d583d 100644
--- a/tests/modcc/test_visitors.cpp
+++ b/tests/modcc/test_visitors.cpp
@@ -1,7 +1,5 @@
 #include "test.hpp"
 
-#include "constantfolder.hpp"
-#include "expressionclassifier.hpp"
 #include "perfvisitor.hpp"
 #include "parser.hpp"
 #include "modccutil.hpp"
@@ -156,138 +154,3 @@ TEST(FlopVisitor, function) {
     EXPECT_EQ(visitor.flops.pow, 1);
 }
 
-TEST(ClassificationVisitor, linear) {
-    std::vector<const char*> expressions =
-    {
-"x + y + z",
-"y + x + z",
-"y + z + x",
-"x - y - z",
-"y - x - z",
-"y - z - x",
-"z*(x + y + 2)",
-"(x + y)*z",
-"(x + y)/z",
-"x+3",
-"-x",
-"x+x+x+x",
-"2*x     ",
-"y*x     ",
-"x + y   ",
-"y + x   ",
-"y + z*x ",
-"2*(x*z + y)",
-"z*x - y",
-"(2+z)*(x*z + y)",
-"x/y",
-"(y - x)/z",
-"(x - y)/z",
-"y*(x - y)/z",
-    };
-
-    // create a scope that contains the symbols used in the tests
-    Scope<Symbol>::symbol_map globals;
-    globals["x"] = make_symbol<LocalVariable>(Location(), "x");
-    globals["y"] = make_symbol<LocalVariable>(Location(), "y");
-    globals["z"] = make_symbol<LocalVariable>(Location(), "z");
-    auto x = globals["x"].get();
-
-    auto scope = std::make_shared<Scope<Symbol>>(globals);
-
-    for(auto const& expression : expressions) {
-        auto e = parse_expression(expression);
-
-        // sanity check the compiler
-        EXPECT_NE(e, nullptr);
-        if( e==nullptr ) continue;
-
-        e->semantic(scope);
-        ExpressionClassifierVisitor v(x);
-        e->accept(&v);
-        EXPECT_EQ(v.classify(), expressionClassification::linear);
-
-        verbose_print("eq    ", e);
-        verbose_print("coeff ", v.linear_coefficient());
-        verbose_print("const ", v.constant_term());
-        verbose_print("----");
-    }
-}
-
-TEST(ClassificationVisitor, constant) {
-    std::vector<const char*> expressions =
-    {
-"y+3",
-"-y",
-"exp(y+z)",
-"1",
-"y^z",
-    };
-
-    // create a scope that contains the symbols used in the tests
-    Scope<Symbol>::symbol_map globals;
-    globals["x"] = make_symbol<LocalVariable>(Location(), "x");
-    globals["y"] = make_symbol<LocalVariable>(Location(), "y");
-    globals["z"] = make_symbol<LocalVariable>(Location(), "z");
-    auto scope = std::make_shared<Scope<Symbol>>(globals);
-    auto x = globals["x"].get();
-
-    for(auto const& expression : expressions) {
-        Parser p{expression};
-        auto e = p.parse_expression();
-
-        // sanity check the compiler
-        EXPECT_NE(e, nullptr);
-        if( e==nullptr ) continue;
-
-        e->semantic(scope);
-        ExpressionClassifierVisitor v(x);
-        e->accept(&v);
-        EXPECT_EQ(v.classify(), expressionClassification::constant);
-
-        verbose_print(e, p, expression);
-    }
-}
-
-TEST(ClassificationVisitor, nonlinear) {
-    std::vector<const char*> expressions =
-    {
-"x*x",
-"x*2*x",
-"x*(2+x)",
-"y/x",
-"x*(y + z*(x/y))",
-"exp(x)",
-"exp(x+y)",
-"exp(z*(x+y))",
-"log(x)",
-"cos(x)",
-"sin(x)",
-"x^y",
-"y^x",
-    };
-
-    // create a scope that contains the symbols used in the tests
-    Scope<Symbol>::symbol_map globals;
-    globals["x"] = make_symbol<LocalVariable>(Location(), "x");
-    globals["y"] = make_symbol<LocalVariable>(Location(), "y");
-    globals["z"] = make_symbol<LocalVariable>(Location(), "z");
-    auto scope = std::make_shared<Scope<Symbol>>(globals);
-    auto x = globals["x"].get();
-
-    ExpressionClassifierVisitor v(x);
-    for(auto const& expression : expressions) {
-        Parser p{expression};
-        auto e = p.parse_expression();
-
-        // sanity check the compiler
-        EXPECT_NE(e, nullptr);
-        if( e==nullptr ) continue;
-
-        e->semantic(scope);
-        v.reset();
-        e->accept(&v);
-        EXPECT_EQ(v.classify(), expressionClassification::nonlinear);
-
-        verbose_print(e, p, expression);
-    }
-}
diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
index c0f0d420b136d08b2b33ecd7c535c0d996e53023..ff5959382ecefe0a1e6dfe9d5a12978f31f78f1d 100644
--- a/tests/unit/CMakeLists.txt
+++ b/tests/unit/CMakeLists.txt
@@ -11,7 +11,7 @@ build_modules(
     DEST_DIR "${mech_proto_dir}"
     MECH_SUFFIX _proto
     MODCC_FLAGS -t cpu
-    GENERATES .hpp
+    GENERATES _cpu.hpp
     TARGET build_test_mods
 )
 
diff --git a/tests/unit/test_mechanisms.cpp b/tests/unit/test_mechanisms.cpp
index 7d3dae4eeaab731cd8b11ad24fe9560571ec4a12..3925e0fd348cfb0a18b26a52db5c6a356bdc1fc4 100644
--- a/tests/unit/test_mechanisms.cpp
+++ b/tests/unit/test_mechanisms.cpp
@@ -1,20 +1,20 @@
 #include "../gtest.h"
 
 // Prototype mechanisms in tests
-#include "mech_proto/expsyn.hpp"
-#include "mech_proto/exp2syn.hpp"
-#include "mech_proto/hh.hpp"
-#include "mech_proto/pas.hpp"
-#include "mech_proto/test_kin1.hpp"
-#include "mech_proto/test_kinlva.hpp"
+#include "mech_proto/expsyn_cpu.hpp"
+#include "mech_proto/exp2syn_cpu.hpp"
+#include "mech_proto/hh_cpu.hpp"
+#include "mech_proto/pas_cpu.hpp"
+#include "mech_proto/test_kin1_cpu.hpp"
+#include "mech_proto/test_kinlva_cpu.hpp"
 
 // modcc generated mechanisms
-#include "mechanisms/multicore/expsyn.hpp"
-#include "mechanisms/multicore/exp2syn.hpp"
-#include "mechanisms/multicore/hh.hpp"
-#include "mechanisms/multicore/pas.hpp"
-#include "mechanisms/multicore/test_kin1.hpp"
-#include "mechanisms/multicore/test_kinlva.hpp"
+#include "mechanisms/multicore/expsyn_cpu.hpp"
+#include "mechanisms/multicore/exp2syn_cpu.hpp"
+#include "mechanisms/multicore/hh_cpu.hpp"
+#include "mechanisms/multicore/pas_cpu.hpp"
+#include "mechanisms/multicore/test_kin1_cpu.hpp"
+#include "mechanisms/multicore/test_kinlva_cpu.hpp"
 
 #include <initializer_list>
 #include <backends/multicore/fvm.hpp>
diff --git a/tests/unit/test_synapses.cpp b/tests/unit/test_synapses.cpp
index 7951b4fbd343092c4d3208706c3c10b0210bdc71..45e423faad1e4df807a29ffab589e27c745eedfa 100644
--- a/tests/unit/test_synapses.cpp
+++ b/tests/unit/test_synapses.cpp
@@ -4,8 +4,8 @@
 #include <cell.hpp>
 #include <backends/multicore/fvm.hpp>
 
-#include <mechanisms/multicore/expsyn.hpp>
-#include <mechanisms/multicore/exp2syn.hpp>
+#include <mechanisms/multicore/expsyn_cpu.hpp>
+#include <mechanisms/multicore/exp2syn_cpu.hpp>
 
 // compares results with those generated by nrn/ball_and_stick.py
 TEST(synapses, add_to_cell)