diff --git a/arbor/backends/gpu/matrix_state_fine.hpp b/arbor/backends/gpu/matrix_state_fine.hpp
index f2fa08db7548ccbf9123cfff0ee47856b6446df8..3b393a85f3d018ee1e255c13901e1d44f8806b96 100644
--- a/arbor/backends/gpu/matrix_state_fine.hpp
+++ b/arbor/backends/gpu/matrix_state_fine.hpp
@@ -98,9 +98,6 @@ public:
     // Invariant part of the matrix diagonal
     array invariant_d;         // [μS]
 
-    // Solution in unpacked format
-    array solution_;
-
     // Maps cell to integration domain
     iarray cell_to_intdom;
 
@@ -418,7 +415,6 @@ public:
         // d, u, rhs        : packed
        // cv_capacitance   : flat
         // invariant_d      : flat
-        // solution_        : flat
         // cv_to_cell       : flat
         // area             : flat
 
@@ -446,8 +442,7 @@ public:
         // transform u_shuffled values into packed u vector.
         flat_to_packed(u_shuffled, u);
 
-        // the invariant part of d, cv_area and the solution are in flat form
-        solution_ = array(matrix_size, 0);
+        // the invariant part of d and cv_area are in flat form
         cv_area = memory::make_const_view(area);
 
         // the cv_capacitance can be copied directly because it is
@@ -489,43 +484,40 @@ public:
                      size());
     }
 
-    void solve() {
-        solve_matrix_fine(
-            rhs.data(), d.data(), u.data(),
-            level_meta.data(), level_lengths.data(), level_parents.data(),
-            block_index.data(),
-            num_cells_in_block.data(),
-            data_partition.data(),
-            num_cells_in_block.size(), max_branches_per_level);
-
+    void solve(array& to) {
+        solve_matrix_fine(rhs.data(),
+                          d.data(),
+                          u.data(),
+                          level_meta.data(),
+                          level_lengths.data(),
+                          level_parents.data(),
+                          block_index.data(),
+                          num_cells_in_block.data(),
+                          data_partition.data(),
+                          num_cells_in_block.size(),
+                          max_branches_per_level);
         // unpermute the solution
-        packed_to_flat(rhs, solution_);
+        packed_to_flat(rhs, to);
     }
 
-    const_view solution() const {
-        return solution_;
+private:
+    std::size_t size() const {
+        return matrix_size;
     }
 
-    template <typename VFrom, typename VTo>
-    void flat_to_packed(const VFrom& from, VTo& to ) {
+    void flat_to_packed(const array& from, array& to ) {
         arb_assert(from.size()==matrix_size);
         arb_assert(to.size()==data_size);
 
         scatter(from.data(), to.data(), perm.data(), perm.size());
     }
 
-    template <typename VFrom, typename VTo>
-    void packed_to_flat(const VFrom& from, VTo& to ) {
+    void packed_to_flat(const array& from, array& to ) {
         arb_assert(from.size()==data_size);
         arb_assert(to.size()==matrix_size);
 
         gather(from.data(), to.data(), perm.data(), perm.size());
     }
-
-private:
-    std::size_t size() const {
-        return matrix_size;
-    }
 };
 
 } // namespace gpu
diff --git a/arbor/backends/gpu/matrix_state_flat.hpp b/arbor/backends/gpu/matrix_state_flat.hpp
index df1c8c84625c1a66de2fe125c3c79552027b3bad..c5a6d98c6920b1165995682cb96aaa79b8ccb2d8 100644
--- a/arbor/backends/gpu/matrix_state_flat.hpp
+++ b/arbor/backends/gpu/matrix_state_flat.hpp
@@ -115,9 +115,6 @@ struct matrix_state_flat {
     }
 
     // interface for exposing the solution to the outside world
-    const_view solution() const {
-        return memory::make_view(rhs);
-    }
 
     // Assemble the matrix
     // Afterwards the diagonal and RHS will have been set given dt, voltage and current.
@@ -132,10 +129,11 @@ struct matrix_state_flat {
             cv_to_cell.data(), dt_intdom.data(), cell_to_intdom.data(), size());
     }
 
-    void solve() {
+    void solve(array& to) {
         // perform solve on gpu
-        solve_matrix_flat(rhs.data(), d.data(), u.data(), parent_index.data(),
-                          cell_cv_divs.data(), num_matrices());
+        arb_assert(to.size() == rhs.size());
+        solve_matrix_flat(rhs.data(), d.data(), u.data(), parent_index.data(), cell_cv_divs.data(), num_matrices());
+        memory::copy(rhs, to);
     }
 
     std::size_t size() const {
diff --git a/arbor/backends/multicore/matrix_state.hpp b/arbor/backends/multicore/matrix_state.hpp
index 84e3f041c3f25a2c41c6e50b827c7747fa2f00b2..24a21c1b5bc8beddf4781547b892bedbd33a80d3 100644
--- a/arbor/backends/multicore/matrix_state.hpp
+++ b/arbor/backends/multicore/matrix_state.hpp
@@ -3,6 +3,8 @@
 #include <util/partition.hpp>
 #include <util/span.hpp>
 
+#include <memory/memory.hpp>
+
 #include "multicore_common.hpp"
 
 namespace arb {
@@ -133,6 +135,12 @@ public:
         }
     }
 
+    template<typename VTo>
+    void solve(VTo& to) {
+        solve();
+        memory::copy(rhs, to);
+    }
+
 private:
 
     std::size_t size() const {
diff --git a/arbor/fvm_lowered_cell_impl.hpp b/arbor/fvm_lowered_cell_impl.hpp
index 173cad5e744d8b71b3726d5f8693b12eabcc0cf5..81ba0068c410a2d6d025502914f84f1412ebbac3 100644
--- a/arbor/fvm_lowered_cell_impl.hpp
+++ b/arbor/fvm_lowered_cell_impl.hpp
@@ -276,8 +276,7 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
         matrix_.assemble(state_->dt_intdom, state_->voltage, state_->current_density, state_->conductivity);
         PL();
         PE(advance_integrate_matrix_solve);
-        matrix_.solve();
-        memory::copy(matrix_.solution(), state_->voltage);
+        matrix_.solve(state_->voltage);
         PL();
 
         // Integrate mechanism state.
@@ -296,7 +295,7 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
 
         PE(advance_integrate_threshold);
         threshold_watcher_.test();
-        memory::copy(state_->time_to, state_->time);
+        std::swap(state_->time_to, state_->time);
         PL();
 
         // Check for non-physical solutions:
diff --git a/arbor/matrix.hpp b/arbor/matrix.hpp
index 8609cb7c9d0cb54430960372241db7d3184d8467..3bc7a54b8591f2befcd41264048056190ce818d5 100644
--- a/arbor/matrix.hpp
+++ b/arbor/matrix.hpp
@@ -61,9 +61,9 @@ public:
     /// the partition of the parent index over the cells
     const iarray& cell_index() const { return cell_index_; }
 
-    /// Solve the linear system.
-    void solve() {
-        state_.solve();
+    /// Solve the linear system into a given solution storage.
+    void solve(array& to) {
+        state_.solve(to);
     }
 
     /// Assemble the matrix for given dt
@@ -71,11 +71,6 @@ public:
         state_.assemble(dt_cell, voltage, current, conductivity);
     }
 
-    /// Get a view of the solution
-    typename State::const_view solution() const {
-        return state_.solution();
-    }
-
 private:
     /// the parent indice that describe matrix structure
     iarray parent_index_;
diff --git a/test/unit/test_fvm_lowered.cpp b/test/unit/test_fvm_lowered.cpp
index 09cfe347a1fb433b83c7f98dd22f6e959e222bea..4110db15708690fc5c7ca3bcafba487a96501ff5 100644
--- a/test/unit/test_fvm_lowered.cpp
+++ b/test/unit/test_fvm_lowered.cpp
@@ -231,6 +231,7 @@ TEST(fvm_lowered, matrix_init)
     fvcell.initialize({0}, cable1d_recipe(cell), cell_to_intdom, targets, probe_map);
 
     auto& J = fvcell.*private_matrix_ptr;
+    auto& S = fvcell.*private_state_ptr;
     EXPECT_EQ(J.size(), 12u);
 
     // Test that the matrix is initialized with sensible values
@@ -242,7 +243,7 @@ TEST(fvm_lowered, matrix_init)
 
     EXPECT_FALSE(util::any_of(util::subrange_view(mat.u, 1, n), isnan));
     EXPECT_FALSE(util::any_of(mat.d, isnan));
-    EXPECT_FALSE(util::any_of(J.solution(), isnan));
+    EXPECT_FALSE(util::any_of(S->voltage, isnan));
 
     EXPECT_FALSE(util::any_of(util::subrange_view(mat.u, 1, n), ispos));
     EXPECT_FALSE(util::any_of(mat.d, isneg));
diff --git a/test/unit/test_matrix.cpp b/test/unit/test_matrix.cpp
index 6ead914fd9100e7830240f9245b4bcf612de69dc..e3b17d8e5f2710e6336c0718dee3220b3765a3fc 100644
--- a/test/unit/test_matrix.cpp
+++ b/test/unit/test_matrix.cpp
@@ -14,9 +14,11 @@
 
 using namespace arb;
 
-using matrix_type = matrix<arb::multicore::backend>;
-using index_type = matrix_type::index_type;
-using value_type = matrix_type::value_type;
+using backend = multicore::backend;
+using array = backend::array;
+using matrix_type = matrix<backend>;
+using index_type = matrix_type::index_type;
+using value_type = matrix_type::value_type;
 
 using vvec = std::vector<value_type>;
 
@@ -47,9 +49,10 @@ TEST(matrix, solve_host)
         fill(state.u, -1);
         fill(state.rhs,1);
 
-        m.solve();
+        auto x = array({0});
+        m.solve(x);
 
-        EXPECT_EQ(m.solution()[0], 0.5);
+        EXPECT_EQ(x[0], 0.5);
     }
 
     // matrices in the range of 2x2 to 1000x1000
@@ -68,9 +71,11 @@ TEST(matrix, solve_host)
         fill(A.u, -1);
         fill(A.rhs,1);
 
-        m.solve();
 
-        auto x = m.solution();
+        auto x = array();
+        x.resize(n);
+        m.solve(x);
+
         auto err = math::square(std::fabs(2.*x[0] - x[1] - 1.));
         for(auto i : make_span(1,n-1)) {
             err += math::square(std::fabs(2.*x[i] - x[i-1] - x[i+1] - 1.));
@@ -108,8 +113,8 @@ TEST(matrix, zero_diagonal)
     // Expected solution:
     std::vector<value_type> expected = {4, 5, 6, 7, 8, 9, 10};
 
-    m.solve();
-    auto x = m.solution();
+    auto x = array({0, 0, 0, 0, 0, 0, 0});
+    m.solve(x);
 
     EXPECT_TRUE(testing::seq_almost_eq<double>(expected, x));
 }
@@ -159,12 +164,11 @@ TEST(matrix, zero_diagonal_assembled)
 
     matrix_type m(p, c, Cm, g, area, s);
     m.assemble(dt, v, i, mg);
-    m.solve();
 
-    vvec x;
-    assign(x, m.solution());
+    auto x = array({0, 0, 0, 0, 0, 0, 0});
     std::vector<value_type> expected = {4, 5, 6, 7, 8, 9, 10};
 
+    m.solve(x);
     EXPECT_TRUE(testing::seq_almost_eq<double>(expected, x));
 
     // Set dt of 2nd (middle) submatrix to zero. Solution
@@ -174,9 +178,8 @@ TEST(matrix, zero_diagonal_assembled)
     v[3] = -20;
     v[4] = -30;
     m.assemble(dt, v, i, mg);
-    m.solve();
+    m.solve(x);
 
-    assign(x, m.solution());
     expected = {4, 5, 6, -20, -30, 9, 10};
 
     EXPECT_TRUE(testing::seq_almost_eq<double>(expected, x));
diff --git a/test/unit/test_matrix.cu b/test/unit/test_matrix.cu
index b46e8315aac0d26e5b1926dea0acd519cb73ddd4..66499801e207ac36cd5cce4467734736f0b87d93 100644
--- a/test/unit/test_matrix.cu
+++ b/test/unit/test_matrix.cu
@@ -134,19 +134,22 @@ TEST(matrix, backends)
     auto gpu_i = on_gpu(i);
     auto gpu_mg = on_gpu(mg);
 
+    auto x_flat_d = gpu_array(group_size);
+    auto x_fine_d = gpu_array(group_size);
+
     flat.assemble(gpu_dt, gpu_v, gpu_i, gpu_mg);
     fine.assemble(gpu_dt, gpu_v, gpu_i, gpu_mg);
 
-    flat.solve();
-    fine.solve();
+    flat.solve(x_flat_d);
+    fine.solve(x_fine_d);
 
     // Compare the results.
     // We expect exact equality for the two gpu matrix implementations because both
     // perform the same operations in the same order on the same inputs.
-    std::vector<double> x_flat = assign_from(on_host(flat.solution()));
+    auto x_flat = on_host(x_flat_d);
     // as the fine algorithm contains atomics the solution might be slightly
     // different from flat and interleaved
-    std::vector<double> x_fine = assign_from(on_host(fine.solution()));
+    auto x_fine = on_host(x_fine_d);
 
     auto max_diff_fine =
         util::max_value(
diff --git a/test/unit/test_matrix_cpuvsgpu.cpp b/test/unit/test_matrix_cpuvsgpu.cpp
index f89bb900e04da3876780ed81b0746669b3d801d7..07601e1a58cdd238cbebcea7a223d652a6556237 100644
--- a/test/unit/test_matrix_cpuvsgpu.cpp
+++ b/test/unit/test_matrix_cpuvsgpu.cpp
@@ -124,15 +124,16 @@ TEST(matrix, assemble)
     std::generate(dt.begin(), dt.end(), [&](){return dt_dist(gen);});
 
     // Voltage, current, and conductance values
+    auto result_h = host_array(group_size);
+    auto x_d = gpu_array(group_size);
     m_mc.assemble(host_array(dt.begin(), dt.end()), host_array(group_size, -64), host_array(group_size, 10), host_array(group_size, 3));
-    m_mc.solve();
+    m_mc.solve(result_h);
     m_gpu.assemble(on_gpu(dt), gpu_array(group_size, -64), gpu_array(group_size, 10), gpu_array(group_size, 3));
-    m_gpu.solve();
-
+    m_gpu.solve(x_d);
+    auto result_g = on_host(x_d);
+
     // Compare the GPU and CPU results.
     // Cast result to float, because we are happy to ignore small differencs
-    std::vector<float> result_h = util::assign_from(m_mc.solution());
-    std::vector<float> result_g = util::assign_from(on_host(m_gpu.solution()));
 
     EXPECT_TRUE(seq_almost_eq<float>(result_h, result_g));
 }
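
--
Note on the new calling convention (an editor's illustration, not part of the patch): the old flow was m.solve() followed by memory::copy(m.solution(), state_->voltage); after this change, solve(array& to) writes directly into caller-owned storage, so the backends can drop the intermediate solution_ buffer and one copy per integration step. The sketch below is a minimal, self-contained host-side toy showing that convention; for a single unbranched cable the Hines solve reduces to the Thomas algorithm, and the names (tridiag_state, d, u, rhs) are illustrative stand-ins, not Arbor's backend API.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

// For a single unbranched cable the Hines solve reduces to the Thomas
// algorithm: u[i] couples compartment i to its parent i-1 (u[0] unused).
struct tridiag_state {
    std::vector<double> d, u, rhs; // diagonal, off-diagonal, right-hand side

    // Solve in place, then land the result in caller-owned storage 'to',
    // mirroring the solve(array& to) signature introduced by this patch.
    void solve(std::vector<double>& to) {
        assert(to.size()==rhs.size());
        const std::size_t n = rhs.size();
        // Backward sweep: fold each compartment into its parent.
        for (std::size_t i = n-1; i>0; --i) {
            const double f = u[i]/d[i];
            d[i-1]   -= f*u[i];
            rhs[i-1] -= f*rhs[i];
        }
        rhs[0] /= d[0];
        // Forward substitution from the root.
        for (std::size_t i = 1; i<n; ++i) {
            rhs[i] = (rhs[i] - u[i]*rhs[i-1])/d[i];
        }
        // The only copy left is into the caller's buffer.
        std::copy(rhs.begin(), rhs.end(), to.begin());
    }
};

int main() {
    // Same setup as the n-by-n case in TEST(matrix, solve_host):
    // d = 2, u = -1, rhs = 1.
    const std::size_t n = 5;
    tridiag_state m{std::vector<double>(n, 2.),
                    std::vector<double>(n, -1.),
                    std::vector<double>(n, 1.)};

    std::vector<double> x(n); // caller-owned, plays the role of state_->voltage
    m.solve(x);               // no solution() accessor, no extra copy

    for (double v: x) std::cout << v << ' '; // prints 2.5 4 4.5 4 2.5
    std::cout << '\n';
}

Running this prints 2.5 4 4.5 4 2.5, which satisfies the residual 2*x[i] - x[i-1] - x[i+1] == 1 checked in TEST(matrix, solve_host). The std::swap(state_->time_to, state_->time) hunk follows the same idea: once the old buffer contents are no longer needed, a swap is cheaper than a memory::copy.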