diff --git a/arbor/backends/gpu/matrix_state_fine.hpp b/arbor/backends/gpu/matrix_state_fine.hpp
index f2fa08db7548ccbf9123cfff0ee47856b6446df8..3b393a85f3d018ee1e255c13901e1d44f8806b96 100644
--- a/arbor/backends/gpu/matrix_state_fine.hpp
+++ b/arbor/backends/gpu/matrix_state_fine.hpp
@@ -98,9 +98,6 @@ public:
     // Invariant part of the matrix diagonal
     array invariant_d;         // [μS]
 
-    // Solution in unpacked format
-    array solution_;
-
     // Maps cell to integration domain
     iarray cell_to_intdom;
 
@@ -418,7 +415,6 @@ public:
         // d, u, rhs        : packed
        // cv_capacitance   : flat
         // invariant_d      : flat
-        // solution_        : flat
         // cv_to_cell       : flat
         // area             : flat
 
@@ -446,8 +442,7 @@ public:
         // transform u_shuffled values into packed u vector.
         flat_to_packed(u_shuffled, u);
 
-        // the invariant part of d, cv_area and the solution are in flat form
-        solution_ = array(matrix_size, 0);
+        // the invariant part of d and cv_area are in flat form
         cv_area = memory::make_const_view(area);
 
         // the cv_capacitance can be copied directly because it is
@@ -489,43 +484,40 @@ public:
                      size());
     }
 
-    void solve() {
-        solve_matrix_fine(
-            rhs.data(), d.data(), u.data(),
-            level_meta.data(), level_lengths.data(), level_parents.data(),
-            block_index.data(),
-            num_cells_in_block.data(),
-            data_partition.data(),
-            num_cells_in_block.size(), max_branches_per_level);
-
+    void solve(array& to) {
+        solve_matrix_fine(rhs.data(),
+                          d.data(),
+                          u.data(),
+                          level_meta.data(),
+                          level_lengths.data(),
+                          level_parents.data(),
+                          block_index.data(),
+                          num_cells_in_block.data(),
+                          data_partition.data(),
+                          num_cells_in_block.size(),
+                          max_branches_per_level);
         // unpermute the solution
-        packed_to_flat(rhs, solution_);
+        packed_to_flat(rhs, to);
     }
 
-    const_view solution() const {
-        return solution_;
+private:
+    std::size_t size() const {
+        return matrix_size;
     }
 
-    template <typename VFrom, typename VTo>
-    void flat_to_packed(const VFrom& from, VTo& to ) {
+    void flat_to_packed(const array& from, array& to ) {
         arb_assert(from.size()==matrix_size);
         arb_assert(to.size()==data_size);
 
         scatter(from.data(), to.data(), perm.data(), perm.size());
     }
 
-    template <typename VFrom, typename VTo>
-    void packed_to_flat(const VFrom& from, VTo& to ) {
+    void packed_to_flat(const array& from, array& to ) {
         arb_assert(from.size()==data_size);
         arb_assert(to.size()==matrix_size);
 
         gather(from.data(), to.data(), perm.data(), perm.size());
     }
-
-private:
-    std::size_t size() const {
-        return matrix_size;
-    }
 };
 
 } // namespace gpu
diff --git a/arbor/backends/gpu/matrix_state_flat.hpp b/arbor/backends/gpu/matrix_state_flat.hpp
index df1c8c84625c1a66de2fe125c3c79552027b3bad..c5a6d98c6920b1165995682cb96aaa79b8ccb2d8 100644
--- a/arbor/backends/gpu/matrix_state_flat.hpp
+++ b/arbor/backends/gpu/matrix_state_flat.hpp
@@ -115,9 +115,6 @@ struct matrix_state_flat {
     }
 
     // interface for exposing the solution to the outside world
-    const_view solution() const {
-        return memory::make_view(rhs);
-    }
 
     // Assemble the matrix
     // Afterwards the diagonal and RHS will have been set given dt, voltage and current.
@@ -132,10 +129,11 @@ struct matrix_state_flat {
             cv_to_cell.data(), dt_intdom.data(), cell_to_intdom.data(), size());
     }
 
-    void solve() {
+    void solve(array& to) {
         // perform solve on gpu
-        solve_matrix_flat(rhs.data(), d.data(), u.data(), parent_index.data(),
-                          cell_cv_divs.data(), num_matrices());
+        arb_assert(to.size() == rhs.size());
+        solve_matrix_flat(rhs.data(), d.data(), u.data(), parent_index.data(), cell_cv_divs.data(), num_matrices());
+        memory::copy(rhs, to);
     }
 
     std::size_t size() const {
diff --git a/arbor/backends/multicore/matrix_state.hpp b/arbor/backends/multicore/matrix_state.hpp
index 84e3f041c3f25a2c41c6e50b827c7747fa2f00b2..24a21c1b5bc8beddf4781547b892bedbd33a80d3 100644
--- a/arbor/backends/multicore/matrix_state.hpp
+++ b/arbor/backends/multicore/matrix_state.hpp
@@ -3,6 +3,8 @@
 #include <util/partition.hpp>
 #include <util/span.hpp>
 
+#include <memory/memory.hpp>
+
 #include "multicore_common.hpp"
 
 namespace arb {
@@ -133,6 +135,12 @@ public:
         }
     }
 
+    template<typename VTo>
+    void solve(VTo& to) {
+        solve();
+        memory::copy(rhs, to);
+    }
+
 private:
 
     std::size_t size() const {
diff --git a/arbor/fvm_lowered_cell_impl.hpp b/arbor/fvm_lowered_cell_impl.hpp
index 173cad5e744d8b71b3726d5f8693b12eabcc0cf5..81ba0068c410a2d6d025502914f84f1412ebbac3 100644
--- a/arbor/fvm_lowered_cell_impl.hpp
+++ b/arbor/fvm_lowered_cell_impl.hpp
@@ -276,8 +276,7 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
         matrix_.assemble(state_->dt_intdom, state_->voltage, state_->current_density, state_->conductivity);
         PL();
         PE(advance_integrate_matrix_solve);
-        matrix_.solve();
-        memory::copy(matrix_.solution(), state_->voltage);
+        matrix_.solve(state_->voltage);
         PL();
 
         // Integrate mechanism state.
@@ -296,7 +295,7 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
 
         PE(advance_integrate_threshold);
         threshold_watcher_.test();
-        memory::copy(state_->time_to, state_->time);
+        std::swap(state_->time_to, state_->time);
         PL();
 
         // Check for non-physical solutions:
diff --git a/arbor/matrix.hpp b/arbor/matrix.hpp
index 8609cb7c9d0cb54430960372241db7d3184d8467..3bc7a54b8591f2befcd41264048056190ce818d5 100644
--- a/arbor/matrix.hpp
+++ b/arbor/matrix.hpp
@@ -61,9 +61,9 @@ public:
     /// the partition of the parent index over the cells
     const iarray& cell_index() const { return cell_index_; }
 
-    /// Solve the linear system.
-    void solve() {
-        state_.solve();
+    /// Solve the linear system into a given solution storage.
+    void solve(array& to) {
+        state_.solve(to);
     }
 
     /// Assemble the matrix for given dt
@@ -71,11 +71,6 @@ public:
         state_.assemble(dt_cell, voltage, current, conductivity);
     }
 
-    /// Get a view of the solution
-    typename State::const_view solution() const {
-        return state_.solution();
-    }
-
 private:
     /// the parent indice that describe matrix structure
     iarray parent_index_;
diff --git a/test/unit/test_fvm_lowered.cpp b/test/unit/test_fvm_lowered.cpp
index 09cfe347a1fb433b83c7f98dd22f6e959e222bea..4110db15708690fc5c7ca3bcafba487a96501ff5 100644
--- a/test/unit/test_fvm_lowered.cpp
+++ b/test/unit/test_fvm_lowered.cpp
@@ -231,6 +231,7 @@ TEST(fvm_lowered, matrix_init)
     fvcell.initialize({0}, cable1d_recipe(cell), cell_to_intdom, targets, probe_map);
 
     auto& J = fvcell.*private_matrix_ptr;
+    auto& S = fvcell.*private_state_ptr;
     EXPECT_EQ(J.size(), 12u);
 
     // Test that the matrix is initialized with sensible values
@@ -242,7 +243,7 @@ TEST(fvm_lowered, matrix_init)
 
     EXPECT_FALSE(util::any_of(util::subrange_view(mat.u, 1, n), isnan));
     EXPECT_FALSE(util::any_of(mat.d, isnan));
-    EXPECT_FALSE(util::any_of(J.solution(), isnan));
+    EXPECT_FALSE(util::any_of(S->voltage, isnan));
 
     EXPECT_FALSE(util::any_of(util::subrange_view(mat.u, 1, n), ispos));
     EXPECT_FALSE(util::any_of(mat.d, isneg));
diff --git a/test/unit/test_matrix.cpp b/test/unit/test_matrix.cpp
index 6ead914fd9100e7830240f9245b4bcf612de69dc..e3b17d8e5f2710e6336c0718dee3220b3765a3fc 100644
--- a/test/unit/test_matrix.cpp
+++ b/test/unit/test_matrix.cpp
@@ -14,9 +14,11 @@
 
 using namespace arb;
 
-using matrix_type = matrix<arb::multicore::backend>;
-using index_type = matrix_type::index_type;
-using value_type = matrix_type::value_type;
+using backend = multicore::backend;
+using array = backend::array;
+using matrix_type = matrix<backend>;
+using index_type = matrix_type::index_type;
+using value_type = matrix_type::value_type;
 
 using vvec = std::vector<value_type>;
 
@@ -47,9 +49,10 @@ TEST(matrix, solve_host)
         fill(state.u, -1);
         fill(state.rhs,1);
 
-        m.solve();
+        auto x = array({0});
+        m.solve(x);
 
-        EXPECT_EQ(m.solution()[0], 0.5);
+        EXPECT_EQ(x[0], 0.5);
     }
 
     // matrices in the range of 2x2 to 1000x1000
@@ -68,9 +71,11 @@ TEST(matrix, solve_host)
         fill(A.u, -1);
         fill(A.rhs,1);
 
-        m.solve();
 
-        auto x = m.solution();
+        auto x = array();
+        x.resize(n);
+        m.solve(x);
+
         auto err = math::square(std::fabs(2.*x[0] - x[1] - 1.));
         for(auto i : make_span(1,n-1)) {
             err += math::square(std::fabs(2.*x[i] - x[i-1] - x[i+1] - 1.));
@@ -108,8 +113,8 @@ TEST(matrix, zero_diagonal)
     // Expected solution:
     std::vector<value_type> expected = {4, 5, 6, 7, 8, 9, 10};
 
-    m.solve();
-    auto x = m.solution();
+    auto x = array({0, 0, 0, 0, 0, 0, 0});
+    m.solve(x);
 
     EXPECT_TRUE(testing::seq_almost_eq<double>(expected, x));
 }
@@ -159,12 +164,11 @@ TEST(matrix, zero_diagonal_assembled)
 
     matrix_type m(p, c, Cm, g, area, s);
     m.assemble(dt, v, i, mg);
-    m.solve();
 
-    vvec x;
-    assign(x, m.solution());
+    auto x = array({0, 0, 0, 0, 0, 0, 0});
     std::vector<value_type> expected = {4, 5, 6, 7, 8, 9, 10};
 
+    m.solve(x);
     EXPECT_TRUE(testing::seq_almost_eq<double>(expected, x));
 
     // Set dt of 2nd (middle) submatrix to zero. Solution
@@ -174,9 +178,8 @@ TEST(matrix, zero_diagonal_assembled)
     v[3] = -20;
     v[4] = -30;
     m.assemble(dt, v, i, mg);
-    m.solve();
+    m.solve(x);
 
-    assign(x, m.solution());
     expected = {4, 5, 6, -20, -30, 9, 10};
 
     EXPECT_TRUE(testing::seq_almost_eq<double>(expected, x));
diff --git a/test/unit/test_matrix.cu b/test/unit/test_matrix.cu
index b46e8315aac0d26e5b1926dea0acd519cb73ddd4..66499801e207ac36cd5cce4467734736f0b87d93 100644
--- a/test/unit/test_matrix.cu
+++ b/test/unit/test_matrix.cu
@@ -134,19 +134,22 @@ TEST(matrix, backends)
     auto gpu_i = on_gpu(i);
     auto gpu_mg = on_gpu(mg);
 
+    auto x_flat_d = gpu_array(group_size);
+    auto x_fine_d = gpu_array(group_size);
+
     flat.assemble(gpu_dt, gpu_v, gpu_i, gpu_mg);
     fine.assemble(gpu_dt, gpu_v, gpu_i, gpu_mg);
 
-    flat.solve();
-    fine.solve();
+    flat.solve(x_flat_d);
+    fine.solve(x_fine_d);
 
     // Compare the results.
     // We expect exact equality for the two gpu matrix implementations because both
     // perform the same operations in the same order on the same inputs.
-    std::vector<double> x_flat = assign_from(on_host(flat.solution()));
+    auto x_flat = on_host(x_flat_d);
     // as the fine algorithm contains atomics the solution might be slightly
     // different from flat and interleaved
-    std::vector<double> x_fine = assign_from(on_host(fine.solution()));
+    auto x_fine = on_host(x_fine_d);
 
     auto max_diff_fine =
         util::max_value(
diff --git a/test/unit/test_matrix_cpuvsgpu.cpp b/test/unit/test_matrix_cpuvsgpu.cpp
index f89bb900e04da3876780ed81b0746669b3d801d7..07601e1a58cdd238cbebcea7a223d652a6556237 100644
--- a/test/unit/test_matrix_cpuvsgpu.cpp
+++ b/test/unit/test_matrix_cpuvsgpu.cpp
@@ -124,15 +124,16 @@ TEST(matrix, assemble)
     std::generate(dt.begin(), dt.end(), [&](){return dt_dist(gen);});
 
     // Voltage, current, and conductance values
+    auto result_h = host_array(group_size);
+    auto x_d = gpu_array(group_size);
     m_mc.assemble(host_array(dt.begin(), dt.end()), host_array(group_size, -64), host_array(group_size, 10), host_array(group_size, 3));
-    m_mc.solve();
+    m_mc.solve(result_h);
     m_gpu.assemble(on_gpu(dt), gpu_array(group_size, -64), gpu_array(group_size, 10), gpu_array(group_size, 3));
-    m_gpu.solve();
-
+    m_gpu.solve(x_d);
+    auto result_g = on_host(x_d);
+
     // Compare the GPU and CPU results.
     // Cast result to float, because we are happy to ignore small differencs
-    std::vector<float> result_h = util::assign_from(m_mc.solution());
-    std::vector<float> result_g = util::assign_from(on_host(m_gpu.solution()));
 
     EXPECT_TRUE(seq_almost_eq<float>(result_h, result_g));
 }
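
--
Note on the new calling convention (an editor's illustration, not part of the patch): the old flow was m.solve() followed by memory::copy(m.solution(), state_->voltage); after this change, solve(array& to) writes directly into caller-owned storage, so the backends can drop the intermediate solution_ buffer and one copy per integration step. The sketch below is a minimal, self-contained host-side toy showing that convention; for a single unbranched cable the Hines solve reduces to the Thomas algorithm, and the names (tridiag_state, d, u, rhs) are illustrative stand-ins, not Arbor's backend API.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

// For a single unbranched cable the Hines solve reduces to the Thomas
// algorithm: u[i] couples compartment i to its parent i-1 (u[0] unused).
struct tridiag_state {
    std::vector<double> d, u, rhs; // diagonal, off-diagonal, right-hand side

    // Solve in place, then land the result in caller-owned storage 'to',
    // mirroring the solve(array& to) signature introduced by this patch.
    void solve(std::vector<double>& to) {
        assert(to.size()==rhs.size());
        const std::size_t n = rhs.size();
        // Backward sweep: fold each compartment into its parent.
        for (std::size_t i = n-1; i>0; --i) {
            const double f = u[i]/d[i];
            d[i-1]   -= f*u[i];
            rhs[i-1] -= f*rhs[i];
        }
        rhs[0] /= d[0];
        // Forward substitution from the root.
        for (std::size_t i = 1; i<n; ++i) {
            rhs[i] = (rhs[i] - u[i]*rhs[i-1])/d[i];
        }
        // The only copy left is into the caller's buffer.
        std::copy(rhs.begin(), rhs.end(), to.begin());
    }
};

int main() {
    // Same setup as the n-by-n case in TEST(matrix, solve_host):
    // d = 2, u = -1, rhs = 1.
    const std::size_t n = 5;
    tridiag_state m{std::vector<double>(n, 2.),
                    std::vector<double>(n, -1.),
                    std::vector<double>(n, 1.)};

    std::vector<double> x(n); // caller-owned, plays the role of state_->voltage
    m.solve(x);               // no solution() accessor, no extra copy

    for (double v: x) std::cout << v << ' '; // prints 2.5 4 4.5 4 2.5
    std::cout << '\n';
}

Running this prints 2.5 4 4.5 4 2.5, which satisfies the residual 2*x[i] - x[i-1] - x[i+1] == 1 checked in TEST(matrix, solve_host). The std::swap(state_->time_to, state_->time) hunk follows the same idea: once the old buffer contents are no longer needed, a swap is cheaper than a memory::copy.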