From ca006d1c717303feeeccea2f4e81a52c8cf3ed9b Mon Sep 17 00:00:00 2001 From: Thorsten Hater <24411438+thorstenhater@users.noreply.github.com> Date: Tue, 9 Aug 2022 20:19:23 +0200 Subject: [PATCH] PANIC! Forgot to fix fvm types in GPU! --- arbor/backends/gpu/diffusion.cu | 48 +++++++++++----------- arbor/backends/gpu/fine.cu | 12 +++--- arbor/backends/gpu/matrix_assemble.cu | 52 ++++++++++++------------ arbor/backends/gpu/matrix_fine.cu | 46 ++++++++++----------- arbor/backends/gpu/matrix_solve.cu | 24 +++++------ arbor/backends/gpu/multi_event_stream.cu | 32 +++++++-------- arbor/backends/gpu/shared_state.cu | 20 ++++----- arbor/backends/gpu/stimulus.cu | 10 ++--- arbor/backends/gpu/threshold_watcher.cu | 44 ++++++++++---------- 9 files changed, 144 insertions(+), 144 deletions(-) diff --git a/arbor/backends/gpu/diffusion.cu b/arbor/backends/gpu/diffusion.cu index a4f129bc..98ea02ec 100644 --- a/arbor/backends/gpu/diffusion.cu +++ b/arbor/backends/gpu/diffusion.cu @@ -65,10 +65,10 @@ void solve_diffusion( T* __restrict__ const d, const T* __restrict__ const u, const level_metadata* __restrict__ const level_meta, - const fvm_index_type* __restrict__ const level_lengths, - const fvm_index_type* __restrict__ const level_parents, - const fvm_index_type* __restrict__ const block_index, - const fvm_index_type* __restrict__ const num_matrix) // number of packed matrices = number of cells + const arb_index_type* __restrict__ const level_lengths, + const arb_index_type* __restrict__ const level_parents, + const arb_index_type* __restrict__ const block_index, + const arb_index_type* __restrict__ const num_matrix) // number of packed matrices = number of cells { const auto tid = threadIdx.x; const auto bid = blockIdx.x; @@ -213,18 +213,18 @@ void solve_diffusion( } // namespace kernels ARB_ARBOR_API void assemble_diffusion( - fvm_value_type* d, - fvm_value_type* rhs, - const fvm_value_type* invariant_d, - const fvm_value_type* concentration, - const fvm_value_type* voltage, - const fvm_value_type* current, - fvm_value_type q, - const fvm_value_type* conductivity, - const fvm_value_type* area, - const fvm_index_type* cv_to_intdom, - const fvm_value_type* dt_intdom, - const fvm_index_type* perm, + arb_value_type* d, + arb_value_type* rhs, + const arb_value_type* invariant_d, + const arb_value_type* concentration, + const arb_value_type* voltage, + const arb_value_type* current, + arb_value_type q, + const arb_value_type* conductivity, + const arb_value_type* area, + const arb_index_type* cv_to_intdom, + const arb_value_type* dt_intdom, + const arb_index_type* perm, unsigned n) { const unsigned block_dim = 128; @@ -253,15 +253,15 @@ ARB_ARBOR_API void assemble_diffusion( // num_cells = [2, 3, ...] // num_blocks = level_start.size() - 1 = num_levels.size() = num_cells.size() ARB_ARBOR_API void solve_diffusion( - fvm_value_type* rhs, - fvm_value_type* d, // diagonal values - const fvm_value_type* u, // upper diagonal (and lower diagonal as the matrix is SPD) + arb_value_type* rhs, + arb_value_type* d, // diagonal values + const arb_value_type* u, // upper diagonal (and lower diagonal as the matrix is SPD) const level_metadata* level_meta, // information pertaining to each level - const fvm_index_type* level_lengths, // lengths of branches of every level concatenated - const fvm_index_type* level_parents, // parents of branches of every level concatenated - const fvm_index_type* block_index, // start index into levels for each gpu block - fvm_index_type* num_cells, // the number of cells packed into this single matrix - fvm_index_type* padded_size, // length of rhs, d, u, including padding + const arb_index_type* level_lengths, // lengths of branches of every level concatenated + const arb_index_type* level_parents, // parents of branches of every level concatenated + const arb_index_type* block_index, // start index into levels for each gpu block + arb_index_type* num_cells, // the number of cells packed into this single matrix + arb_index_type* padded_size, // length of rhs, d, u, including padding unsigned num_blocks, // number of blocks unsigned blocksize) // size of each block { diff --git a/arbor/backends/gpu/fine.cu b/arbor/backends/gpu/fine.cu index c695ba53..1f5c4945 100644 --- a/arbor/backends/gpu/fine.cu +++ b/arbor/backends/gpu/fine.cu @@ -38,9 +38,9 @@ void scatter(const T* __restrict__ const from, } // namespace kernels ARB_ARBOR_API void gather( - const fvm_value_type* from, - fvm_value_type* to, - const fvm_index_type* p, + const arb_value_type* from, + arb_value_type* to, + const arb_index_type* p, unsigned n) { constexpr unsigned blockdim = 128; @@ -50,9 +50,9 @@ ARB_ARBOR_API void gather( } ARB_ARBOR_API void scatter( - const fvm_value_type* from, - fvm_value_type* to, - const fvm_index_type* p, + const arb_value_type* from, + arb_value_type* to, + const arb_index_type* p, unsigned n) { constexpr unsigned blockdim = 128; diff --git a/arbor/backends/gpu/matrix_assemble.cu b/arbor/backends/gpu/matrix_assemble.cu index e1c385e4..7e2fa250 100644 --- a/arbor/backends/gpu/matrix_assemble.cu +++ b/arbor/backends/gpu/matrix_assemble.cu @@ -155,24 +155,24 @@ void assemble_matrix_interleaved( } // namespace kernels ARB_ARBOR_API void assemble_matrix_flat( - fvm_value_type* d, - fvm_value_type* rhs, - const fvm_value_type* invariant_d, - const fvm_value_type* voltage, - const fvm_value_type* current, - const fvm_value_type* conductivity, - const fvm_value_type* cv_capacitance, - const fvm_value_type* area, - const fvm_index_type* cv_to_cell, - const fvm_value_type* dt_intdom, - const fvm_index_type* cell_to_intdom, + arb_value_type* d, + arb_value_type* rhs, + const arb_value_type* invariant_d, + const arb_value_type* voltage, + const arb_value_type* current, + const arb_value_type* conductivity, + const arb_value_type* cv_capacitance, + const arb_value_type* area, + const arb_index_type* cv_to_cell, + const arb_value_type* dt_intdom, + const arb_index_type* cell_to_intdom, unsigned n) { constexpr unsigned block_dim = 128; const unsigned grid_dim = impl::block_count(n, block_dim); kernels::assemble_matrix_flat - <fvm_value_type, fvm_index_type> + <arb_value_type, arb_index_type> <<<grid_dim, block_dim>>> (d, rhs, invariant_d, voltage, current, conductivity, cv_capacitance, area, cv_to_cell, dt_intdom, cell_to_intdom, n); @@ -180,19 +180,19 @@ ARB_ARBOR_API void assemble_matrix_flat( //template <typename T, typename I, unsigned BlockWidth, unsigned LoadWidth, unsigned Threads> void assemble_matrix_interleaved( - fvm_value_type* d, - fvm_value_type* rhs, - const fvm_value_type* invariant_d, - const fvm_value_type* voltage, - const fvm_value_type* current, - const fvm_value_type* conductivity, - const fvm_value_type* cv_capacitance, - const fvm_value_type* area, - const fvm_index_type* sizes, - const fvm_index_type* starts, - const fvm_index_type* matrix_to_cell, - const fvm_value_type* dt_intdom, - const fvm_index_type* cell_to_intdom, + arb_value_type* d, + arb_value_type* rhs, + const arb_value_type* invariant_d, + const arb_value_type* voltage, + const arb_value_type* current, + const arb_value_type* conductivity, + const arb_value_type* cv_capacitance, + const arb_value_type* area, + const arb_index_type* sizes, + const arb_index_type* starts, + const arb_index_type* matrix_to_cell, + const arb_value_type* dt_intdom, + const arb_index_type* cell_to_intdom, unsigned padded_size, unsigned num_mtx) { constexpr unsigned bd = impl::matrices_per_block(); @@ -203,7 +203,7 @@ void assemble_matrix_interleaved( const unsigned grid_dim = impl::block_count(num_mtx*lw, block_dim); kernels::assemble_matrix_interleaved - <fvm_value_type, fvm_index_type, bd, lw, block_dim> + <arb_value_type, arb_index_type, bd, lw, block_dim> <<<grid_dim, block_dim>>> (d, rhs, invariant_d, voltage, current, conductivity, cv_capacitance, area, sizes, starts, matrix_to_cell, diff --git a/arbor/backends/gpu/matrix_fine.cu b/arbor/backends/gpu/matrix_fine.cu index 41156f2d..eed8944b 100644 --- a/arbor/backends/gpu/matrix_fine.cu +++ b/arbor/backends/gpu/matrix_fine.cu @@ -65,10 +65,10 @@ void solve_matrix_fine( T* __restrict__ const d, const T* __restrict__ const u, const level_metadata* __restrict__ const level_meta, - const fvm_index_type* __restrict__ const level_lengths, - const fvm_index_type* __restrict__ const level_parents, - const fvm_index_type* __restrict__ const block_index, - const fvm_index_type* __restrict__ const num_matrix) // number of packed matrices = number of cells + const arb_index_type* __restrict__ const level_lengths, + const arb_index_type* __restrict__ const level_parents, + const arb_index_type* __restrict__ const block_index, + const arb_index_type* __restrict__ const num_matrix) // number of packed matrices = number of cells { const auto tid = threadIdx.x; const auto bid = blockIdx.x; @@ -213,17 +213,17 @@ void solve_matrix_fine( } // namespace kernels ARB_ARBOR_API void assemble_matrix_fine( - fvm_value_type* d, - fvm_value_type* rhs, - const fvm_value_type* invariant_d, - const fvm_value_type* voltage, - const fvm_value_type* current, - const fvm_value_type* conductivity, - const fvm_value_type* cv_capacitance, - const fvm_value_type* area, - const fvm_index_type* cv_to_intdom, - const fvm_value_type* dt_intdom, - const fvm_index_type* perm, + arb_value_type* d, + arb_value_type* rhs, + const arb_value_type* invariant_d, + const arb_value_type* voltage, + const arb_value_type* current, + const arb_value_type* conductivity, + const arb_value_type* cv_capacitance, + const arb_value_type* area, + const arb_index_type* cv_to_intdom, + const arb_value_type* dt_intdom, + const arb_index_type* perm, unsigned n) { const unsigned block_dim = 128; @@ -252,15 +252,15 @@ ARB_ARBOR_API void assemble_matrix_fine( // num_cells = [2, 3, ...] // num_blocks = level_start.size() - 1 = num_levels.size() = num_cells.size() ARB_ARBOR_API void solve_matrix_fine( - fvm_value_type* rhs, - fvm_value_type* d, // diagonal values - const fvm_value_type* u, // upper diagonal (and lower diagonal as the matrix is SPD) + arb_value_type* rhs, + arb_value_type* d, // diagonal values + const arb_value_type* u, // upper diagonal (and lower diagonal as the matrix is SPD) const level_metadata* level_meta, // information pertaining to each level - const fvm_index_type* level_lengths, // lengths of branches of every level concatenated - const fvm_index_type* level_parents, // parents of branches of every level concatenated - const fvm_index_type* block_index, // start index into levels for each gpu block - fvm_index_type* num_cells, // the number of cells packed into this single matrix - fvm_index_type* padded_size, // length of rhs, d, u, including padding + const arb_index_type* level_lengths, // lengths of branches of every level concatenated + const arb_index_type* level_parents, // parents of branches of every level concatenated + const arb_index_type* block_index, // start index into levels for each gpu block + arb_index_type* num_cells, // the number of cells packed into this single matrix + arb_index_type* padded_size, // length of rhs, d, u, including padding unsigned num_blocks, // number of blocks unsigned blocksize) // size of each block { diff --git a/arbor/backends/gpu/matrix_solve.cu b/arbor/backends/gpu/matrix_solve.cu index 6a0a383c..674bf2ab 100644 --- a/arbor/backends/gpu/matrix_solve.cu +++ b/arbor/backends/gpu/matrix_solve.cu @@ -87,33 +87,33 @@ void solve_matrix_interleaved( } // namespace kernels ARB_ARBOR_API void solve_matrix_flat( - fvm_value_type* rhs, - fvm_value_type* d, - const fvm_value_type* u, - const fvm_index_type* p, - const fvm_index_type* cell_cv_divs, + arb_value_type* rhs, + arb_value_type* d, + const arb_value_type* u, + const arb_index_type* p, + const arb_index_type* cell_cv_divs, int num_mtx) { constexpr unsigned block_dim = 128; const unsigned grid_dim = impl::block_count(num_mtx, block_dim); kernels::solve_matrix_flat - <fvm_value_type, fvm_index_type> + <arb_value_type, arb_index_type> <<<grid_dim, block_dim>>> (rhs, d, u, p, cell_cv_divs, num_mtx); } void solve_matrix_interleaved( - fvm_value_type* rhs, - fvm_value_type* d, - const fvm_value_type* u, - const fvm_index_type* p, - const fvm_index_type* sizes, + arb_value_type* rhs, + arb_value_type* d, + const arb_value_type* u, + const arb_index_type* p, + const arb_index_type* sizes, int padded_size, int num_mtx) { constexpr unsigned block_dim = impl::matrices_per_block(); const unsigned grid_dim = impl::block_count(num_mtx, block_dim); - kernels::solve_matrix_interleaved<fvm_value_type, fvm_index_type, block_dim> + kernels::solve_matrix_interleaved<arb_value_type, arb_index_type, block_dim> <<<grid_dim, block_dim>>> (rhs, d, u, p, sizes, padded_size, num_mtx); } diff --git a/arbor/backends/gpu/multi_event_stream.cu b/arbor/backends/gpu/multi_event_stream.cu index 11a8136f..1bf01e72 100644 --- a/arbor/backends/gpu/multi_event_stream.cu +++ b/arbor/backends/gpu/multi_event_stream.cu @@ -86,10 +86,10 @@ namespace kernels { } // namespace kernels void mark_until_after_w(unsigned n, - fvm_index_type* mark, - fvm_index_type* span_end, - fvm_value_type* ev_time, - const fvm_value_type* t_until) + arb_index_type* mark, + arb_index_type* span_end, + arb_value_type* ev_time, + const arb_value_type* t_until) { const int nblock = impl::block_count(n, 128); kernels::mark_until_after @@ -98,10 +98,10 @@ void mark_until_after_w(unsigned n, } void mark_until_w(unsigned n, - fvm_index_type* mark, - fvm_index_type* span_end, - fvm_value_type* ev_time, - const fvm_value_type* t_until) + arb_index_type* mark, + arb_index_type* span_end, + arb_value_type* ev_time, + const arb_value_type* t_until) { const int nblock = impl::block_count(n, 128); kernels::mark_until @@ -110,10 +110,10 @@ void mark_until_w(unsigned n, } void drop_marked_events_w(unsigned n, - fvm_index_type* n_nonempty_stream, - fvm_index_type* span_begin, - fvm_index_type* span_end, - fvm_index_type* mark) + arb_index_type* n_nonempty_stream, + arb_index_type* span_begin, + arb_index_type* span_end, + arb_index_type* mark) { const int nblock = impl::block_count(n, 128); kernels::drop_marked_events @@ -123,10 +123,10 @@ void drop_marked_events_w(unsigned n, } void event_time_if_before_w(unsigned n, - fvm_index_type* span_begin, - fvm_index_type* span_end, - fvm_value_type* ev_time, - fvm_value_type* t_until) + arb_index_type* span_begin, + arb_index_type* span_end, + arb_value_type* ev_time, + arb_value_type* t_until) { const int nblock = impl::block_count(n, 128); kernels::event_time_if_before diff --git a/arbor/backends/gpu/shared_state.cu b/arbor/backends/gpu/shared_state.cu index 5f6aba70..5ac91218 100644 --- a/arbor/backends/gpu/shared_state.cu +++ b/arbor/backends/gpu/shared_state.cu @@ -30,7 +30,7 @@ __global__ void update_time_to_impl(unsigned n, template <typename T> __global__ void add_scalar(unsigned n, T* __restrict__ const x, - fvm_value_type v) { + arb_value_type v) { unsigned i = threadIdx.x+blockIdx.x*blockDim.x; if (i<n) { x[i] += v; @@ -55,9 +55,9 @@ __global__ void set_dt_impl( T* __restrict__ dt_intdom, __global__ void take_samples_impl( multi_event_stream_state<raw_probe_info> s, - const fvm_value_type* __restrict__ const time, - fvm_value_type* __restrict__ const sample_time, - fvm_value_type* __restrict__ const sample_value) + const arb_value_type* __restrict__ const time, + arb_value_type* __restrict__ const sample_time, + arb_value_type* __restrict__ const sample_value) { unsigned i = threadIdx.x+blockIdx.x*blockDim.x; if (i<s.n) { @@ -74,7 +74,7 @@ __global__ void take_samples_impl( using impl::block_count; -void add_scalar(std::size_t n, fvm_value_type* data, fvm_value_type v) { +void add_scalar(std::size_t n, arb_value_type* data, arb_value_type v) { if (!n) return; constexpr int block_dim = 128; @@ -83,8 +83,8 @@ void add_scalar(std::size_t n, fvm_value_type* data, fvm_value_type v) { } void update_time_to_impl( - std::size_t n, fvm_value_type* time_to, const fvm_value_type* time, - fvm_value_type dt, fvm_value_type tmax) + std::size_t n, arb_value_type* time_to, const arb_value_type* time, + arb_value_type dt, arb_value_type tmax) { if (!n) return; @@ -94,8 +94,8 @@ void update_time_to_impl( } void set_dt_impl( - fvm_size_type nintdom, fvm_size_type ncomp, fvm_value_type* dt_intdom, fvm_value_type* dt_comp, - const fvm_value_type* time_to, const fvm_value_type* time, const fvm_index_type* cv_to_intdom) + arb_size_type nintdom, arb_size_type ncomp, arb_value_type* dt_intdom, arb_value_type* dt_comp, + const arb_value_type* time_to, const arb_value_type* time, const arb_index_type* cv_to_intdom) { if (!nintdom || !ncomp) return; @@ -106,7 +106,7 @@ void set_dt_impl( void take_samples_impl( const multi_event_stream_state<raw_probe_info>& s, - const fvm_value_type* time, fvm_value_type* sample_time, fvm_value_type* sample_value) + const arb_value_type* time, arb_value_type* sample_time, arb_value_type* sample_value) { if (!s.n_streams()) return; diff --git a/arbor/backends/gpu/stimulus.cu b/arbor/backends/gpu/stimulus.cu index f6aa6036..2a9b9467 100644 --- a/arbor/backends/gpu/stimulus.cu +++ b/arbor/backends/gpu/stimulus.cu @@ -22,16 +22,16 @@ void istim_add_current_impl(int n, istim_pp pp) { auto i = threadIdx.x + blockDim.x*blockIdx.x; if (i>=n) return; - fvm_index_type ei_left = pp.envl_divs[i]; - fvm_index_type ei_right = pp.envl_divs[i+1]; + arb_index_type ei_left = pp.envl_divs[i]; + arb_index_type ei_right = pp.envl_divs[i+1]; - fvm_index_type ai = pp.accu_index[i]; - fvm_index_type cv = pp.accu_to_cv[ai]; + arb_index_type ai = pp.accu_index[i]; + arb_index_type cv = pp.accu_to_cv[ai]; double t = pp.time[pp.cv_to_intdom[cv]]; if (ei_left==ei_right || t<pp.envl_times[ei_left]) return; - fvm_index_type& ei = pp.envl_index[i]; + arb_index_type& ei = pp.envl_index[i]; while (ei+1<ei_right && pp.envl_times[ei+1]<=t) ++ei; double J = pp.envl_amplitudes[ei]; // current density (A/m²) diff --git a/arbor/backends/gpu/threshold_watcher.cu b/arbor/backends/gpu/threshold_watcher.cu index bf048243..032b8a98 100644 --- a/arbor/backends/gpu/threshold_watcher.cu +++ b/arbor/backends/gpu/threshold_watcher.cu @@ -24,17 +24,17 @@ namespace kernel { __global__ void test_thresholds_impl( int size, - const fvm_index_type* __restrict__ const cv_to_intdom, - const fvm_value_type* __restrict__ const t_after, - const fvm_value_type* __restrict__ const t_before, - const fvm_index_type* __restrict__ const src_to_spike, - fvm_value_type* __restrict__ const time_since_spike, + const arb_index_type* __restrict__ const cv_to_intdom, + const arb_value_type* __restrict__ const t_after, + const arb_value_type* __restrict__ const t_before, + const arb_index_type* __restrict__ const src_to_spike, + arb_value_type* __restrict__ const time_since_spike, stack_storage<threshold_crossing>& stack, - fvm_index_type* __restrict__ const is_crossed, - fvm_value_type* __restrict__ const prev_values, - const fvm_index_type* __restrict__ const cv_index, - const fvm_value_type* __restrict__ const values, - const fvm_value_type* __restrict__ const thresholds, + arb_index_type* __restrict__ const is_crossed, + arb_value_type* __restrict__ const prev_values, + const arb_index_type* __restrict__ const cv_index, + const arb_value_type* __restrict__ const values, + const arb_value_type* __restrict__ const thresholds, bool record_time_since_spike) { int i = threadIdx.x + blockIdx.x*blockDim.x; @@ -49,7 +49,7 @@ void test_thresholds_impl( const auto v_prev = prev_values[cv]; const auto v = values[cv]; const auto thresh = thresholds[i]; - fvm_index_type spike_idx = 0; + arb_index_type spike_idx = 0; // Reset all spike times to -1.0 indicating no spike has been recorded on the detector if (record_time_since_spike) { @@ -79,17 +79,17 @@ void test_thresholds_impl( } if (crossed) { - push_back(stack, {fvm_size_type(i), crossing_time}); + push_back(stack, {arb_size_type(i), crossing_time}); } } __global__ extern void reset_crossed_impl( int size, - fvm_index_type* __restrict__ const is_crossed, - const fvm_index_type* __restrict__ const cv_index, - const fvm_value_type* __restrict__ const values, - const fvm_value_type* __restrict__ const thresholds) + arb_index_type* __restrict__ const is_crossed, + const arb_index_type* __restrict__ const cv_index, + const arb_value_type* __restrict__ const values, + const arb_value_type* __restrict__ const thresholds) { int i = threadIdx.x + blockIdx.x*blockDim.x; if (i<size) { @@ -101,10 +101,10 @@ extern void reset_crossed_impl( void test_thresholds_impl( int size, - const fvm_index_type* cv_to_intdom, const fvm_value_type* t_after, const fvm_value_type* t_before, - const fvm_index_type* src_to_spike, fvm_value_type* time_since_spike, stack_storage<threshold_crossing>& stack, - fvm_index_type* is_crossed, fvm_value_type* prev_values, - const fvm_index_type* cv_index, const fvm_value_type* values, const fvm_value_type* thresholds, + const arb_index_type* cv_to_intdom, const arb_value_type* t_after, const arb_value_type* t_before, + const arb_index_type* src_to_spike, arb_value_type* time_since_spike, stack_storage<threshold_crossing>& stack, + arb_index_type* is_crossed, arb_value_type* prev_values, + const arb_index_type* cv_index, const arb_value_type* values, const arb_value_type* thresholds, bool record_time_since_spike) { if (size>0) { @@ -117,8 +117,8 @@ void test_thresholds_impl( } void reset_crossed_impl( - int size, fvm_index_type* is_crossed, - const fvm_index_type* cv_index, const fvm_value_type* values, const fvm_value_type* thresholds) + int size, arb_index_type* is_crossed, + const arb_index_type* cv_index, const arb_value_type* values, const arb_value_type* thresholds) { if (size>0) { constexpr int block_dim = 128; -- GitLab