Skip to content
Snippets Groups Projects
Commit bd1e56a5 authored by Sam Yates, committed by Ben Cumming
Browse files

Add required thread synchronization to matrix kernel. (#280)

There is a potential data race in the `assemble_matrix_interleaved` kernel, where threads in a different warp can overwrite the `buffer_v` and `buffer_i` values before they are used to update the `d` and `rhs` vectors.

This race has been exercised in the asynchronous event delivery branch.

* Add `__syncthreads()` to the `assemble_matrix_interleaved` kernel after the `d` and `rhs` updates.
parent 56fd0532
No related branches found
No related tags found
No related merge requests found
...@@ -94,6 +94,8 @@ void assemble_matrix_interleaved( ...@@ -94,6 +94,8 @@ void assemble_matrix_interleaved(
rhs[store_pos] = gi*buffer_v[blk_pos] - buffer_i[blk_pos]; rhs[store_pos] = gi*buffer_v[blk_pos] - buffer_i[blk_pos];
} }
__syncthreads();
store_pos += LoadWidth*BlockWidth; store_pos += LoadWidth*BlockWidth;
load_pos += LoadWidth; load_pos += LoadWidth;
} }
......
...@@ -55,6 +55,7 @@ void flat_to_interleaved( ...@@ -55,6 +55,7 @@ void flat_to_interleaved(
if (i+blk_row<padded_size) { if (i+blk_row<padded_size) {
out[store_pos] = buffer[blk_pos]; out[store_pos] = buffer[blk_pos];
} }
__syncthreads();
load_pos += LoadWidth; load_pos += LoadWidth;
store_pos += LoadWidth*BlockWidth; store_pos += LoadWidth*BlockWidth;
} }
...@@ -100,6 +101,7 @@ void interleaved_to_flat( ...@@ -100,6 +101,7 @@ void interleaved_to_flat(
if (do_store && store_pos<end) { if (do_store && store_pos<end) {
out[store_pos] = buffer[lid]; out[store_pos] = buffer[lid];
} }
__syncthreads();
load_pos += LoadWidth*BlockWidth; load_pos += LoadWidth*BlockWidth;
store_pos += LoadWidth; store_pos += LoadWidth;
} }
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment