Skip to content
Snippets Groups Projects
Commit 9dbb2266 authored by Philipp Spilger's avatar Philipp Spilger
Browse files

Add test iterating over all synram weight columns via single-byte-enable

Change-Id: I59d306032ab1bdbed3b2be27f668912f7d5fda77
parent 05d2e1e5
No related branches found
No related tags found
No related merge requests found
......@@ -6,6 +6,16 @@
using namespace libnux;
namespace libnux {

/**
 * Identifier of a PPU on the DLS; it distinguishes the `top` from the `bottom` instance.
 */
enum class PPUOnDLS
{
	top = 0,
	bottom = 1
};

} // namespace libnux
/* Size of the synram: number of rows and number of columns. */
static constexpr uint32_t dls_num_rows{256};
static constexpr uint32_t dls_num_columns{256};
......
#pragma once
#include "libnux/dls_vx.h"
#include "libnux/vector.h"
namespace libnux {
/**
 * Determine the executing PPU by writing markers locally and reading them back globally.
 *
 * The first even column of row 0 of the local weight memory is set to 0 and then to 32 via the
 * vector unit. After each write the same entry is read via Omnibus from both the top and the
 * bottom synram. The synram in which both markers show up identifies the location. The original
 * entry is written back before returning.
 *
 * @note This function temporarily modifies local synram and is not safe under concurrent
 * execution of the same function on the other PPU.
 * @param ppu Set to the detected PPUOnDLS; left untouched if detection fails
 * @return Boolean value whether the location could be determined
 */
inline bool get_location(PPUOnDLS& ppu)
{
	constexpr size_t row = 0;

	auto scratch = get_row_via_vector(row, dls_weight_base);
	auto const original = scratch.even_columns[0];

	// Store a marker in the probed entry of the local synram through the vector unit.
	auto const store_marker = [&](auto const marker) {
		scratch.even_columns[0] = marker;
		set_row_via_vector(scratch, row, dls_weight_base);
	};
	// Fetch the probed entry of the given synram through Omnibus.
	auto const load_marker = [&](auto const synram_base) {
		return get_row_via_omnibus(row, synram_base).even_columns[0];
	};

	store_marker(0);
	auto const top_low = load_marker(synram_top_base_address);
	auto const bottom_low = load_marker(synram_bottom_base_address);

	store_marker(32);
	auto const top_high = load_marker(synram_top_base_address);
	auto const bottom_high = load_marker(synram_bottom_base_address);

	// A synram counts as local only if it followed both marker writes; top takes precedence.
	bool const seen_on_top = (top_low == 0) && (top_high == 32);
	bool const seen_on_bottom = (bottom_low == 0) && (bottom_high == 32);
	if (seen_on_top) {
		ppu = PPUOnDLS::top;
	} else if (seen_on_bottom) {
		ppu = PPUOnDLS::bottom;
	}

	// Undo the temporary modification.
	store_marker(original);
	return seen_on_top || seen_on_bottom;
}
} // namespace libnux
#pragma once
#include <array>
#include <stddef.h>
#include <stdint.h>
......@@ -43,6 +44,24 @@ struct vector_row_t
}
/** Inequality, defined as the negation of operator==. */
bool operator!=(vector_row_t const& rhs) const
{
	return !(*this == rhs);
}
/**
 * Set every entry of both the even and the odd columns to the same value.
 * @param value Value to assign to all entries
 */
void fill(uint8_t const value)
{
	size_t index = 0;
	while (index != dls_vector_size) {
		even_columns[index] = value;
		odd_columns[index] = value;
		++index;
	}
}
/**
 * Read-only access by column index.
 * Even indices map to even_columns, odd indices to odd_columns, each at position i / 2.
 * @param i Column index
 * @return Reference to the entry of the selected column
 */
uint8_t const& operator[](size_t const i) const
{
	size_t const slot = i / 2;
	if (i % 2) {
		return odd_columns[slot];
	}
	return even_columns[slot];
}
/**
 * Mutable access by column index.
 * Even indices map to even_columns, odd indices to odd_columns, each at position i / 2.
 * @param i Column index
 * @return Reference to the entry of the selected column
 */
uint8_t& operator[](size_t const i)
{
	size_t const slot = i / 2;
	if (i % 2) {
		return odd_columns[slot];
	}
	return even_columns[slot];
}
};
......@@ -108,6 +127,46 @@ inline void set_row_via_vector(vector_row_t const& values, size_t const row_id,
);
}
/**
 * Write a masked complete row to the synapse memory or CADC in parallel via the vector unit.
 *
 * The even and odd halves of `values` and `mask` are passed to one inline-assembly sequence that
 * loads all four halves, then performs a compare on a mask half followed by an output of the
 * matching value half, once for index `row_id * 2` and once for index `row_id * 2 + 1`, and
 * finally issues a `sync`.
 *
 * @param values Values to be written.
 * @param mask Mask to be used, in which values > 0 indicate usage, values <= 0 are discarded.
 * @param row_id Row number to be written.
 * @param base Base address to be used, selects weight, address, ... settings.
 */
inline void set_row_via_vector_masked(
vector_row_t const& values, vector_row_t const& mask, size_t const row_id, uint32_t const base)
{
// Index operand of the load instructions; the array addresses are used without extra offset.
static constexpr uint32_t zero = 0;
asm volatile(
// clang-format off
// NOTE(review): `%1`..`%4` are positional operand references in GCC extended asm. With the
// operand list below (operand 0 is `base`) they resolve to first_index, second_index, slindex
// and srindex, i.e. to whichever registers hold those inputs, not to the literal register
// numbers 1-4. Confirm that this is intended.
// Load the two mask halves, then the two value halves (presumably into vector registers).
"fxvlax %3, %[mlindex], %[zero]\n"
"fxvlax %4, %[mrindex], %[zero]\n"
"fxvlax %1, %[slindex], %[zero]\n"
"fxvlax %2, %[srindex], %[zero]\n"
// Even half: compare on the mask, then output the values; the trailing `1` presumably selects
// the condition under which a byte is written (TODO confirm against the instruction set).
"fxvcmpb %3\n"
"fxvoutx %1, %[base], %[first_index], 1\n"
// Odd half: same sequence with the odd mask and values.
"fxvcmpb %4\n"
"fxvoutx %2, %[base], %[second_index], 1\n"
"sync\n"
:
: [base] "b" (base),
[first_index] "r" (row_id*2),
[second_index] "r" (row_id*2+1),
[slindex] "r" (values.even_columns.data()),
[srindex] "r" (values.odd_columns.data()),
[mlindex] "r" (mask.even_columns.data()),
[mrindex] "r" (mask.odd_columns.data()),
[zero] "r" (zero),
// The arrays are additionally listed as memory inputs so the compiler treats their contents
// as being read by the asm statement.
"m"(const_cast<vector_type &>(values.even_columns)),
"m"(const_cast<vector_type &>(values.odd_columns)),
"m"(const_cast<vector_type &>(mask.even_columns)),
"m"(const_cast<vector_type &>(mask.odd_columns))
// NOTE(review): no clobbers are declared although the sequence loads into registers; verify
// that the compiler does not need to know about them.
: /* no clobber */
// clang-format on
);
}
/**
* Read a complete row from the synapse memory or CADC serially via omnibus.
......@@ -119,14 +178,18 @@ inline vector_row_t get_row_via_omnibus(size_t const row, uint32_t const base)
{
vector_row_t ret;
constexpr size_t num_words_per_row = dls_num_columns / sizeof(omnibus_word_t);
for (size_t i = 0; i < num_words_per_row; ++i) {
omnibus_word_t const value = omnibus_read(base + i + row * num_words_per_row);
uint8_t const* ptr = reinterpret_cast<uint8_t const*>(&value);
auto& columns = (i < num_words_per_row / 2) ? ret.even_columns : ret.odd_columns;
for (size_t j = 0; j < sizeof(omnibus_word_t); ++j) {
columns[(i * sizeof(omnibus_word_t) + j) % columns.size()] = ptr[j];
constexpr size_t num_words_per_vector = dls_vector_size / sizeof(omnibus_word_t);
auto const get_vector_via_omnibus = [](size_t const row, uint32_t const base, auto& columns) {
for (size_t i = 0; i < num_words_per_row / 2; ++i) {
omnibus_word_t const value = omnibus_read(base + i + row * num_words_per_row);
uint8_t const* ptr = reinterpret_cast<uint8_t const*>(&value);
for (size_t j = 0; j < sizeof(omnibus_word_t); ++j) {
columns[i * sizeof(omnibus_word_t) + j] = ptr[j];
}
}
}
};
get_vector_via_omnibus(row, base, ret.even_columns);
get_vector_via_omnibus(row, base + num_words_per_vector, ret.odd_columns);
return ret;
}
......
......@@ -114,6 +114,9 @@ def get_special_binaries(dls_version: str) -> Set[PpuHwTest]:
PpuHwTest(
join(TEST_BINARY_PATH, f"test_cadc_{dls_version}.bin"),
expected_exit_code=0 if simulation is not None else 1),
PpuHwTest(
join(TEST_BINARY_PATH, f"test_synram_{dls_version}.bin"),
timeout=int(2e6)),
})
return test_list
#include <array>
#include <stddef.h>
#include <stdint.h>
#include "libnux/dls.h"
#include "libnux/location.h"
#include "libnux/omnibus.h"
#include "libnux/sync.h"
#include "libnux/unittest.h"
#include "libnux/vector.h"
// weight value which reads back from Omnibus as the same value that was written
constexpr uint8_t invariant_weight = 32;
/**
* Write single byte via vector unit, all other bytes are disabled via byte enables.
*/
void write_byte_vector_unit(uint32_t row, size_t index, uint8_t value)
{
vector_row_t values;
// only one byte written -> other byte don't need initialization
values[index] = value;
vector_row_t mask;
mask.fill(0);
mask[index] = 1;
sync();
set_row_via_vector_masked(values, mask, row, dls_weight_base);
}
/**
 * Test entry point.
 *
 * Determines the executing PPU, zeroes the first weight row and then writes single weights via
 * byte enables at ascending columns (stride 13) and finally at the last column. After every
 * write the complete row is read back via Omnibus and compared to the accumulated expectation.
 * Exits with code 1 if the location cannot be determined.
 */
void start(void)
{
	libnux_test_init();

	libnux::PPUOnDLS ppu;
	if (!get_location(ppu)) {
		exit(1);
	}
	uint32_t synram_base =
	    (ppu == libnux::PPUOnDLS::top) ? synram_top_base_address : synram_bottom_base_address;

	// only the first row is used for now
	constexpr uint32_t row = 0;

	// start from an all-zero row
	{
		vector_row_t zero;
		zero.fill(0);
		set_row_via_vector(zero, row, dls_weight_base);
	}

	libnux_testcase_begin("synram fill linear");

	// Each step sets one additional weight via byte enables, so the row fills up over time (t):
	// t
	// | 32 0 ... 0 0 0 ...
	// v 32 0 ... 0 32 0 ...
	// ...
	vector_row_t expectation;
	expectation.fill(0);

	// Write one weight at the given column, then verify the whole row via Omnibus read-out.
	auto const write_and_verify = [&](size_t const column) {
		write_byte_vector_unit(row, column, invariant_weight);
		expectation[column] = invariant_weight;
		vector_row_t values = get_row_via_omnibus(row, synram_base);
		libnux_test_equal(values, expectation);
	};

	size_t column = 0;
	while (column < dls_num_columns) {
		write_and_verify(column);
		column += 13;
	}

	// corner case: the last column is not hit by the stride above
	constexpr size_t last_column = dls_num_columns - 1;
	write_and_verify(last_column);

	libnux_testcase_end();
	libnux_test_summary();
	libnux_test_shutdown();
}
......@@ -174,6 +174,7 @@ def build(bld):
program_list += [
'test/test_cadc.cpp',
'test/test_fpga_memory_vector_access.cpp',
'test/test_synram.cpp',
]
for program in program_list:
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment