@@ -23,26 +23,27 @@ struct Rv32BaseAluRecord {
2323};
2424
2525__global__ void alu_tracegen (
26- Fp *d_trace,
27- size_t height,
26+ Fp *d_trace, // can be apc trace
27+ size_t height, // can be apc height
2828 DeviceBufferConstView<Rv32BaseAluRecord> d_records,
2929 uint32_t *d_range_checker_ptr,
3030 size_t range_checker_bins,
3131 uint32_t *d_bitwise_lookup_ptr,
3232 size_t bitwise_num_bits,
3333 uint32_t timestamp_max_bits,
3434 // Fp *d_apc_trace,
35- uint32_t *subs // same length as dummy width
36- // size_t width, // dummy width
35+ uint32_t *subs, // same length as dummy width
36+ uint32_t calls_per_apc_row, // 1 for non-apc
37+ size_t width // dummy width
3738 // uint32_t *apc_row_index, // dummy row mapping to apc row same length as d_records
3839) {
3940 uint32_t idx = blockIdx .x * blockDim .x + threadIdx .x ;
40- RowSlice row (d_trace + idx, height);
41+ RowSliceNew row (d_trace + idx / calls_per_apc_row , height, 0 , 0 ); // we need to slice to the correct APC row, but if non-APC it's dividing by 1 and therefore the same idx
4142 if (idx < d_records.len ()) {
4243 auto const &rec = d_records[idx];
4344 // RowSlice apc_row(d_apc_trace + apc_row_index[idx], height);
4445 // auto const sub = subs[idx * width]; // offset the subs to the corresponding dummy row
45- uint32_t *sub = subs;
46+ uint32_t *sub = & subs[(idx % calls_per_apc_row) * width]; // dummy width
4647
4748 Rv32BaseAluAdapter adapter (
4849 VariableRangeChecker (d_range_checker_ptr, range_checker_bins),
@@ -52,7 +53,7 @@ __global__ void alu_tracegen(
5253 adapter.fill_trace_row_new (row, rec.adapter , sub);
5354
5455 Rv32BaseAluCore core (BitwiseOperationLookup (d_bitwise_lookup_ptr, bitwise_num_bits));
55- core.fill_trace_row_new (row.slice_from (COL_INDEX (Rv32BaseAluCols, core) - number_of_gaps_in (sub, sizeof (Rv32BaseAluCols<uint8_t >))), rec.core , sub);
56+ core.fill_trace_row_new (row.slice_from (COL_INDEX (Rv32BaseAluCols, core), number_of_gaps_in (sub, sizeof (Rv32BaseAluCols<uint8_t >))), rec.core , sub);
5657 } else {
5758 // TODO: use APC width if APC
5859 row.fill_zero (0 , sizeof (Rv32BaseAluCols<uint8_t >));
@@ -70,7 +71,8 @@ extern "C" int _alu_tracegen(
7071 size_t bitwise_num_bits,
7172 uint32_t timestamp_max_bits,
7273 // Fp *d_apc_trace,
73- uint32_t *subs // same length as dummy width
74+ uint32_t *subs, // same length as dummy width
75+ uint32_t calls_per_apc_row // 1 for non-apc
7476 // uint32_t *apc_row_index, // dummy row mapping to apc row same length as d_records
7577) {
7678 assert ((height & (height - 1 )) == 0 );
@@ -87,8 +89,9 @@ extern "C" int _alu_tracegen(
8789 bitwise_num_bits,
8890 timestamp_max_bits,
8991 // Fp *d_apc_trace,
90- subs // same length as dummy width
91- // size_t width, // dummy width
92+ subs, // same length as dummy width
93+ calls_per_apc_row, // 1 for non-apc
94+ width // dummy width
9295 // uint32_t *apc_row_index, // dummy row mapping to apc row same length as d_records
9396 );
9497 return CHECK_KERNEL ();
0 commit comments