Convolution

group supportConvolution

Support functions for Convolution and DW Convolution.

Functions

int16_t *riscv_nn_mat_mult_kernel_s16(const int8_t *input_a, const int16_t *input_b, const int32_t output_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t activation_min, const int32_t activation_max, const int32_t num_col_a, const nmsis_nn_bias_data *const bias_data, int16_t *out_0)

Matrix-multiplication function for 16-bit convolution with per-channel requantization.

This function performs the matrix multiplication of the weight matrix for all output channels with 2 columns from the im2col buffer and produces two elements per output channel. The outputs are clamped to the range provided by activation_min and activation_max. Supported framework: TensorFlow Lite Micro.

Parameters
  • input_a[in] pointer to operand A

  • input_b[in] pointer to operand B, always consists of 2 vectors.

  • output_ch[in] number of rows of A

  • out_shift[in] pointer to per output channel requantization shift parameter.

  • out_mult[in] pointer to per output channel requantization multiplier parameter.

  • activation_min[in] minimum value to clamp the output to. Range : int16

  • activation_max[in] maximum value to clamp the output to. Range : int16

  • num_col_a[in] number of columns of A

  • bias_data[in] pointer to struct with bias vector. The length of this vector is equal to the number of output columns (or RHS input rows). The vector can be int32 or int64 indicated by a flag in the struct.

  • out_0[inout] pointer to output

Returns

The function returns one of the two

  1. The incremented output pointer for a successful operation or

  2. NULL if implementation is not available.
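A minimal usage sketch follows; it is not part of the library documentation. The wrapper name, buffer names and the riscv_nnsupportfunctions.h include are assumptions made for illustration, and the int16 activation limits are simply the widest legal clamp range.

#include <stdint.h>
#include "riscv_nnsupportfunctions.h" /* assumed header for the prototype and nmsis_nn_bias_data */

/* Multiply the weight matrix against two im2col columns, producing two int16
 * results per output channel. Returns the advanced output pointer, or NULL
 * when no implementation is available. */
int16_t *convolve_two_columns(const int8_t *weights,        /* output_ch x num_col_a */
                              const int16_t *im2col_2cols,  /* two columns of num_col_a elements */
                              int32_t output_ch,
                              int32_t num_col_a,
                              const int32_t *out_shift,     /* per-channel requantization shift */
                              const int32_t *out_mult,      /* per-channel requantization multiplier */
                              const nmsis_nn_bias_data *bias_data,
                              int16_t *out)
{
    return riscv_nn_mat_mult_kernel_s16(weights, im2col_2cols, output_ch,
                                        out_shift, out_mult,
                                        INT16_MIN, /* activation_min */
                                        INT16_MAX, /* activation_max */
                                        num_col_a, bias_data, out);
}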

riscv_nmsis_nn_status riscv_nn_depthwise_conv_nt_t_padded_s8(const int8_t *lhs, const int8_t *rhs, const int32_t input_offset, const int32_t active_ch, const int32_t total_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, int8_t *out)

Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in padded cases where the padding value is -input_offset (Range: int8). Dimensions are the same for lhs and rhs.

Note

If the number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read for the following:

  • Output shift

  • Output multiplier

  • Output bias

  • rhs

Parameters
  • lhs[in] Input left-hand side matrix

  • rhs[in] Input right-hand side matrix (transposed)

  • input_offset[in] LHS matrix offset (input offset). Range: -127 to 128

  • active_ch[in] Subset of total_ch processed

  • total_ch[in] Number of channels in LHS/RHS

  • out_shift[in] Per channel output shift. Length of vector is equal to number of channels

  • out_mult[in] Per channel output multiplier. Length of vector is equal to number of channels

  • out_offset[in] Offset to be added to the output values. Range: -127 to 128

  • activation_min[in] Minimum value to clamp the output to. Range: int8

  • activation_max[in] Maximum value to clamp the output to. Range: int8

  • row_x_col[in] (row_dimension * col_dimension) of LHS/RHS matrix

  • output_bias[in] Per channel output bias. Length of vector is equal to number of channels

  • out[out] Output pointer

Returns

The function returns one of the two

  • Updated output pointer if an implementation is available

  • NULL if no implementation is available.
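The sketch below illustrates one call over a block of four padded lhs rows with all channels active. It is an assumption-laden illustration rather than library code: the wrapper and buffer names and the riscv_nnsupportfunctions.h include are hypothetical, and the per-channel vectors are presumed to be padded to a multiple of 4 entries because of the over-read noted above.

#include <stdint.h>
#include "riscv_nnsupportfunctions.h" /* assumed header for the prototype and status type */

/* Run one block of four padded lhs rows against the transposed rhs weights,
 * processing every channel. The padded lhs regions are expected to already
 * hold -input_offset, as described above. */
riscv_nmsis_nn_status run_dw_padded_block(const int8_t *lhs_4rows,
                                          const int8_t *rhs_t,
                                          int32_t input_offset,
                                          int32_t total_ch,
                                          uint16_t row_x_col,
                                          const int32_t *out_shift,   /* total_ch entries (padded to x4) */
                                          const int32_t *out_mult,    /* total_ch entries (padded to x4) */
                                          const int32_t *output_bias, /* total_ch entries (padded to x4) */
                                          int32_t out_offset,
                                          int8_t *out)
{
    return riscv_nn_depthwise_conv_nt_t_padded_s8(lhs_4rows, rhs_t,
                                                  input_offset,
                                                  total_ch, /* active_ch: process all channels */
                                                  total_ch,
                                                  out_shift, out_mult,
                                                  out_offset,
                                                  -128, /* activation_min (int8) */
                                                  127,  /* activation_max (int8) */
                                                  row_x_col,
                                                  output_bias,
                                                  out);
}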

int16_t *riscv_nn_depthwise_conv_nt_t_s16(const int16_t *lhs, const int8_t *rhs, const uint16_t num_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int64_t *const output_bias, int16_t *out)

Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases. Dimensions are the same for lhs and rhs.

Note

If the number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read for the following:

  • Output shift

  • Output multiplier

  • Output bias

  • rhs

Parameters
  • lhs[in] Input left-hand side matrix

  • rhs[in] Input right-hand side matrix (transposed)

  • num_ch[in] Number of channels in LHS/RHS

  • out_shift[in] Per channel output shift. Length of vector is equal to number of channels.

  • out_mult[in] Per channel output multiplier. Length of vector is equal to number of channels.

  • activation_min[in] Minimum value to clamp the output to. Range: int16

  • activation_max[in] Maximum value to clamp the output to. Range: int16

  • row_x_col[in] (row_dimension * col_dimension) of LHS/RHS matrix

  • output_bias[in] Per channel output bias. Length of vector is equal to number of channels.

  • out[out] Output pointer

Returns

The function returns one of the two

  • Updated output pointer if an implementation is available

  • NULL if no implementation is available.

riscv_nmsis_nn_status riscv_nn_depthwise_conv_nt_t_s4(const int8_t *lhs, const int8_t *rhs, const int32_t input_offset, const int32_t active_ch, const int32_t total_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, int8_t *out)

Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases. rhs consists of packed int4 data. Dimensions are the same for lhs and rhs.

Note

If the number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read for the following:

  • Output shift

  • Output multiplier

  • Output bias

  • rhs

Parameters
  • lhs[in] Input left-hand side matrix

  • rhs[in] Input right-hand side matrix (transposed). Consists of int4 data packed in an int8 buffer.

  • input_offset[in] LHS matrix offset (input offset). Range: -127 to 128

  • active_ch[in] Subset of total_ch processed

  • total_ch[in] Number of channels in LHS/RHS

  • out_shift[in] Per channel output shift. Length of vector is equal to number of channels.

  • out_mult[in] Per channel output multiplier. Length of vector is equal to number of channels.

  • out_offset[in] Offset to be added to the output values. Range: -127 to 128

  • activation_min[in] Minimum value to clamp the output to. Range: int8

  • activation_max[in] Maximum value to clamp the output to. Range: int8

  • row_x_col[in] (row_dimension * col_dimension) of LHS/RHS matrix

  • output_bias[in] Per channel output bias. Length of vector is equal to number of channels.

  • out[out] Output pointer

Returns

The function returns one of the two

  • Updated output pointer if an implementation is available

  • NULL if no implementation is available.

riscv_nmsis_nn_status riscv_nn_depthwise_conv_nt_t_s8(const int8_t *lhs, const int8_t *rhs, const int32_t input_offset, const int32_t active_ch, const int32_t total_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, int8_t *out)

Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases. Dimensions are the same for lhs and rhs.

Note

If the number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read for the following:

  • Output shift

  • Output multiplier

  • Output bias

  • rhs

Parameters
  • lhs[in] Input left-hand side matrix

  • rhs[in] Input right-hand side matrix (transposed)

  • input_offset[in] LHS matrix offset (input offset). Range: -127 to 128

  • active_ch[in] Subset of total_ch processed

  • total_ch[in] Number of channels in LHS/RHS

  • out_shift[in] Per channel output shift. Length of vector is equal to number of channels.

  • out_mult[in] Per channel output multiplier. Length of vector is equal to number of channels.

  • out_offset[in] Offset to be added to the output values. Range: -127 to 128

  • activation_min[in] Minimum value to clamp the output to. Range: int8

  • activation_max[in] Maximum value to clamp the output to. Range: int8

  • row_x_col[in] (row_dimension * col_dimension) of LHS/RHS matrix

  • output_bias[in] Per channel output bias. Length of vector is equal to number of channels.

  • out[out] Output pointer

Returns

The function returns one of the two

  • Updated output pointer if an implementation is available

  • NULL if no implementation is available.

riscv_nmsis_nn_status riscv_nn_mat_mul_core_1x_s4(int32_t row_elements, const int32_t skipped_row_elements, const int8_t *row_base_ref, const int8_t *col_base_ref, const int32_t out_ch, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const int32_t *bias, int8_t *output)

General Vector by Matrix multiplication with requantization, storage of result and int4 weights packed into an int8 buffer.

Pseudo-code (as int8 example; int4 filter data will be unpacked):

    *output = 0
    sum_col = 0
    for (j = 0; j < out_ch; j++)
        for (i = 0; i < row_elements; i++)
            *output += row_base_ref[i] * col_base_ref[i]
            sum_col += col_base_ref[i]
        scale sum_col using quant_params and bias
        store result in ‘output’

Parameters
  • row_elements[in] number of row elements

  • skipped_row_elements[in] number of row elements skipped due to padding. row_elements + skipped_row_elements = (kernel_x * kernel_y) * input_ch

  • row_base_ref[in] pointer to row operand

  • col_base_ref[in] pointer to col operand as packed int4

  • out_ch[in] Number of output channels

  • conv_params[in] Pointer to convolution parameters like offsets and activation values

  • quant_params[in] Pointer to per-channel quantization parameters

  • bias[in] Pointer to optional per-channel bias

  • output[out] Pointer to output where int8 results are stored.

Returns

The function performs the matrix (row_base_ref) by vector (col_base_ref) multiplication and stores the scaled result in memory.

riscv_nmsis_nn_status riscv_nn_mat_mul_core_1x_s8(int32_t row_elements, const int32_t skipped_row_elements, const int8_t *row_base_ref, const int8_t *col_base_ref, const int32_t out_ch, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const int32_t *bias, int8_t *output)

General Vector by Matrix multiplication with requantization and storage of result.

Pseudo-code:

    *output = 0
    sum_col = 0
    for (j = 0; j < out_ch; j++)
        for (i = 0; i < row_elements; i++)
            *output += row_base_ref[i] * col_base_ref[i]
            sum_col += col_base_ref[i]
        scale sum_col using quant_params and bias
        store result in ‘output’

Parameters
  • row_elements[in] number of row elements

  • skipped_row_elements[in] number of row elements skipped due to padding. row_elements + skipped_row_elements = (kernel_x * kernel_y) * input_ch

  • row_base_ref[in] pointer to row operand

  • col_base_ref[in] pointer to col operand

  • out_ch[in] Number of output channels

  • conv_params[in] Pointer to convolution parameters like offsets and activation values

  • quant_params[in] Pointer to per-channel quantization parameters

  • bias[in] Pointer to optional per-channel bias

  • output[out] Pointer to output where int8 results are stored.

Returns

The function performs the matrix (row_base_ref) by vector (col_base_ref) multiplication and stores the scaled result in memory.
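As a plain-C illustration of the pseudo-code above, the sketch below computes the same per-channel dot products and column sums. It is not the library implementation: the mapping of the generic input/weights names onto the function's row/col operands, the per-channel advance of the weight pointer, and the simplified float requantization are all assumptions made for readability. The real kernel uses the fixed-point multiplier/shift pairs from quant_params and the offsets from conv_params.

#include <stdint.h>

static int8_t clamp_s8(int32_t v, int32_t lo, int32_t hi)
{
    if (v < lo) { v = lo; }
    if (v > hi) { v = hi; }
    return (int8_t)v;
}

/* Scalar sketch: one input vector against out_ch weight rows. */
void mat_mul_core_1x_reference(int32_t row_elements,
                               const int8_t *input,        /* length row_elements */
                               const int8_t *weights,      /* out_ch rows of row_elements weights */
                               int32_t out_ch,
                               int32_t input_offset,       /* assumed to come from conv_params */
                               int32_t output_offset,      /* assumed to come from conv_params */
                               const float *per_ch_scale,  /* simplified stand-in for quant_params */
                               const int32_t *bias,        /* optional per-channel bias */
                               int32_t activation_min,
                               int32_t activation_max,
                               int8_t *output)
{
    for (int32_t j = 0; j < out_ch; j++)
    {
        const int8_t *w = weights + j * row_elements;
        int32_t acc = bias ? bias[j] : 0;
        int32_t sum_col = 0;

        for (int32_t i = 0; i < row_elements; i++)
        {
            acc += input[i] * w[i]; /* dot-product term (cf. pseudo-code) */
            sum_col += w[i];        /* column sum (cf. pseudo-code) */
        }

        /* fold the asymmetric input offset via the column sum, then requantize and clamp */
        acc += input_offset * sum_col;
        int32_t scaled = (int32_t)((float)acc * per_ch_scale[j]) + output_offset;
        output[j] = clamp_s8(scaled, activation_min, activation_max);
    }
}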

int8_t *riscv_nn_mat_mul_core_4x_s8(const int32_t row_elements, const int32_t offset, const int8_t *row_base, const int8_t *col_base_ref, const int32_t out_ch, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const int32_t *bias, int8_t *output)

Matrix-multiplication with requantization & activation function for four rows and one column.

Compliant to TFLM int8 specification. MVE implementation only

Parameters
  • row_elements[in] number of row elements

  • offset[in] offset between rows. Can be the same as row_elements, e.g. in a 1x1 convolution with stride 1.

  • row_base[in] pointer to row operand

  • col_base_ref[in] pointer to col operand

  • out_ch[in] Number of output channels

  • conv_params[in] Pointer to convolution parameters like offsets and activation values

  • quant_params[in] Pointer to per-channel quantization parameters

  • bias[in] Pointer to per-channel bias

  • output[out] Pointer to output where int8 results are stored.

Returns

The function returns the updated output pointer or NULL if implementation is not available.
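A minimal call sketch for the 1x1-convolution, stride-1 case mentioned above, where the row offset equals row_elements, is shown below. The wrapper and variable names and the riscv_nnsupportfunctions.h include are illustrative assumptions; conv_params and quant_params are presumed to be populated by the calling convolution wrapper.

#include <stdint.h>
#include "riscv_nnsupportfunctions.h" /* assumed header for the prototype and param structs */

/* Process four consecutive input rows against one weight column set. */
int8_t *run_4x_block(const int8_t *row_base,      /* four consecutive input rows */
                     const int8_t *col_base_ref,  /* weights */
                     int32_t row_elements,
                     int32_t out_ch,
                     const nmsis_nn_conv_params *conv_params,
                     const nmsis_nn_per_channel_quant_params *quant_params,
                     const int32_t *bias,
                     int8_t *output)
{
    /* offset == row_elements: the four processed rows are stored back to back */
    return riscv_nn_mat_mul_core_4x_s8(row_elements, row_elements,
                                       row_base, col_base_ref, out_ch,
                                       conv_params, quant_params, bias, output);
}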

riscv_nmsis_nn_status riscv_nn_mat_mult_nt_t_s16(const int16_t *lhs, const int8_t *rhs, const nmsis_nn_bias_data *bias_data, int16_t *dst, const int32_t *dst_multipliers, const int32_t *dst_shifts, const int32_t lhs_rows, const int32_t rhs_rows, const int32_t rhs_cols, const int32_t activation_min, const int32_t activation_max)

General Matrix-multiplication function with per-channel requantization and int16 input (LHS) and output. This function assumes:

  • LHS input matrix NOT transposed (nt)

  • RHS input matrix transposed (t)

MVE implementation only.

Note

This operation also performs the broadcast bias addition before the requantization

Parameters
  • lhs[in] Pointer to the LHS input matrix

  • rhs[in] Pointer to the RHS input matrix

  • bias_data[in] Pointer to struct with bias vector. The length of this vector is equal to the number of output columns (or RHS input rows). The vector can be int32 or int64 indicated by a flag in the struct.

  • dst[out] Pointer to the output matrix with “m” rows and “n” columns

  • dst_multipliers[in] Pointer to the multipliers vector needed for the per-channel requantization. The length of this vector is equal to the number of output columns (or RHS input rows)

  • dst_shifts[in] Pointer to the shifts vector needed for the per-channel requantization. The length of this vector is equal to the number of output columns (or RHS input rows)

  • lhs_rows[in] Number of LHS input rows

  • rhs_rows[in] Number of RHS input rows

  • rhs_cols[in] Number of LHS/RHS input columns

  • activation_min[in] Minimum value to clamp down the output. Range : int16

  • activation_max[in] Maximum value to clamp up the output. Range : int16

Returns

The function returns RISCV_NMSIS_NN_SUCCESS, or RISCV_NMSIS_NN_NO_IMPL_ERROR if no MVE implementation is available.
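The scalar sketch below only illustrates the "LHS not transposed, RHS transposed" indexing and the broadcast bias addition; it is not the library kernel. The int32 bias and the float per-channel scale are simplifications standing in for nmsis_nn_bias_data and the multiplier/shift vectors.

#include <stdint.h>

static int16_t clamp_s16(int32_t v, int32_t lo, int32_t hi)
{
    if (v < lo) { v = lo; }
    if (v > hi) { v = hi; }
    return (int16_t)v;
}

/* dst is lhs_rows x rhs_rows; the k loop runs over the rhs_cols shared columns */
void mat_mult_nt_t_s16_reference(const int16_t *lhs, const int8_t *rhs,
                                 const int32_t *bias,       /* one entry per RHS row (output column) */
                                 const float *per_ch_scale, /* simplified stand-in for multiplier/shift */
                                 int32_t lhs_rows, int32_t rhs_rows, int32_t rhs_cols,
                                 int32_t activation_min, int32_t activation_max,
                                 int16_t *dst)
{
    for (int32_t m = 0; m < lhs_rows; m++)
    {
        for (int32_t n = 0; n < rhs_rows; n++)
        {
            int64_t acc = bias ? bias[n] : 0; /* broadcast bias addition */
            for (int32_t k = 0; k < rhs_cols; k++)
            {
                /* LHS indexed row-major (not transposed), RHS indexed by its own rows (transposed) */
                acc += (int64_t)lhs[m * rhs_cols + k] * rhs[n * rhs_cols + k];
            }
            int32_t requant = (int32_t)((float)acc * per_ch_scale[n]);
            dst[m * rhs_rows + n] = clamp_s16(requant, activation_min, activation_max);
        }
    }
}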

riscv_nmsis_nn_status riscv_nn_mat_mult_nt_t_s4(const int8_t *lhs, const int8_t *packed_rhs, const int32_t *bias, int8_t *dst, const int32_t *dst_multipliers, const int32_t *dst_shifts, const int32_t lhs_rows, const int32_t rhs_rows, const int32_t rhs_cols, const int32_t lhs_offset, const int32_t dst_offset, const int32_t activation_min, const int32_t activation_max, const int32_t lhs_cols_offset)

General Matrix-multiplication function with per-channel requantization. This function assumes:

  • LHS input matrix NOT transposed (nt)

  • RHS input matrix transposed (t)

  • RHS is int8 packed with 2x int4

  • LHS is int8

Note

This operation also performs the broadcast bias addition before the requantization

Parameters
  • lhs[in] Pointer to the LHS input matrix

  • packed_rhs[in] Pointer to the packed RHS input matrix (two int4 values per int8)

  • bias[in] Pointer to the bias vector. The length of this vector is equal to the number of output columns (or RHS input rows)

  • dst[out] Pointer to the output matrix with “m” rows and “n” columns

  • dst_multipliers[in] Pointer to the multipliers vector needed for the per-channel requantization. The length of this vector is equal to the number of output columns (or RHS input rows)

  • dst_shifts[in] Pointer to the shifts vector needed for the per-channel requantization. The length of this vector is equal to the number of output columns (or RHS input rows)

  • lhs_rows[in] Number of LHS input rows

  • rhs_rows[in] Number of RHS input rows

  • rhs_cols[in] Number of LHS/RHS input columns

  • lhs_offset[in] Offset to be applied to the LHS input value

  • dst_offset[in] Offset to be applied to the output result

  • activation_min[in] Minimum value to clamp down the output. Range : int8

  • activation_max[in] Maximum value to clamp up the output. Range : int8

  • lhs_cols_offset[in] Column offset between subsequent lhs_rows

Returns

The function returns RISCV_NMSIS_NN_SUCCESS
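Because the RHS is packed with two int4 values per int8 byte, a caller preparing weights needs an agreed nibble order. The helpers below are a hypothetical illustration that assumes a low-nibble-first layout; the order actually expected by riscv_nn_mat_mult_nt_t_s4 is defined by the library's own packing utilities and should be taken from there.

#include <stdint.h>

/* Pack two signed int4 values (each in [-8, 7]) into one byte, assuming the
 * low nibble holds the first element. */
static int8_t pack_two_s4(int8_t first, int8_t second)
{
    uint8_t b = (uint8_t)(((uint8_t)first & 0x0FU) | (((uint8_t)second & 0x0FU) << 4));
    return (int8_t)b;
}

/* Recover the two signed int4 values, sign-extending each 4-bit field. */
static void unpack_two_s4(int8_t packed, int8_t *first, int8_t *second)
{
    uint8_t u = (uint8_t)packed;
    *first  = (int8_t)((int32_t)((u & 0x0FU) ^ 0x08U) - 0x08);
    *second = (int8_t)((int32_t)(((u >> 4) & 0x0FU) ^ 0x08U) - 0x08);
}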

riscv_nmsis_nn_status riscv_nn_mat_mult_nt_t_s8(const int8_t *lhs, const int8_t *rhs, const int32_t *bias, int8_t *dst, const int32_t *dst_multipliers, const int32_t *dst_shifts, const int32_t lhs_rows, const int32_t rhs_rows, const int32_t rhs_cols, const int32_t lhs_offset, const int32_t dst_offset, const int32_t activation_min, const int32_t activation_max, const int32_t row_address_offset, const int32_t lhs_cols_offset)

General Matrix-multiplication function with per-channel requantization. This function assumes:

  • LHS input matrix NOT transposed (nt)

  • RHS input matrix transposed (t)

Note

This operation also performs the broadcast bias addition before the requantization

Parameters
  • lhs[in] Pointer to the LHS input matrix

  • rhs[in] Pointer to the RHS input matrix

  • bias[in] Pointer to the bias vector. The length of this vector is equal to the number of output columns (or RHS input rows)

  • dst[out] Pointer to the output matrix with “m” rows and “n” columns

  • dst_multipliers[in] Pointer to the multipliers vector needed for the per-channel requantization. The length of this vector is equal to the number of output columns (or RHS input rows)

  • dst_shifts[in] Pointer to the shifts vector needed for the per-channel requantization. The length of this vector is equal to the number of output columns (or RHS input rows)

  • lhs_rows[in] Number of LHS input rows

  • rhs_rows[in] Number of RHS input rows

  • rhs_cols[in] Number of LHS/RHS input columns

  • lhs_offset[in] Offset to be applied to the LHS input value

  • dst_offset[in] Offset to be applied to the output result

  • activation_min[in] Minimum value to clamp down the output. Range : int8

  • activation_max[in] Maximum value to clamp up the output. Range : int8

  • row_address_offset[in] Address offset between rows in output.

  • lhs_cols_offset[in] Column offset between subsequent lhs_rows

Returns

The function returns RISCV_NMSIS_NN_SUCCESS

riscv_nmsis_nn_status riscv_nn_mat_mult_nt_t_s8_s32(const int8_t *lhs, const int8_t *rhs, int32_t *dst, const int32_t lhs_rows, const int32_t rhs_rows, const int32_t rhs_cols, const int32_t lhs_offset, const int32_t dst_idx_offset)

General Matrix-multiplication function with int8 input and int32 output. This function assumes:

  • LHS input matrix NOT transposed (nt)

  • RHS input matrix transposed (t)

Note

Dst/output buffer must be zeroed out before calling this function.

Parameters
  • lhs[in] Pointer to the LHS input matrix

  • rhs[in] Pointer to the RHS input matrix

  • dst[out] Pointer to the output matrix with “m” rows and “n” columns

  • lhs_rows[in] Number of LHS input rows

  • rhs_rows[in] Number of LHS input columns/RHS input rows

  • rhs_cols[in] Number of RHS input columns

  • lhs_offset[in] Offset to be applied to the LHS input value

  • dst_idx_offset[in] Offset between subsequent output results

Returns

The function returns RISCV_NMSIS_NN_SUCCESS
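The note above requires the destination buffer to be zeroed before the call, because the kernel accumulates into it. The sketch below shows that preparation; the wrapper name, the dst_size parameter and the riscv_nnsupportfunctions.h include are assumptions for illustration.

#include <stdint.h>
#include <string.h>
#include "riscv_nnsupportfunctions.h" /* assumed header for the prototype */

riscv_nmsis_nn_status matmul_s8_s32(const int8_t *lhs, const int8_t *rhs,
                                    int32_t *dst, size_t dst_size, /* number of int32 elements in dst */
                                    int32_t lhs_rows, int32_t rhs_rows, int32_t rhs_cols,
                                    int32_t lhs_offset, int32_t dst_idx_offset)
{
    /* accumulation starts from zero, so clear the whole output buffer first */
    memset(dst, 0, dst_size * sizeof(int32_t));

    return riscv_nn_mat_mult_nt_t_s8_s32(lhs, rhs, dst,
                                         lhs_rows, rhs_rows, rhs_cols,
                                         lhs_offset, dst_idx_offset);
}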