NMSIS-NN
Version 1.3.1
NMSIS NN Software Library
|
Collection of convolution, depthwise convolution functions and their variants. More...
Modules | |
GetBufferSizeNNConv | |
Functions | |
riscv_nmsis_nn_status | riscv_convolve_1_x_n_s4 (const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data) |
1xn convolution for s4 weights More... | |
riscv_nmsis_nn_status | riscv_convolve_1_x_n_s8 (const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data) |
1xn convolution More... | |
riscv_nmsis_nn_status | riscv_convolve_1x1_HWC_q7_fast_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB) |
Fast Q7 version of 1x1 convolution (non-square shape) More... | |
riscv_nmsis_nn_status | riscv_convolve_1x1_s4 (const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data) |
s4 version for 1x1 convolution with support for non-unity stride values More... | |
riscv_nmsis_nn_status | riscv_convolve_1x1_s4_fast (const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data) |
Fast s4 version for 1x1 convolution (non-square shape) More... | |
riscv_nmsis_nn_status | riscv_convolve_1x1_s8 (const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data) |
s8 version for 1x1 convolution with support for non-unity stride values More... | |
riscv_nmsis_nn_status | riscv_convolve_1x1_s8_fast (const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data) |
Fast s8 version for 1x1 convolution (non-square shape) More... | |
riscv_nmsis_nn_status | riscv_convolve_HWC_q15_basic (const q15_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB) |
Basic Q15 convolution function. More... | |
riscv_nmsis_nn_status | riscv_convolve_HWC_q15_fast (const q15_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB) |
Fast Q15 convolution function. More... | |
riscv_nmsis_nn_status | riscv_convolve_HWC_q15_fast_nonsquare (const q15_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB) |
Fast Q15 convolution function (non-square shape) More... | |
riscv_nmsis_nn_status | riscv_convolve_HWC_q7_basic (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB) |
Basic Q7 convolution function. More... | |
riscv_nmsis_nn_status | riscv_convolve_HWC_q7_basic_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB) |
Basic Q7 convolution function (non-square shape) More... | |
riscv_nmsis_nn_status | riscv_convolve_HWC_q7_fast (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB) |
Fast Q7 convolution function. More... | |
riscv_nmsis_nn_status | riscv_convolve_HWC_q7_fast_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB) |
Fast Q7 convolution function (non-square shape) More... | |
riscv_nmsis_nn_status | riscv_convolve_HWC_q7_RGB (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB) |
Q7 convolution function for RGB image. More... | |
riscv_nmsis_nn_status | riscv_convolve_s16 (const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int16_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const nmsis_nn_bias_data *bias_data, const nmsis_nn_dims *output_dims, int16_t *output_data) |
Basic s16 convolution function. More... | |
riscv_nmsis_nn_status | riscv_convolve_s4 (const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *packed_filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data) |
Basic s4 convolution function. More... | |
riscv_nmsis_nn_status | riscv_convolve_s8 (const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data) |
Basic s8 convolution function. More... | |
riscv_nmsis_nn_status | riscv_convolve_wrapper_s16 (const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int16_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const nmsis_nn_bias_data *bias_data, const nmsis_nn_dims *output_dims, int16_t *output_data) |
s16 convolution layer wrapper function with the main purpose to call the optimal kernel available in nmsis-nn to perform the convolution. More... | |
riscv_nmsis_nn_status | riscv_convolve_wrapper_s4 (const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data) |
s4 convolution layer wrapper function with the main purpose to call the optimal kernel available in nmsis-nn to perform the convolution. More... | |
riscv_nmsis_nn_status | riscv_convolve_wrapper_s8 (const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data) |
s8 convolution layer wrapper function with the main purpose to call the optimal kernel available in nmsis-nn to perform the convolution. More... | |
riscv_nmsis_nn_status | riscv_depthwise_conv_3x3_s8 (const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input, const nmsis_nn_dims *filter_dims, const int8_t *kernel, const nmsis_nn_dims *bias_dims, const int32_t *bias, const nmsis_nn_dims *output_dims, int8_t *output) |
Optimized s8 depthwise convolution function for 3x3 kernel size with some constraints on the input arguments (documented below). Refer to riscv_depthwise_conv_s8() for function argument details. More... | |
riscv_nmsis_nn_status | riscv_depthwise_conv_fast_s16 (const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int16_t *input, const nmsis_nn_dims *filter_dims, const int8_t *kernel, const nmsis_nn_dims *bias_dims, const int64_t *bias, const nmsis_nn_dims *output_dims, int16_t *output) |
Optimized s16 depthwise convolution function with constraint that in_channel equals out_channel. Refer riscv_depthwise_conv_s16() for function argument details. More... | |
static void | __attribute__ ((unused)) |
static void | depthwise_conv_s16_generic_s16 (const int16_t *input, const uint16_t input_batches, const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch, const int8_t *kernel, const uint16_t ch_mult, const uint16_t kernel_x, const uint16_t kernel_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int64_t *bias, int16_t *output, const int32_t *output_shift, const int32_t *output_mult, const uint16_t output_x, const uint16_t output_y, const int32_t output_activation_min, const int32_t output_activation_max, const uint16_t dilation_x, const uint16_t dilation_y) |
riscv_nmsis_nn_status | riscv_depthwise_conv_s16 (const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int16_t *input, const nmsis_nn_dims *filter_dims, const int8_t *kernel, const nmsis_nn_dims *bias_dims, const int64_t *bias, const nmsis_nn_dims *output_dims, int16_t *output) |
Basic s16 depthwise convolution function that doesn't have any constraints on the input dimensions. More... | |
static void | depthwise_conv_s4_generic (const int8_t *input, const int32_t input_batches, const int32_t input_x, const int32_t input_y, const int32_t input_ch, const int8_t *kernel, const int32_t output_ch, const int32_t ch_mult, const int32_t kernel_x, const int32_t kernel_y, const int32_t pad_x, const int32_t pad_y, const int32_t stride_x, const int32_t stride_y, const int32_t *bias, int8_t *output, const int32_t *output_shift, const int32_t *output_mult, const int32_t output_x, const int32_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t output_activation_min, const int32_t output_activation_max, const int32_t dilation_x, const int32_t dilation_y) |
riscv_nmsis_nn_status | riscv_depthwise_conv_s4 (const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input, const nmsis_nn_dims *filter_dims, const int8_t *kernel, const nmsis_nn_dims *bias_dims, const int32_t *bias, const nmsis_nn_dims *output_dims, int8_t *output) |
Basic s4 depthwise convolution function that doesn't have any constraints on the input dimensions. More... | |
riscv_nmsis_nn_status | riscv_depthwise_conv_s4_opt (const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input, const nmsis_nn_dims *filter_dims, const int8_t *kernel, const nmsis_nn_dims *bias_dims, const int32_t *bias, const nmsis_nn_dims *output_dims, int8_t *output) |
Optimized s4 depthwise convolution function with constraint that in_channel equals out_channel. Refer riscv_depthwise_conv_s4() for function argument details. More... | |
static void | depthwise_conv_s8_mult_4 (const int8_t *input, const int32_t input_x, const int32_t input_y, const int32_t input_ch, const int8_t *kernel, const int32_t output_ch, const int32_t ch_mult, const int32_t kernel_x, const int32_t kernel_y, const int32_t pad_x, const int32_t pad_y, const int32_t stride_x, const int32_t stride_y, const int32_t *bias, int8_t *output, const int32_t *output_shift, const int32_t *output_mult, const int32_t output_x, const int32_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t output_activation_min, const int32_t output_activation_max) |
static void | depthwise_conv_s8_generic (const int8_t *input, const uint16_t input_batches, const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch, const int8_t *kernel, const uint16_t output_ch, const uint16_t ch_mult, const uint16_t kernel_x, const uint16_t kernel_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, int8_t *output, const int32_t *output_shift, const int32_t *output_mult, const uint16_t output_x, const uint16_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t output_activation_min, const int32_t output_activation_max, const uint16_t dilation_x, const uint16_t dilation_y) |
riscv_nmsis_nn_status | riscv_depthwise_conv_s8 (const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input, const nmsis_nn_dims *filter_dims, const int8_t *kernel, const nmsis_nn_dims *bias_dims, const int32_t *bias, const nmsis_nn_dims *output_dims, int8_t *output) |
Basic s8 depthwise convolution function that doesn't have any constraints on the input dimensions. More... | |
riscv_nmsis_nn_status | riscv_depthwise_conv_s8_opt (const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input, const nmsis_nn_dims *filter_dims, const int8_t *kernel, const nmsis_nn_dims *bias_dims, const int32_t *bias, const nmsis_nn_dims *output_dims, int8_t *output) |
Optimized s8 depthwise convolution function with constraint that in_channel equals out_channel. Refer riscv_depthwise_conv_s8() for function argument details. More... | |
riscv_nmsis_nn_status | riscv_depthwise_conv_wrapper_s16 (const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int16_t *input, const nmsis_nn_dims *filter_dims, const int8_t *filter, const nmsis_nn_dims *bias_dims, const int64_t *bias, const nmsis_nn_dims *output_dims, int16_t *output) |
Wrapper function to pick the right optimized s16 depthwise convolution function. More... | |
riscv_nmsis_nn_status | riscv_depthwise_conv_wrapper_s4 (const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input, const nmsis_nn_dims *filter_dims, const int8_t *filter, const nmsis_nn_dims *bias_dims, const int32_t *bias, const nmsis_nn_dims *output_dims, int8_t *output) |
Wrapper function to pick the right optimized s4 depthwise convolution function. More... | |
riscv_nmsis_nn_status | riscv_depthwise_conv_wrapper_s8 (const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input, const nmsis_nn_dims *filter_dims, const int8_t *filter, const nmsis_nn_dims *bias_dims, const int32_t *bias, const nmsis_nn_dims *output_dims, int8_t *output) |
Wrapper function to pick the right optimized s8 depthwise convolution function. More... | |
riscv_nmsis_nn_status | riscv_depthwise_separable_conv_HWC_q7 (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB) |
Q7 depthwise separable convolution function. More... | |
riscv_nmsis_nn_status | riscv_depthwise_separable_conv_HWC_q7_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB) |
Q7 depthwise separable convolution function (non-square shape) More... | |
riscv_nmsis_nn_status | riscv_transpose_conv_s8 (const nmsis_nn_context *ctx, const nmsis_nn_context *output_ctx, const nmsis_nn_transpose_conv_params *transpose_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data) |
Basic s8 transpose convolution function. More... | |
Collection of convolution, depthwise convolution functions and their variants.
The convolution is implemented in 2 steps: im2col and General Matrix Multiplication (GEMM).
im2col is a process of converting each patch of image data into a column. After im2col, the convolution is computed as matrix-matrix multiplication.
To reduce the memory footprint, the im2col is performed partially. In each iteration, only a few columns (i.e., patches) are generated, followed by GEMM.
|
static |
|
static |
|
static |
|
static |
|
static |
riscv_nmsis_nn_status riscv_convolve_1_x_n_s4 | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_conv_params * | conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int8_t * | input_data, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | filter_data, | ||
const nmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const nmsis_nn_dims * | output_dims, | ||
int8_t * | output_data | ||
) |
1xn convolution for s4 weights
[in,out] | ctx | Function context that contains the additional buffer if required by the function. riscv_convolve_1_x_n_s4_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the horizontal spatial filter dimension |
[in] | filter_data | Filter data pointer. Data type: int8 as packed int4 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Optional bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int8 |
RISCV_NMSIS_NN_ARG_ERROR if argument constraints fail, or RISCV_NMSIS_NN_SUCCESS on successful completion.
riscv_nmsis_nn_status riscv_convolve_1_x_n_s8 | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_conv_params * | conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int8_t * | input_data, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | filter_data, | ||
const nmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const nmsis_nn_dims * | output_dims, | ||
int8_t * | output_data | ||
) |
1xn convolution
[in,out] | ctx | Function context that contains the additional buffer if required by the function. riscv_convolve_1_x_n_s8_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the horizontal spatial filter dimension |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Optional bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int8 |
RISCV_NMSIS_NN_ARG_ERROR if argument constraints fail, or RISCV_NMSIS_NN_SUCCESS on successful completion.
riscv_nmsis_nn_status riscv_convolve_1x1_HWC_q7_fast_nonsquare | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in_x, | ||
const uint16_t | dim_im_in_y, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel_x, | ||
const uint16_t | dim_kernel_y, | ||
const uint16_t | padding_x, | ||
const uint16_t | padding_y, | ||
const uint16_t | stride_x, | ||
const uint16_t | stride_y, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out_x, | ||
const uint16_t | dim_im_out_y, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
Fast Q7 version of 1x1 convolution (non-square shape)
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in_x | input tensor dimension x |
[in] | dim_im_in_y | input tensor dimension y |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel_x | filter kernel size x |
[in] | dim_kernel_y | filter kernel size y |
[in] | padding_x | padding size x |
[in] | padding_y | padding size y |
[in] | stride_x | convolution stride x |
[in] | stride_y | convolution stride y |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out_x | output tensor dimension x |
[in] | dim_im_out_y | output tensor dimension y |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
RISCV_NMSIS_NN_SIZE_MISMATCH
or RISCV_NMSIS_NN_SUCCESS
based on the outcome of size checking. This function is optimized for convolution with 1x1 kernel size (i.e., dim_kernel_x=1 and dim_kernel_y=1). It can be used for the second half of MobileNets [1] after depthwise separable convolution.
This function is the version with the full list of optimization tricks, but with some constraints: ch_im_in is a multiple of 4 and ch_im_out is a multiple of 2.
[1] MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications https://arxiv.org/abs/1704.04861
riscv_nmsis_nn_status riscv_convolve_1x1_s4 | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_conv_params * | conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int8_t * | input_data, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | filter_data, | ||
const nmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const nmsis_nn_dims * | output_dims, | ||
int8_t * | output_data | ||
) |
s4 version for 1x1 convolution with support for non-unity stride values
[in,out] | ctx | Function context that contains the additional buffer if required by the function. None is required by this function. |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN] |
[in] | filter_data | Filter data pointer. Data type: int8 packed with 2x int4 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Optional bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int8 |
RISCV_NMSIS_NN_ARG_ERROR if argument constraints fail, or RISCV_NMSIS_NN_SUCCESS on successful completion.
riscv_nmsis_nn_status riscv_convolve_1x1_s4_fast | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_conv_params * | conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int8_t * | input_data, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | filter_data, | ||
const nmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const nmsis_nn_dims * | output_dims, | ||
int8_t * | output_data | ||
) |
Fast s4 version for 1x1 convolution (non-square shape)
[in,out] | ctx | Function context that contains the additional buffer if required by the function. riscv_convolve_1x1_s4_fast_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN] |
[in] | filter_data | Filter data pointer. Data type: int8 packed with 2x int4 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Optional bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int8 |
RISCV_NMSIS_NN_ARG_ERROR if argument constraints fail, or RISCV_NMSIS_NN_SUCCESS on successful completion.
riscv_nmsis_nn_status riscv_convolve_1x1_s8 | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_conv_params * | conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int8_t * | input_data, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | filter_data, | ||
const nmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const nmsis_nn_dims * | output_dims, | ||
int8_t * | output_data | ||
) |
s8 version for 1x1 convolution with support for non-unity stride values
[in,out] | ctx | Function context that contains the additional buffer if required by the function. None is required by this function. |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN] |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Optional bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int8 |
RISCV_NMSIS_NN_ARG_ERROR if argument constraints fail, or RISCV_NMSIS_NN_SUCCESS on successful completion.
riscv_nmsis_nn_status riscv_convolve_1x1_s8_fast | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_conv_params * | conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int8_t * | input_data, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | filter_data, | ||
const nmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const nmsis_nn_dims * | output_dims, | ||
int8_t * | output_data | ||
) |
Fast s8 version for 1x1 convolution (non-square shape)
[in,out] | ctx | Function context that contains the additional buffer if required by the function. riscv_convolve_1x1_s8_fast_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN] |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Optional bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int8 |
RISCV_NMSIS_NN_ARG_ERROR
if argument constraints fail. or, RISCV_NMSIS_NN_SUCCESS
on successful completion.[in,out] | ctx | Function context that contains the additional buffer if required by the function. riscv_convolve_1x1_s8_fast_get_buffer_size will return the buffer_size if required |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN] |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Optional bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int8 |
RISCV_NMSIS_NN_SIZE_MISMATCH
if argument constraints fail. or, RISCV_NMSIS_NN_SUCCESS
on successful completion.riscv_nmsis_nn_status riscv_convolve_HWC_q15_basic | ( | const q15_t * | Im_in, |
const uint16_t | dim_im_in, | ||
const uint16_t | ch_im_in, | ||
const q15_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel, | ||
const uint16_t | padding, | ||
const uint16_t | stride, | ||
const q15_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q15_t * | Im_out, | ||
const uint16_t | dim_im_out, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
Basic Q15 convolution function.
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in | input tensor dimension |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel | filter kernel size |
[in] | padding | padding sizes |
[in] | stride | convolution stride |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out | output tensor dimension |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
RISCV_NMSIS_NN_SUCCESS
Buffer size:
bufferA size: ch_im_in*dim_kernel*dim_kernel
bufferB size: 0
This basic version is designed to work for any input tensor and weight dimension.
riscv_nmsis_nn_status riscv_convolve_HWC_q15_fast | ( | const q15_t * | Im_in, |
const uint16_t | dim_im_in, | ||
const uint16_t | ch_im_in, | ||
const q15_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel, | ||
const uint16_t | padding, | ||
const uint16_t | stride, | ||
const q15_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q15_t * | Im_out, | ||
const uint16_t | dim_im_out, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
Fast Q15 convolution function.
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in | input tensor dimension |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel | filter kernel size |
[in] | padding | padding sizes |
[in] | stride | convolution stride |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out | output tensor dimension |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
RISCV_NMSIS_NN_SIZE_MISMATCH
or RISCV_NMSIS_NN_SUCCESS
based on the outcome of size checking. Buffer size:
bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
bufferB size: 0
Input dimension constraints:
ch_im_in is multiple of 2
ch_im_out is multiple of 2
dim_im_out is a multiple of 2
riscv_nmsis_nn_status riscv_convolve_HWC_q15_fast_nonsquare | ( | const q15_t * | Im_in, |
const uint16_t | dim_im_in_x, | ||
const uint16_t | dim_im_in_y, | ||
const uint16_t | ch_im_in, | ||
const q15_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel_x, | ||
const uint16_t | dim_kernel_y, | ||
const uint16_t | padding_x, | ||
const uint16_t | padding_y, | ||
const uint16_t | stride_x, | ||
const uint16_t | stride_y, | ||
const q15_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q15_t * | Im_out, | ||
const uint16_t | dim_im_out_x, | ||
const uint16_t | dim_im_out_y, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
Fast Q15 convolution function (non-square shape)
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in_x | input tensor dimension x |
[in] | dim_im_in_y | input tensor dimension y |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel_x | filter kernel size x |
[in] | dim_kernel_y | filter kernel size y |
[in] | padding_x | padding size x |
[in] | padding_y | padding size y |
[in] | stride_x | convolution stride x |
[in] | stride_y | convolution stride y |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out_x | output tensor dimension x |
[in] | dim_im_out_y | output tensor dimension y |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
RISCV_NMSIS_NN_SIZE_MISMATCH
or RISCV_NMSIS_NN_SUCCESS
based on the outcome of size checking. Buffer size:
bufferA size: 2*ch_im_in*dim_kernel_x*dim_kernel_y
bufferB size: 0
Input dimension constraints:
ch_im_in is multiple of 2
ch_im_out is multiple of 2
riscv_nmsis_nn_status riscv_convolve_HWC_q7_basic | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel, | ||
const uint16_t | padding, | ||
const uint16_t | stride, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
Basic Q7 convolution function.
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in | input tensor dimension |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel | filter kernel size |
[in] | padding | padding sizes |
[in] | stride | convolution stride |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out | output tensor dimension |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
RISCV_NMSIS_NN_SUCCESS
Buffer size:
bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
bufferB size: 0
This basic version is designed to work for any input tensor and weight dimension.
riscv_nmsis_nn_status riscv_convolve_HWC_q7_basic_nonsquare | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in_x, | ||
const uint16_t | dim_im_in_y, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel_x, | ||
const uint16_t | dim_kernel_y, | ||
const uint16_t | padding_x, | ||
const uint16_t | padding_y, | ||
const uint16_t | stride_x, | ||
const uint16_t | stride_y, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out_x, | ||
const uint16_t | dim_im_out_y, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
Basic Q7 convolution function (non-square shape)
Basic Q7 convolution function (non-square shape)
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in_x | input tensor dimension x |
[in] | dim_im_in_y | input tensor dimension y |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel_x | filter kernel size x |
[in] | dim_kernel_y | filter kernel size y |
[in] | padding_x | padding size x |
[in] | padding_y | padding size y |
[in] | stride_x | convolution stride x |
[in] | stride_y | convolution stride y |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out_x | output tensor dimension x |
[in] | dim_im_out_y | output tensor dimension y |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
RISCV_NMSIS_NN_SUCCESS
riscv_nmsis_nn_status riscv_convolve_HWC_q7_fast | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel, | ||
const uint16_t | padding, | ||
const uint16_t | stride, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
Fast Q7 convolution function.
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in | input tensor dimension |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel | filter kernel size |
[in] | padding | padding sizes |
[in] | stride | convolution stride |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out | output tensor dimension |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
RISCV_NMSIS_NN_SIZE_MISMATCH
or RISCV_NMSIS_NN_SUCCESS
based on the outcome of size checking. Buffer size:
bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
bufferB size: 0
Input dimension constraints:
ch_im_in is multiple of 4 (because of the SIMD32 read and swap)
ch_im_out is multiple of 2 (because of the 2x2 mat_mult kernel)
The im2col converts the Q7 tensor input into Q15 column, which is stored in bufferA. There is reordering happening during this im2col process with riscv_q7_to_q15_reordered_no_shift. For every four elements, the second and third elements are swapped.
The computation kernel riscv_nn_mat_mult_kernel_q7_q15_reordered does the GEMM computation with the reordered columns.
To speed-up the determination of the padding condition, we split the computation into 3x3 parts, i.e., {top, mid, bottom} X {left, mid, right}. This reduces the total number of boundary condition checks and improves the data copying performance.
riscv_nmsis_nn_status riscv_convolve_HWC_q7_fast_nonsquare | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in_x, | ||
const uint16_t | dim_im_in_y, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel_x, | ||
const uint16_t | dim_kernel_y, | ||
const uint16_t | padding_x, | ||
const uint16_t | padding_y, | ||
const uint16_t | stride_x, | ||
const uint16_t | stride_y, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out_x, | ||
const uint16_t | dim_im_out_y, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
Fast Q7 convolution function (non-square shape)
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in_x | input tensor dimension x |
[in] | dim_im_in_y | input tensor dimension y |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel_x | filter kernel size x |
[in] | dim_kernel_y | filter kernel size y |
[in] | padding_x | padding size x |
[in] | padding_y | padding size y |
[in] | stride_x | convolution stride x |
[in] | stride_y | convolution stride y |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out_x | output tensor dimension x |
[in] | dim_im_out_y | output tensor dimension y |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
RISCV_NMSIS_NN_SIZE_MISMATCH
or RISCV_NMSIS_NN_SUCCESS
based on the outcome of size checking. This function is the version with full list of optimization tricks, but with some constraints: ch_im_in is multiple of 4; ch_im_out is multiple of 2.
riscv_nmsis_nn_status riscv_convolve_HWC_q7_RGB | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel, | ||
const uint16_t | padding, | ||
const uint16_t | stride, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
Q7 convolution function for RGB image.
Q7 version of convolution for RGB image.
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in | input tensor dimension |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel | filter kernel size |
[in] | padding | padding sizes |
[in] | stride | convolution stride |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out | output tensor dimension |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
RISCV_NMSIS_NN_SIZE_MISMATCH
or RISCV_NMSIS_NN_SUCCESS
based on the outcome of size checking. Buffer size:
bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
bufferB size: 0
Input dimension constraints:
ch_im_in equals 3
This kernel is written exclusively for convolution with ch_im_in equals 3. This applies on the first layer of CNNs which has input image with RGB format.
riscv_nmsis_nn_status riscv_convolve_s16 | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_conv_params * | conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int16_t * | input_data, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | filter_data, | ||
const nmsis_nn_dims * | bias_dims, | ||
const nmsis_nn_bias_data * | bias_data, | ||
const nmsis_nn_dims * | output_dims, | ||
int16_t * | output_data | ||
) |
Basic s16 convolution function.
[in,out] | ctx | Function context that contains the additional buffer if required by the function. riscv_convolve_s16_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). conv_params->input_offset : Not used conv_params->output_offset : Not used |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int16 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Struct with optional bias data pointer. Bias data type can be int64 or int32 depending on the flag in the struct. |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int16 |
RISCV_NMSIS_NN_SUCCESS
if successful or RISCV_NMSIS_NN_ARG_ERROR
if incorrect arguments or RISCV_NMSIS_NN_NO_IMPL_ERROR
riscv_nmsis_nn_status riscv_convolve_s4 | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_conv_params * | conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int8_t * | input_data, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | filter_data, | ||
const nmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const nmsis_nn_dims * | output_dims, | ||
int8_t * | output_data | ||
) |
Basic s4 convolution function.
[in,out] | ctx | Function context that contains the additional buffer if required by the function. riscv_convolve_s4_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions |
[in] | filter_data | Packed Filter data pointer. Data type: int8 packed with 2x int4 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Optional bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int8 |
RISCV_NMSIS_NN_SUCCESS
riscv_nmsis_nn_status riscv_convolve_s8 | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_conv_params * | conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int8_t * | input_data, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | filter_data, | ||
const nmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const nmsis_nn_dims * | output_dims, | ||
int8_t * | output_data | ||
) |
Basic s8 convolution function.
[in,out] | ctx | Function context that contains the additional buffer if required by the function. riscv_convolve_s8_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, HK, WK, CK] where HK, WK and CK are the spatial filter dimensions. CK != C_IN is used for grouped convolution, in which case the required conditions are C_IN = N * CK and C_OUT = N * M for N groups of size M. |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Optional bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int8 |
RISCV_NMSIS_NN_SUCCESS
if successful or RISCV_NMSIS_NN_ARG_ERROR
if incorrect arguments or RISCV_NMSIS_NN_NO_IMPL_ERROR
riscv_nmsis_nn_status riscv_convolve_wrapper_s16 | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_conv_params * | conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int16_t * | input_data, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | filter_data, | ||
const nmsis_nn_dims * | bias_dims, | ||
const nmsis_nn_bias_data * | bias_data, | ||
const nmsis_nn_dims * | output_dims, | ||
int16_t * | output_data | ||
) |
s16 convolution layer wrapper function with the main purpose to call the optimal kernel available in nmsis-nn to perform the convolution.
[in,out] | ctx | Function context that contains the additional buffer if required by the function. riscv_convolve_wrapper_s16_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). conv_params->input_offset : Not used conv_params->output_offset : Not used |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int16 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Struct with optional bias data pointer. Bias data type can be int64 or int32 depending on the flag in the struct. |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int16 |
RISCV_NMSIS_NN_ARG_ERROR
if argument constraints fail. or, RISCV_NMSIS_NN_SUCCESS
on successful completion. riscv_nmsis_nn_status riscv_convolve_wrapper_s4 | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_conv_params * | conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int8_t * | input_data, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | filter_data, | ||
const nmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const nmsis_nn_dims * | output_dims, | ||
int8_t * | output_data | ||
) |
s4 convolution layer wrapper function with the main purpose to call the optimal kernel available in nmsis-nn to perform the convolution.
[in,out] | ctx | Function context that contains the additional buffer if required by the function. riscv_convolve_wrapper_s4_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions |
[in] | filter_data | Filter data pointer. Data type: int8 packed with 2x int4 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int8 |
RISCV_NMSIS_NN_ARG_ERROR
if argument constraints fail. or, RISCV_NMSIS_NN_SUCCESS
on successful completion. riscv_nmsis_nn_status riscv_convolve_wrapper_s8 | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_conv_params * | conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int8_t * | input_data, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | filter_data, | ||
const nmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const nmsis_nn_dims * | output_dims, | ||
int8_t * | output_data | ||
) |
s8 convolution layer wrapper function with the main purpose to call the optimal kernel available in nmsis-nn to perform the convolution.
[in,out] | ctx | Function context that contains the additional buffer if required by the function. riscv_convolve_wrapper_s8_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int8 |
RISCV_NMSIS_NN_ARG_ERROR
if argument constraints fail. or, RISCV_NMSIS_NN_SUCCESS
on successful completion. riscv_nmsis_nn_status riscv_depthwise_conv_3x3_s8 | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_dw_conv_params * | dw_conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int8_t * | input_data, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | filter_data, | ||
const nmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const nmsis_nn_dims * | output_dims, | ||
int8_t * | output_data | ||
) |
Optimized s8 depthwise convolution function for 3x3 kernel size with some constraints on the input arguments (documented below). Refer to riscv_depthwise_conv_s8() for function argument details.
RISCV_NMSIS_NN_ARG_ERROR
- Unsupported dimension of tensorsRISCV_NMSIS_NN_SUCCESS
- Successful operationriscv_nmsis_nn_status riscv_depthwise_conv_fast_s16 | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_dw_conv_params * | dw_conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int16_t * | input_data, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | filter_data, | ||
const nmsis_nn_dims * | bias_dims, | ||
const int64_t * | bias_data, | ||
const nmsis_nn_dims * | output_dims, | ||
int16_t * | output_data | ||
) |
Optimized s16 depthwise convolution function with constraint that in_channel equals out_channel. Refer riscv_depthwise_conv_s16() for function argument details.
RISCV_NMSIS_NN_ARG_ERROR
- ctx->buf == NULL and riscv_depthwise_conv_fast_s16_get_buffer_size() > 0 or input channel != output channel or ch_mult != 1 RISCV_NMSIS_NN_SUCCESS
- Successful operation
riscv_nmsis_nn_status riscv_depthwise_conv_s16 | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_dw_conv_params * | dw_conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int16_t * | input_data, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | filter_data, | ||
const nmsis_nn_dims * | bias_dims, | ||
const int64_t * | bias_data, | ||
const nmsis_nn_dims * | output_dims, | ||
int16_t * | output_data | ||
) |
Basic s16 depthwise convolution function that doesn't have any constraints on the input dimensions.
[in,out] | ctx | Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. Optional function {API}_get_buffer_size() provides the buffer size if an additional buffer is required. The caller is expected to clear the buffer, if applicable, for security reasons. |
[in] | dw_conv_params | Depthwise convolution parameters (e.g. strides, dilations, pads,...) conv_params->input_offset : Not used conv_params->output_offset : Not used |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] Batch argument N is not used. |
[in] | input_data | Input (activation) data pointer. Data type: int16 |
[in] | filter_dims | Filter tensor dimensions. Format: [1, H, W, C_OUT] |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Bias data pointer. Data type: int64 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[in,out] | output_data | Output data pointer. Data type: int16 |
RISCV_NMSIS_NN_SUCCESS
riscv_nmsis_nn_status riscv_depthwise_conv_s4 | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_dw_conv_params * | dw_conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int8_t * | input, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | kernel, | ||
const nmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias, | ||
const nmsis_nn_dims * | output_dims, | ||
int8_t * | output | ||
) |
Basic s4 depthwise convolution function that doesn't have any constraints on the input dimensions.
[in,out] | ctx | Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. Optional function {API}_get_buffer_size() provides the buffer size if an additional buffer is required. The caller is expected to clear the buffer, if applicable, for security reasons. |
[in] | dw_conv_params | Depthwise convolution parameters (e.g. strides, dilations, pads,...) dw_conv_params->dilation is not used. Range of dw_conv_params->input_offset : [-127, 128] Range of dw_conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] Batch argument N is not used. |
[in] | input | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [1, H, W, C_OUT] |
[in] | kernel | Filter data pointer. Data type: int8_t packed 4-bit weights, e.g., four sequential weights [0x1, 0x2, 0x3, 0x4] packed as [0x21, 0x43]. |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias | Bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[in,out] | output | Output data pointer. Data type: int8 |
RISCV_NMSIS_NN_SUCCESS
riscv_nmsis_nn_status riscv_depthwise_conv_s4_opt | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_dw_conv_params * | dw_conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int8_t * | input_data, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | filter_data, | ||
const nmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const nmsis_nn_dims * | output_dims, | ||
int8_t * | output_data | ||
) |
Optimized s4 depthwise convolution function with constraint that in_channel equals out_channel. Refer to riscv_depthwise_conv_s4() for function argument details.
RISCV_NMSIS_NN_ARG_ERROR
- input channel != output channel or ch_mult != 1
RISCV_NMSIS_NN_SUCCESS
- Successful operation
riscv_nmsis_nn_status riscv_depthwise_conv_s8 | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_dw_conv_params * | dw_conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int8_t * | input_data, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | filter_data, | ||
const nmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const nmsis_nn_dims * | output_dims, | ||
int8_t * | output_data | ||
) |
Basic s8 depthwise convolution function that doesn't have any constraints on the input dimensions.
[in,out] | ctx | Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. Optional function {API}_get_buffer_size() provides the buffer size if an additional buffer is required. The caller is expected to clear the buffer, if applicable, for security reasons. |
[in] | dw_conv_params | Depthwise convolution parameters (e.g. strides, dilations, pads,...) dw_conv_params->dilation is not used. Range of dw_conv_params->input_offset : [-127, 128] Range of dw_conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] Batch argument N is not used. |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [1, H, W, C_OUT] |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[in,out] | output_data | Output data pointer. Data type: int8 |
RISCV_NMSIS_NN_SUCCESS
riscv_nmsis_nn_status riscv_depthwise_conv_s8_opt | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_dw_conv_params * | dw_conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int8_t * | input_data, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | filter_data, | ||
const nmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const nmsis_nn_dims * | output_dims, | ||
int8_t * | output_data | ||
) |
Optimized s8 depthwise convolution function with constraint that in_channel equals out_channel. Refer to riscv_depthwise_conv_s8() for function argument details.
RISCV_NMSIS_NN_ARG_ERROR
- input channel != output channel or ch_mult != 1
RISCV_NMSIS_NN_SUCCESS
- Successful operation
riscv_nmsis_nn_status riscv_depthwise_conv_wrapper_s16 | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_dw_conv_params * | dw_conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int16_t * | input_data, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | filter_data, | ||
const nmsis_nn_dims * | bias_dims, | ||
const int64_t * | bias_data, | ||
const nmsis_nn_dims * | output_dims, | ||
int16_t * | output_data | ||
) |
Wrapper function to pick the right optimized s16 depthwise convolution function.
[in,out] | ctx | Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. Optional function {API}_get_buffer_size() provides the buffer size if required. The caller is expected to clear the buffer, if applicable, for security reasons. |
[in] | dw_conv_params | Depthwise convolution parameters (e.g. strides, dilations, pads,...) dw_conv_params->dilation is not used. Range of dw_conv_params->input_offset : Not used Range of dw_conv_params->output_offset : Not used |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] Batch argument N is not used and assumed to be 1. |
[in] | input_data | Input (activation) data pointer. Data type: int16 |
[in] | filter_dims | Filter tensor dimensions. Format: [1, H, W, C_OUT] |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Bias data pointer. Data type: int64 |
[in] | output_dims | Output tensor dimensions. Format: [1, H, W, C_OUT] |
[in,out] | output_data | Output data pointer. Data type: int16 |
RISCV_NMSIS_NN_SUCCESS
- Successful completion.
riscv_nmsis_nn_status riscv_depthwise_conv_wrapper_s4 | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_dw_conv_params * | dw_conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int8_t * | input_data, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | filter_data, | ||
const nmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const nmsis_nn_dims * | output_dims, | ||
int8_t * | output_data | ||
) |
Wrapper function to pick the right optimized s4 depthwise convolution function.
[in,out] | ctx | Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. Optional function {API}_get_buffer_size() provides the buffer size if required. The caller is expected to clear the buffer, if applicable, for security reasons. |
[in] | dw_conv_params | Depthwise convolution parameters (e.g. strides, dilations, pads,...) dw_conv_params->dilation is not used. Range of dw_conv_params->input_offset : [-127, 128] Range of dw_conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] Batch argument N is not used and assumed to be 1. |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [1, H, W, C_OUT] |
[in] | filter_data | Filter data pointer. Data type: int8_t packed 4-bit weights, e.g., four sequential weights [0x1, 0x2, 0x3, 0x4] packed as [0x21, 0x43]. |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [1, H, W, C_OUT] |
[in,out] | output_data | Output data pointer. Data type: int8 |
RISCV_NMSIS_NN_SUCCESS
- Successful completion.
riscv_nmsis_nn_status riscv_depthwise_conv_wrapper_s8 | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_dw_conv_params * | dw_conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int8_t * | input_data, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | filter_data, | ||
const nmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const nmsis_nn_dims * | output_dims, | ||
int8_t * | output_data | ||
) |
Wrapper function to pick the right optimized s8 depthwise convolution function.
[in,out] | ctx | Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. Optional function {API}_get_buffer_size() provides the buffer size if required. The caller is expected to clear the buffer, if applicable, for security reasons. |
[in] | dw_conv_params | Depthwise convolution parameters (e.g. strides, dilations, pads,...) dw_conv_params->dilation is not used. Range of dw_conv_params->input_offset : [-127, 128] Range of dw_conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] Batch argument N is not used and assumed to be 1. |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [1, H, W, C_OUT] |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [1, H, W, C_OUT] |
[in,out] | output_data | Output data pointer. Data type: int8 |
RISCV_NMSIS_NN_SUCCESS
- Successful completion.
riscv_nmsis_nn_status riscv_depthwise_separable_conv_HWC_q7 | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel, | ||
const uint16_t | padding, | ||
const uint16_t | stride, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
Q7 depthwise separable convolution function.
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in | input tensor dimension |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel | filter kernel size |
[in] | padding | padding sizes |
[in] | stride | convolution stride |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out | output tensor dimension |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
RISCV_NMSIS_NN_SIZE_MISMATCH
or RISCV_NMSIS_NN_SUCCESS
based on the outcome of size checking.
Buffer size:
bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
bufferB size: 0
Input dimension constraints:
ch_im_in equals ch_im_out
Implementation: There are 3 nested loops here. Inner loop: calculate each output value with MAC instructions over an accumulator. Mid loop: loop over the different output channels. Outer loop: loop over the different output (x, y) positions.
riscv_nmsis_nn_status riscv_depthwise_separable_conv_HWC_q7_nonsquare | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in_x, | ||
const uint16_t | dim_im_in_y, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel_x, | ||
const uint16_t | dim_kernel_y, | ||
const uint16_t | padding_x, | ||
const uint16_t | padding_y, | ||
const uint16_t | stride_x, | ||
const uint16_t | stride_y, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out_x, | ||
const uint16_t | dim_im_out_y, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
Q7 depthwise separable convolution function (non-square shape)
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in_x | input tensor dimension x |
[in] | dim_im_in_y | input tensor dimension y |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel_x | filter kernel size x |
[in] | dim_kernel_y | filter kernel size y |
[in] | padding_x | padding sizes x |
[in] | padding_y | padding sizes y |
[in] | stride_x | convolution stride x |
[in] | stride_y | convolution stride y |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out_x | output tensor dimension x |
[in] | dim_im_out_y | output tensor dimension y |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
RISCV_NMSIS_NN_SIZE_MISMATCH
or RISCV_NMSIS_NN_SUCCESS
based on the outcome of size checking.
This function is the version with the full list of optimization tricks, but with some constraints: ch_im_in is equal to ch_im_out
riscv_nmsis_nn_status riscv_transpose_conv_s8 | ( | const nmsis_nn_context * | ctx, |
const nmsis_nn_context * | output_ctx, | ||
const nmsis_nn_transpose_conv_params * | transpose_conv_params, | ||
const nmsis_nn_per_channel_quant_params * | quant_params, | ||
const nmsis_nn_dims * | input_dims, | ||
const int8_t * | input_data, | ||
const nmsis_nn_dims * | filter_dims, | ||
const int8_t * | filter_data, | ||
const nmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const nmsis_nn_dims * | output_dims, | ||
int8_t * | output_data | ||
) |
Basic s8 transpose convolution function.
[in,out] | ctx | Function context that contains the additional buffer if required by the function. riscv_transpose_conv_s8_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. |
[in,out] | output_ctx | Temporary scratch buffer. The required size is: output width * output height * output channel * 4. The caller is expected to clear the buffer, if applicable, for security reasons. |
[in] | transpose_conv_params | Convolution parameters (e.g. strides, dilations, pads,...). Range of transpose_conv_params->input_offset : [-127, 128] Range of transpose_conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each out channel. |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Optional bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int8 |
RISCV_NMSIS_NN_ARG_ERROR
if argument constraints fail, or RISCV_NMSIS_NN_SUCCESS
on successful completion.