CMSIS NN: Fix bias data type for s8 convolution functions

The per-channel bias for the s8 convolution, depthwise convolution, and
matrix-multiplication kernel functions was declared as int8 (q7_t); it is
corrected to int32, matching the requantization parameters and the s8
quantization scheme's 32-bit bias accumulator.
diff --git a/CMSIS/NN/Include/arm_nnfunctions.h b/CMSIS/NN/Include/arm_nnfunctions.h
index f73244f..9cbac58 100644
--- a/CMSIS/NN/Include/arm_nnfunctions.h
+++ b/CMSIS/NN/Include/arm_nnfunctions.h
@@ -135,7 +135,7 @@
* @param[in] pad_y padding along height
* @param[in] stride_x convolution stride x
* @param[in] stride_y convolution stride y
- * @param[in] bias pointer to per output channel bias. Range: int8
+ * @param[in] bias pointer to per output channel bias. Range: int32
* @param[in,out] output pointer to output tensor. format: [H, W, out_ch]
* @param[in] output_shift pointer to per output channel requantization shift parameter.
* @param[in] output_mult pointer to per output channel requantization multiplier parameter.
@@ -168,7 +168,7 @@
const uint16_t pad_y,
const uint16_t stride_x,
const uint16_t stride_y,
- const q7_t *bias,
+ const int32_t *bias,
q7_t *output,
const int32_t *output_shift,
const int32_t *output_mult,
@@ -457,7 +457,7 @@
* @param[in] pad_y padding size y
* @param[in] stride_x convolution stride x
* @param[in] stride_y convolution stride y
- * @param[in] bias pointer to bias
+ * @param[in] bias pointer to per output channel bias. Range: int32
* @param[in,out] output pointer to output tensor. Format: [H, W, out_ch]
* @param[in] output_shift pointer to per output channel requantization shift parameter.
* @param[in] output_mult pointer to per output channel requantization multiplier parameter.
@@ -492,7 +492,7 @@
const uint16_t pad_y,
const uint16_t stride_x,
const uint16_t stride_y,
- const q7_t *bias,
+ const int32_t *bias,
q7_t *output,
const int32_t *output_shift,
const int32_t *output_mult,
@@ -758,7 +758,7 @@
* @param[in] pad_y padding along height
* @param[in] stride_x convolution stride along width
* @param[in] stride_y convolution stride along height
- * @param[in] bias pointer to per output channel bias. Range: int8
+ * @param[in] bias pointer to per output channel bias. Range: int32
* @param[in,out] output pointer to output tensor. Format: [H, W, out_ch]
* @param[in] output_shift pointer to per output channel requantization shift parameter.
* @param[in] output_mult pointer to per output channel requantization multiplier parameter.
@@ -794,7 +794,7 @@
const uint16_t pad_y,
const uint16_t stride_x,
const uint16_t stride_y,
- const q7_t *bias,
+ const int32_t *bias,
q7_t *output,
const int32_t *output_shift,
const int32_t *output_mult,
@@ -860,7 +860,7 @@
const uint16_t pad_y,
const uint16_t stride_x,
const uint16_t stride_y,
- const q7_t *bias,
+ const int32_t *bias,
q7_t *output,
const int32_t *output_shift,
const int32_t *output_mult,
@@ -1137,7 +1137,7 @@
* @param[in] activation_min minimum value to clamp the output to. Range : int8
* @param[in] activation_max maximum value to clamp the output to. Range : int8
* @param[in] num_col_a number of columns of A
- * @param[in] output_bias per output channel bias
+ * @param[in] output_bias per output channel bias. Range : int32
* @param[in,out] out_0 pointer to output
* @return The function returns one of the two
* 1. The incremented output pointer for a successful operation or
@@ -1157,7 +1157,7 @@
const int16_t activation_min,
const int16_t activation_max,
const uint16_t num_col_a,
- const q7_t *const output_bias,
+ const int32_t *const output_bias,
q7_t *out_0);
/**
@@ -1177,7 +1177,7 @@
const int16_t activation_min,
const int16_t activation_max,
const uint16_t num_col_a,
- const q7_t *const output_bias,
+ const int32_t *const output_bias,
q7_t *out_0);
/**
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c
index 2f5ebb7..89f1976 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c
@@ -61,7 +61,7 @@
const uint16_t pad_y,
const uint16_t stride_x,
const uint16_t stride_y,
- const q7_t *bias,
+ const int32_t *bias,
q7_t *output,
const int32_t *output_shift,
const int32_t *output_mult,
@@ -124,7 +124,7 @@
const q7_t *ker_a = kernel;
for (i_ch_out = 0; i_ch_out < output_ch; i_ch_out++)
{
- q31_t sum = (q31_t)bias[i_ch_out];
+ q31_t sum = bias[i_ch_out];
/* Point to the beginning of the im2col buffer where the input is available as a rearranged column */
q15_t *ip_as_col = buffer_a;
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c
index be5f16a..5966297 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c
@@ -60,7 +60,7 @@
const uint16_t pad_y,
const uint16_t stride_x,
const uint16_t stride_y,
- const q7_t *bias,
+ const int32_t *bias,
q7_t *output,
const int32_t *output_shift,
const int32_t *output_mult,
@@ -134,7 +134,7 @@
for (i = 0; i < output_ch; i++)
{
/* Load the accumulator with bias first */
- q31_t sum = (q31_t)bias[i];
+ q31_t sum = bias[i];
/* Point to the beginning of the im2col buffer where the input is available as a rearranged column */
q15_t *ip_as_col = buffer_a;
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c
index 7a224a7..02e4164 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c
@@ -60,7 +60,7 @@
const uint16_t pad_y,
const uint16_t stride_x,
const uint16_t stride_y,
- const q7_t *bias,
+ const int32_t *bias,
q7_t *output,
const int32_t *output_shift,
const int32_t *output_mult,
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c
index 114bcf7..0b80f5b 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c
@@ -60,7 +60,7 @@
const uint16_t pad_y,
const uint16_t stride_x,
const uint16_t stride_y,
- const q7_t *bias,
+ const int32_t *bias,
q7_t *output,
const int32_t *output_shift,
const int32_t *output_mult,
@@ -91,7 +91,7 @@
int16_t i_ker_y, i_ker_x;
q15_t *const col_buffer_start = buffer_a;
q15_t *col_buffer = col_buffer_start;
- const q7_t *const bias_start_pos = bias;
+ const int32_t *const bias_start_pos = bias;
const q31_t *const out_mult_start_pos = output_mult;
const q31_t *const out_shift_start_pos = output_shift;
uint16_t row_count;
@@ -151,10 +151,10 @@
while (row_count)
{
- q31_t sum = (q31_t)(*bias++);
- q31_t sum_2 = (q31_t)(*bias++);
- q31_t sum_3 = (q31_t)(*bias++);
- q31_t sum_4 = (q31_t)(*bias++);
+ q31_t sum = *bias++;
+ q31_t sum_2 = *bias++;
+ q31_t sum_3 = *bias++;
+ q31_t sum_4 = *bias++;
uint16_t col_count = (kernel_x * kernel_y) / 2;
q15_t *col_pos = col_buffer_start + row_shift;
@@ -255,7 +255,7 @@
{
q15_t *col_pos = col_buffer_start + row_shift;
const q7_t *row_pos = kernel + row_shift;
- q31_t sum = (q31_t)*bias++;
+ q31_t sum = *bias++;
const uint16_t col_count = (kernel_x * kernel_y);
row_shift += 1;
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16.c
index a299436..314a579 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16.c
@@ -45,13 +45,13 @@
const int16_t activation_min,
const int16_t activation_max,
const uint16_t num_col_a,
- const q7_t *const output_bias,
+ const int32_t *const output_bias,
q7_t *out_0)
{
#if defined(ARM_MATH_LOOPUNROLL) && defined(ARM_MATH_DSP)
/* set up the second output pointers */
q7_t *out_1 = out_0 + output_ch;
- const q7_t *bias = output_bias;
+ const int32_t *bias = output_bias;
uint16_t row_count = output_ch / 2;
const q7_t *ip_a0 = input_a;
@@ -66,10 +66,10 @@
const q7_t *ip_a1 = ip_a0 + num_col_a;
/* Init accumulator with bias for channel N and N + 1 */
- q31_t ch_0_out_0 = (q31_t)*bias;
- q31_t ch_0_out_1 = (q31_t)*bias++;
- q31_t ch_1_out_0 = (q31_t)*bias;
- q31_t ch_1_out_1 = (q31_t)*bias++;
+ q31_t ch_0_out_0 = *bias;
+ q31_t ch_0_out_1 = *bias++;
+ q31_t ch_1_out_0 = *bias;
+ q31_t ch_1_out_1 = *bias++;
uint16_t col_count = num_col_a / 4;
/* accumulate over the vector */
@@ -153,8 +153,8 @@
const q15_t *ip_b1 = ip_b0 + num_col_a;
/* load the bias */
- q31_t ch_0_out_0 = (q31_t)*bias;
- q31_t ch_0_out_1 = (q31_t)*bias++;
+ q31_t ch_0_out_0 = *bias;
+ q31_t ch_0_out_1 = *bias++;
uint16_t col_count = num_col_a >> 2;
while (col_count)
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16_reordered.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16_reordered.c
index 75b1d6f..4708261 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16_reordered.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16_reordered.c
@@ -49,13 +49,13 @@
const int16_t activation_min,
const int16_t activation_max,
const uint16_t num_col_a,
- const q7_t *const output_bias,
+ const int32_t *const output_bias,
q7_t *out_0)
{
#if defined(ARM_MATH_LOOPUNROLL) && defined(ARM_MATH_DSP)
/* set up the second output pointers */
q7_t *out_1 = out_0 + output_ch;
- const q7_t *bias = output_bias;
+ const int32_t *bias = output_bias;
uint16_t row_count = output_ch / 2;
const q7_t *ip_a0 = input_a;
@@ -70,10 +70,10 @@
const q7_t *ip_a1 = ip_a0 + num_col_a;
/* Init accumulator with bias for channel N and N + 1 */
- q31_t ch_0_out_0 = (q31_t)*bias;
- q31_t ch_0_out_1 = (q31_t)*bias++;
- q31_t ch_1_out_0 = (q31_t)*bias;
- q31_t ch_1_out_1 = (q31_t)*bias++;
+ q31_t ch_0_out_0 = *bias;
+ q31_t ch_0_out_1 = *bias++;
+ q31_t ch_1_out_0 = *bias;
+ q31_t ch_1_out_1 = *bias++;
uint16_t col_count = num_col_a / 4;
/* accumulate over the vector */
@@ -157,8 +157,8 @@
const q15_t *ip_b1 = ip_b0 + num_col_a;
/* load the bias */
- q31_t ch_0_out_0 = (q31_t)*bias;
- q31_t ch_0_out_1 = (q31_t)*bias++;
+ q31_t ch_0_out_0 = *bias;
+ q31_t ch_0_out_1 = *bias++;
uint16_t col_count = num_col_a >> 2;
while (col_count)