CMSIS-NN: Add elementwise add/mul int16 and unit tests (#1419)
* CMSIS-NN: Add elementwise add/mul int16 and unit tests
Zero out offsets for add/mul s16
Co-authored-by: Annie Tallund <annie.tallund@arm.com> and Måns Nilsson <mans.nilsson@arm.com>
diff --git a/ARM.CMSIS.pdsc b/ARM.CMSIS.pdsc
index c1c9994..8a2af8c 100644
--- a/ARM.CMSIS.pdsc
+++ b/ARM.CMSIS.pdsc
@@ -18,6 +18,7 @@
- Support dilation for int8 depthwise convolution
- Support for int16 depthwise conv for reference implementation including dilation
- Support for int16 average and max pooling for reference implementation
+ - Support for int16 elementwise add and mul (scalar version only)
CMSIS-RTOS2:
- RTX 5.5.4 (see revision history for details)
</release>
@@ -2849,7 +2850,9 @@
<file category="source" name="CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s16.c"/>
<file category="source" name="CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c"/>
<file category="source" name="CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c"/>
+ <file category="source" name="CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s16.c"/>
<file category="source" name="CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c"/>
+ <file category="source" name="CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s16.c"/>
<file category="source" name="CMSIS/NN/Source/ActivationFunctions/arm_relu6_s8.c"/>
<file category="source" name="CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c"/>
<file category="source" name="CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c"/>
diff --git a/CMSIS/NN/Include/arm_nnfunctions.h b/CMSIS/NN/Include/arm_nnfunctions.h
index 4d13c2f..c3d8f97 100644
--- a/CMSIS/NN/Include/arm_nnfunctions.h
+++ b/CMSIS/NN/Include/arm_nnfunctions.h
@@ -21,8 +21,8 @@
* Title: arm_nnfunctions.h
* Description: Public header file for CMSIS NN Library
*
- * $Date: 7 February 2022
- * $Revision: V.8.0.0
+ * $Date: 14 February 2022
+ * $Revision: V.8.0.1
*
* Target Processor: Cortex-M CPUs
* -------------------------------------------------------------------- */
@@ -1603,27 +1603,27 @@
/**
* @defgroup BasicMath Basic math functions
*
- * Element wise add and multiplication functions.
+ * Elementwise add and multiplication functions.
*
*/
/**
- * @brief s8 element wise add of two vectors
+ * @brief s8 elementwise add of two vectors
* @param[in] input_1_vect pointer to input vector 1
* @param[in] input_2_vect pointer to input vector 2
- * @param[in] input_1_offset offset for input 1. Range: Range: -127 to 128
+ * @param[in] input_1_offset offset for input 1. Range: -127 to 128
* @param[in] input_1_mult multiplier for input 1
* @param[in] input_1_shift shift for input 1
- * @param[in] input_2_offset offset for input 2. Range: Range: -127 to 128
+ * @param[in] input_2_offset offset for input 2. Range: -127 to 128
* @param[in] input_2_mult multiplier for input 2
* @param[in] input_2_shift shift for input 2
* @param[in] left_shift input left shift
* @param[in,out] output pointer to output vector
- * @param[in] out_offset output offset
+ * @param[in] out_offset output offset. Range: -128 to 127
* @param[in] out_mult output multiplier
* @param[in] out_shift output shift
- * @param[in] out_activation_min minimum value to clamp output to
- * @param[in] out_activation_max maximum value to clamp output to
+ * @param[in] out_activation_min minimum value to clamp output to. Min: -128
+ * @param[in] out_activation_max maximum value to clamp output to. Max: 127
* @param[in] block_size number of samples
* @return The function returns ARM_MATH_SUCCESS
*/
@@ -1642,20 +1642,57 @@
const int32_t out_shift,
const int32_t out_activation_min,
const int32_t out_activation_max,
- const uint32_t block_size);
+ const int32_t block_size);
/**
- * @brief s8 element wise multiplication
+ * @brief s16 elementwise add of two vectors
* @param[in] input_1_vect pointer to input vector 1
* @param[in] input_2_vect pointer to input vector 2
- * @param[in] input_1_offset offset for input 1. Range: Range: -127 to 128
- * @param[in] input_2_offset offset for input 2. Range: Range: -127 to 128
+ * @param[in] input_1_offset offset for input 1. Not used.
+ * @param[in] input_1_mult multiplier for input 1
+ * @param[in] input_1_shift shift for input 1
+ * @param[in] input_2_offset offset for input 2. Not used.
+ * @param[in] input_2_mult multiplier for input 2
+ * @param[in] input_2_shift shift for input 2
+ * @param[in] left_shift input left shift
* @param[in,out] output pointer to output vector
- * @param[in] out_offset output offset
+ * @param[in] out_offset output offset. Not used.
* @param[in] out_mult output multiplier
* @param[in] out_shift output shift
- * @param[in] out_activation_min minimum value to clamp output to
- * @param[in] out_activation_max maximum value to clamp output to
+ * @param[in] out_activation_min minimum value to clamp output to. Min: -32768
+ * @param[in] out_activation_max maximum value to clamp output to. Max: 32767
+ * @param[in] block_size number of samples
+ * @return The function returns ARM_MATH_SUCCESS
+ */
+arm_status arm_elementwise_add_s16(const int16_t *input_1_vect,
+ const int16_t *input_2_vect,
+ const int32_t input_1_offset, /* not used by the s16 variant */
+ const int32_t input_1_mult,
+ const int32_t input_1_shift,
+ const int32_t input_2_offset, /* not used by the s16 variant */
+ const int32_t input_2_mult,
+ const int32_t input_2_shift,
+ const int32_t left_shift, /* applied to both inputs before they are requantized */
+ int16_t *output,
+ const int32_t out_offset, /* not used by the s16 variant */
+ const int32_t out_mult,
+ const int32_t out_shift,
+ const int32_t out_activation_min,
+ const int32_t out_activation_max,
+ const int32_t block_size); /* number of elements processed from each vector */
+
+/**
+ * @brief s8 elementwise multiplication
+ * @param[in] input_1_vect pointer to input vector 1
+ * @param[in] input_2_vect pointer to input vector 2
+ * @param[in] input_1_offset offset for input 1. Range: -127 to 128
+ * @param[in] input_2_offset offset for input 2. Range: -127 to 128
+ * @param[in,out] output pointer to output vector
+ * @param[in] out_offset output offset. Range: -128 to 127
+ * @param[in] out_mult output multiplier
+ * @param[in] out_shift output shift
+ * @param[in] out_activation_min minimum value to clamp output to. Min: -128
+ * @param[in] out_activation_max maximum value to clamp output to. Max: 127
* @param[in] block_size number of samples
* @return The function returns ARM_MATH_SUCCESS
*
@@ -1671,7 +1708,37 @@
const int32_t out_shift,
const int32_t out_activation_min,
const int32_t out_activation_max,
- const uint32_t block_size);
+ const int32_t block_size);
+
+/**
+ * @brief s16 elementwise multiplication of two vectors
+ * @param[in] input_1_vect pointer to input vector 1 (block_size elements are read)
+ * @param[in] input_2_vect pointer to input vector 2 (block_size elements are read)
+ * @param[in] input_1_offset offset for input 1. Not used; the value is ignored.
+ * @param[in] input_2_offset offset for input 2. Not used; the value is ignored.
+ * @param[in,out] output pointer to output vector (block_size elements are written)
+ * @param[in] out_offset output offset. Not used; the value is ignored.
+ * @param[in] out_mult output multiplier applied to each 32-bit product
+ * @param[in] out_shift output shift applied to each 32-bit product
+ * @param[in] out_activation_min minimum value to clamp output to. Min: -32768
+ * @param[in] out_activation_max maximum value to clamp output to. Max: 32767
+ * @param[in] block_size number of elements to process in each vector
+ * @return The function always returns ARM_MATH_SUCCESS
+ *
+ * @details Supported framework: TensorFlow Lite micro
+ */
+arm_status arm_elementwise_mul_s16(const int16_t *input_1_vect,
+ const int16_t *input_2_vect,
+ const int32_t input_1_offset,
+ const int32_t input_2_offset,
+ int16_t *output,
+ const int32_t out_offset,
+ const int32_t out_mult,
+ const int32_t out_shift,
+ const int32_t out_activation_min,
+ const int32_t out_activation_max,
+ const int32_t block_size);
+
/**
* @defgroup Acti Activation Functions
*
diff --git a/CMSIS/NN/README.md b/CMSIS/NN/README.md
index 9ac6593..d8534bd 100644
--- a/CMSIS/NN/README.md
+++ b/CMSIS/NN/README.md
@@ -52,7 +52,9 @@
|[Misc](https://arm-software.github.io/CMSIS_5/NN/html/group__groupNN.html)||||| | ||
||arm_reshape_s8()| SOFTMAX | None | None | No | No | |
||arm_elementwise_add_s8()| ELEMENTWISE ADD | None | None | Yes| Yes| Reshape is not done in this function <br/> Only minor improvements are expected |
+||arm_elementwise_add_s16()| ELEMENTWISE ADD | None | None | No| No| Reshape is not done in this function <br/> Only minor improvements are expected |
||arm_elementwise_mul_s8()| ELEMENTWISE MUL | None | None | Yes| Yes| Reshape is not done in this function <br/> Only minor improvements are expected |
+||arm_elementwise_mul_s16()| ELEMENTWISE MUL | None | None | No| No| Reshape is not done in this function <br/> Only minor improvements are expected |
||arm_relu_q7() | RELU | None | None | Yes| No|
||arm_relu6_s8() | RELU | None | None | Yes| No|
|[Concat](https://arm-software.github.io/CMSIS_5/NN/html/group__groupNN.html)||||| | ||
diff --git a/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s16.c b/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s16.c
new file mode 100644
index 0000000..6b1366d
--- /dev/null
+++ b/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s16.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project: CMSIS NN Library
+ * Title: arm_elementwise_add_s16
+ * Description: Elementwise add
+ *
+ * $Date: 14 February 2022
+ * $Revision: V.1.0.0
+ *
+ * Target Processor: Cortex-M CPUs
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnfunctions.h"
+#include "arm_nnsupportfunctions.h"
+
+/**
+ * @ingroup groupNN
+ */
+
+/**
+ * @addtogroup BasicMath
+ * @{
+ */
+
+/*
+ * s16 elementwise add
+ *
+ * Refer header file for details.
+ *
+ */
+
+/* NOTE(review): "__SHIFT" names nothing in this file; presumably input_1_shift, input_2_shift and out_shift are expected to be <= 0 - confirm */
+
+arm_status arm_elementwise_add_s16(const int16_t *input_1_vect,
+ const int16_t *input_2_vect,
+ const int32_t input_1_offset,
+ const int32_t input_1_mult,
+ const int32_t input_1_shift,
+ const int32_t input_2_offset,
+ const int32_t input_2_mult,
+ const int32_t input_2_shift,
+ const int32_t left_shift,
+ int16_t *output,
+ const int32_t out_offset,
+ const int32_t out_mult,
+ const int32_t out_shift,
+ const int32_t out_activation_min,
+ const int32_t out_activation_max,
+ const int32_t block_size)
+{
+ (void)input_1_offset; /* the three offsets are accepted but not used by the s16 variant */
+ (void)input_2_offset;
+ (void)out_offset;
+ int32_t loop_count;
+ int32_t input_1;
+ int32_t input_2;
+ int32_t sum;
+
+ loop_count = block_size; /* one output element per iteration; nothing is done when block_size <= 0 */
+
+ while (loop_count > 0)
+ {
+ /* C = A + B: each int16 input is widened to 32 bits and shifted left by left_shift */
+ input_1 = *input_1_vect++ << left_shift; /* NOTE(review): << on a negative value is undefined in ISO C - confirm toolchain behavior */
+ input_2 = *input_2_vect++ << left_shift;
+
+ input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift); /* rescale each operand with its own multiplier/shift */
+ input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
+
+ sum = input_1 + input_2;
+ sum = arm_nn_requantize(sum, out_mult, out_shift); /* rescale the sum with the output multiplier/shift */
+
+ sum = MAX(sum, out_activation_min); /* clamp to the activation range before narrowing */
+ sum = MIN(sum, out_activation_max);
+
+ *output++ = (int16_t)sum;
+
+ /* Decrement loop counter */
+ loop_count--;
+ }
+
+ return (ARM_MATH_SUCCESS);
+}
+
+/**
+ * @} end of BasicMath group
+ */
diff --git a/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c b/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c
index 6bade7b..13b6bb3 100644
--- a/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c
+++ b/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -19,10 +19,10 @@
/* ----------------------------------------------------------------------
* Project: CMSIS NN Library
* Title: arm_elementwise_add_s8
- * Description: Element wise add
+ * Description: Elementwise add
*
- * $Date: 20. July 2021
- * $Revision: V.2.5.4
+ * $Date: 3 February 2022
+ * $Revision: V.2.6.0
*
* Target Processor: Cortex-M CPUs
*
@@ -31,21 +31,6 @@
#include "arm_nnfunctions.h"
#include "arm_nnsupportfunctions.h"
-#if defined(ARM_MATH_MVEI)
-#define SAT_INPUT_VECT(__INPUT_V, __MULT, __SHIFT) \
- __INPUT_V = arm_doubling_high_mult_mve(__INPUT_V, __MULT); \
- __INPUT_V = arm_divide_by_power_of_two_mve(__INPUT_V, -__SHIFT);
-#endif
-
-/**
- * @note The *_no_sat API does not mean that the input not saturated, Since
- * __MULT is a positive integer, it is saturated. The API definition
- * has more info about it.
- */
-#define SAT_INPUT(__INPUT, __MULT, __SHIFT) \
- __INPUT = arm_nn_doubling_high_mult_no_sat(__INPUT, __MULT); \
- __INPUT = arm_nn_divide_by_power_of_two(__INPUT, -__SHIFT);
-
/**
* @ingroup groupNN
*/
@@ -56,7 +41,7 @@
*/
/*
- * s8 element wise add
+ * s8 elementwise add
*
* Refer header file for details.
*
@@ -79,10 +64,10 @@
const int32_t out_shift,
const int32_t out_activation_min,
const int32_t out_activation_max,
- const uint32_t block_size)
+ const int32_t block_size)
{
#if defined(ARM_MATH_MVEI)
- int32_t count = (int32_t)block_size;
+ int32_t count = block_size;
while (count > 0)
{
@@ -100,11 +85,11 @@
vect_1 = vshlq_r_s32(vect_1, left_shift);
vect_2 = vshlq_r_s32(vect_2, left_shift);
- SAT_INPUT_VECT(vect_1, input_1_mult, input_1_shift);
- SAT_INPUT_VECT(vect_2, input_2_mult, input_2_shift);
+ vect_1 = arm_requantize_mve(vect_1, input_1_mult, input_1_shift);
+ vect_2 = arm_requantize_mve(vect_2, input_2_mult, input_2_shift);
vect_1 = vaddq_s32(vect_1, vect_2);
- SAT_INPUT_VECT(vect_1, out_mult, out_shift);
+ vect_1 = arm_requantize_mve(vect_1, out_mult, out_shift);
vect_1 = vaddq_n_s32(vect_1, out_offset);
@@ -119,7 +104,7 @@
count -= 4;
}
#else
- uint32_t loop_count;
+ int32_t loop_count;
int32_t input_1;
int32_t input_2;
int32_t sum;
@@ -136,7 +121,7 @@
loop_count = block_size >> 2;
- while (loop_count > 0U)
+ while (loop_count > 0)
{
/* 4 outputs are calculated in one loop. The order of calculation is follows the order of output sign extension
intrinsic */
@@ -152,13 +137,13 @@
/* Sum 1 */
input_1 = (b_1 & 0x0FFFF) << left_shift;
- SAT_INPUT(input_1, input_1_mult, input_1_shift);
+ input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift);
input_2 = (b_2 & 0x0FFFF) << left_shift;
- SAT_INPUT(input_2, input_2_mult, input_2_shift);
+ input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
sum = input_1 + input_2;
- SAT_INPUT(sum, out_mult, out_shift);
+ sum = arm_nn_requantize(sum, out_mult, out_shift);
sum += out_offset;
sum = MAX(sum, out_activation_min);
sum = MIN(sum, out_activation_max);
@@ -166,13 +151,13 @@
/* Sum 3 */
input_1 = ((b_1 >> 16) & 0x0FFFF) << left_shift;
- SAT_INPUT(input_1, input_1_mult, input_1_shift);
+ input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift);
input_2 = ((b_2 >> 16) & 0x0FFFF) << left_shift;
- SAT_INPUT(input_2, input_2_mult, input_2_shift);
+ input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
sum = input_1 + input_2;
- SAT_INPUT(sum, out_mult, out_shift);
+ sum = arm_nn_requantize(sum, out_mult, out_shift);
sum += out_offset;
sum = MAX(sum, out_activation_min);
sum = MIN(sum, out_activation_max);
@@ -180,13 +165,13 @@
/* Sum 2 */
input_1 = (a_1 & 0x0FFFF) << left_shift;
- SAT_INPUT(input_1, input_1_mult, input_1_shift);
+ input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift);
input_2 = (a_2 & 0x0FFFF) << left_shift;
- SAT_INPUT(input_2, input_2_mult, input_2_shift);
+ input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
sum = input_1 + input_2;
- SAT_INPUT(sum, out_mult, out_shift);
+ sum = arm_nn_requantize(sum, out_mult, out_shift);
sum += out_offset;
sum = MAX(sum, out_activation_min);
sum = MIN(sum, out_activation_max);
@@ -194,13 +179,13 @@
/* Sum 4 */
input_1 = ((a_1 >> 16) & 0x0FFFF) << left_shift;
- SAT_INPUT(input_1, input_1_mult, input_1_shift);
+ input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift);
input_2 = ((a_2 >> 16) & 0x0FFFF) << left_shift;
- SAT_INPUT(input_2, input_2_mult, input_2_shift);
+ input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
sum = input_1 + input_2;
- SAT_INPUT(sum, out_mult, out_shift);
+ sum = arm_nn_requantize(sum, out_mult, out_shift);
sum += out_offset;
sum = MAX(sum, out_activation_min);
sum = MIN(sum, out_activation_max);
@@ -216,21 +201,18 @@
loop_count = block_size;
#endif
- while (loop_count > 0U)
+ while (loop_count > 0)
{
/* C = A + B */
input_1 = (*input_1_vect++ + input_1_offset) << left_shift;
input_2 = (*input_2_vect++ + input_2_offset) << left_shift;
- input_1 = arm_nn_doubling_high_mult(input_1, input_1_mult);
- input_1 = arm_nn_divide_by_power_of_two(input_1, -input_1_shift);
-
- input_2 = arm_nn_doubling_high_mult(input_2, input_2_mult);
- input_2 = arm_nn_divide_by_power_of_two(input_2, -input_2_shift);
+ input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift);
+ input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
sum = input_1 + input_2;
- SAT_INPUT(sum, out_mult, out_shift);
+ sum = arm_nn_requantize(sum, out_mult, out_shift);
sum += out_offset;
sum = MAX(sum, out_activation_min);
diff --git a/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s16.c b/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s16.c
new file mode 100644
index 0000000..4e25574
--- /dev/null
+++ b/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s16.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project: CMSIS NN Library
+ * Title: arm_elementwise_mul_s16
+ * Description: Elementwise multiplication
+ *
+ * $Date: 14 February 2022
+ * $Revision: V.1.0.0
+ *
+ * Target Processor: Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnfunctions.h"
+#include "arm_nnsupportfunctions.h"
+
+/**
+ * @ingroup groupNN
+ */
+
+/**
+ * @addtogroup BasicMath
+ * @{
+ */
+
+/**
+ * @brief s16 elementwise multiplication of two vectors
+ *
+ * @note Refer to the header file (arm_nnfunctions.h) for parameter details.
+ *
+ */
+arm_status arm_elementwise_mul_s16(const int16_t *input_1_vect,
+ const int16_t *input_2_vect,
+ const int32_t input_1_offset,
+ const int32_t input_2_offset,
+ int16_t *output,
+ const int32_t out_offset,
+ const int32_t out_mult,
+ const int32_t out_shift,
+ const int32_t out_activation_min,
+ const int32_t out_activation_max,
+ const int32_t block_size)
+{
+ (void)input_1_offset; /* the three offsets are accepted but not used by the s16 variant */
+ (void)input_2_offset;
+ (void)out_offset;
+ int32_t loop_count;
+ int32_t input_1;
+ int32_t input_2;
+ int32_t mul_res;
+
+ loop_count = block_size; /* one output element per iteration; nothing is done when block_size <= 0 */
+
+ while (loop_count > 0)
+ {
+ /* C = A * B: the int16 operands are widened to 32 bits before the multiply */
+
+ input_1 = *input_1_vect++;
+ input_2 = *input_2_vect++;
+
+ mul_res = input_1 * input_2;
+ mul_res = arm_nn_requantize(mul_res, out_mult, out_shift); /* rescale the product with the output multiplier/shift */
+
+ mul_res = MAX(mul_res, out_activation_min); /* clamp to the activation range before narrowing */
+ mul_res = MIN(mul_res, out_activation_max);
+
+ *output++ = (int16_t)mul_res;
+
+ /* Decrement loop counter */
+ loop_count--;
+ }
+
+ return ARM_MATH_SUCCESS;
+}
+
+/**
+ * @} end of BasicMath group
+ */
diff --git a/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c b/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c
index 3e3a63b..ff04cbf 100644
--- a/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c
+++ b/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -21,8 +21,8 @@
* Title: arm_elementwise_mul_s8
* Description: Element wise multiplication
*
- * $Date: July 20, 2021
- * $Revision: V.1.0.6
+ * $Date: 3 February 2022
+ * $Revision: V.1.1.0
*
* Target Processor: Cortex-M cores
*
@@ -57,7 +57,7 @@
const int32_t out_shift,
const int32_t out_activation_min,
const int32_t out_activation_max,
- const uint32_t block_size)
+ const int32_t block_size)
{
int32_t loop_count;
diff --git a/CMSIS/NN/Tests/UnitTest/CMakeLists.txt b/CMSIS/NN/Tests/UnitTest/CMakeLists.txt
index 707f46e..164d515 100644
--- a/CMSIS/NN/Tests/UnitTest/CMakeLists.txt
+++ b/CMSIS/NN/Tests/UnitTest/CMakeLists.txt
@@ -66,20 +66,24 @@
set_property(GLOBAL PROPERTY cmsis_nn_unit_test_executables "${tmp}")
endfunction(add_cmsis_nn_unit_test_executable)
-add_subdirectory(TestCases/test_arm_avgpool_s8)
add_subdirectory(TestCases/test_arm_avgpool_s16)
+add_subdirectory(TestCases/test_arm_avgpool_s8)
add_subdirectory(TestCases/test_arm_convolve_1x1_s8_fast)
-add_subdirectory(TestCases/test_arm_convolve_s8)
-add_subdirectory(TestCases/test_arm_convolve_s16)
add_subdirectory(TestCases/test_arm_convolve_fast_s16)
+add_subdirectory(TestCases/test_arm_convolve_s16)
+add_subdirectory(TestCases/test_arm_convolve_s8)
add_subdirectory(TestCases/test_arm_depthwise_conv_3x3_s8)
+add_subdirectory(TestCases/test_arm_depthwise_conv_s16)
add_subdirectory(TestCases/test_arm_depthwise_conv_s8)
add_subdirectory(TestCases/test_arm_depthwise_conv_s8_opt)
-add_subdirectory(TestCases/test_arm_depthwise_conv_s16)
-add_subdirectory(TestCases/test_arm_fully_connected_s8)
+add_subdirectory(TestCases/test_arm_elementwise_add_s16)
+add_subdirectory(TestCases/test_arm_elementwise_add_s8)
+add_subdirectory(TestCases/test_arm_elementwise_mul_s16)
+add_subdirectory(TestCases/test_arm_elementwise_mul_s8)
add_subdirectory(TestCases/test_arm_fully_connected_s16)
-add_subdirectory(TestCases/test_arm_max_pool_s8)
+add_subdirectory(TestCases/test_arm_fully_connected_s8)
add_subdirectory(TestCases/test_arm_max_pool_s16)
+add_subdirectory(TestCases/test_arm_max_pool_s8)
add_subdirectory(TestCases/test_arm_softmax_s8)
add_subdirectory(TestCases/test_arm_svdf_s8)
diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/add/input.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/add/input.txt
new file mode 100644
index 0000000..b36be40
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/add/input.txt
@@ -0,0 +1,17 @@
+# 1,4,4,8
+-9.295782470703125000e+01,-1.086945724487304688e+02,6.923994445800781250e+01,-1.057811355590820312e+02,3.317919921875000000e+01,9.666183471679687500e+00,1.256012420654296875e+02,-1.729664611816406250e+01
+-1.201002273559570312e+02,8.354003906250000000e+00,1.095191192626953125e+02,-5.217841339111328125e+01,6.855964660644531250e+01,-1.136402664184570312e+02,-9.744009399414062500e+01,9.253901672363281250e+01
+7.336495971679687500e+01,-5.832271575927734375e+01,7.275946044921875000e+01,5.621168518066406250e+01,-2.811369323730468750e+01,-1.064083709716796875e+02,-5.671269226074218750e+01,1.013197326660156250e+02
+-5.051823425292968750e+01,-5.165785789489746094e+00,-8.920204162597656250e+01,-2.471372985839843750e+01,5.408367919921875000e+01,-3.100613403320312500e+01,1.517187500000000000e+01,1.175676574707031250e+02
+-6.835147094726562500e+01,3.722920227050781250e+01,7.254766845703125000e+01,4.179951477050781250e+01,-6.894918823242187500e+01,1.138137664794921875e+02,-1.040448760986328125e+01,8.314089965820312500e+01
+3.977279901504516602e+00,6.227731323242187500e+01,1.250961456298828125e+02,5.788429260253906250e+01,1.245972595214843750e+02,-1.194153518676757812e+02,-6.506634521484375000e+01,9.341912841796875000e+01
+-1.228842010498046875e+02,1.207274169921875000e+02,1.059566955566406250e+02,-8.747183227539062500e+01,1.015738677978515625e+02,-7.528302764892578125e+01,6.476969909667968750e+01,-8.886234283447265625e+01
+-6.244555664062500000e+01,1.225449371337890625e+02,-2.085887908935546875e+01,6.471858215332031250e+01,-4.115431976318359375e+01,-5.191817474365234375e+01,-9.805821228027343750e+01,2.599142456054687500e+01
+3.284560918807983398e+00,8.422149658203125000e+01,1.475296020507812500e+01,-3.306756591796875000e+01,1.119789123535156250e+02,-3.768267822265625000e+01,-7.441608428955078125e+01,-1.155044555664062500e+01
+3.421226501464843750e+01,1.218995819091796875e+02,3.512068176269531250e+01,3.007461547851562500e+01,-4.178165435791015625e+01,-7.703953552246093750e+01,-4.402140808105468750e+01,-1.098309249877929688e+02
+-1.149062347412109375e+01,3.958906555175781250e+01,3.185077095031738281e+01,6.083290100097656250e+01,1.213119812011718750e+02,4.139105224609375000e+01,-4.911738586425781250e+01,5.071537780761718750e+01
+3.887196350097656250e+01,-2.090080261230468750e+01,-1.239508743286132812e+02,3.305541992187500000e+01,1.023658294677734375e+02,8.029883575439453125e+01,1.267021331787109375e+02,1.110096435546875000e+02
+-7.935501098632812500e+00,4.101362609863281250e+01,1.688294982910156250e+01,1.209116363525390625e+02,6.502104187011718750e+01,-6.779858398437500000e+01,1.091404724121093750e+01,-8.844725036621093750e+01
+-1.136138763427734375e+02,1.071553039550781250e+02,6.892318725585937500e+01,-1.203222808837890625e+02,-7.168304443359375000e+01,-1.028485412597656250e+02,1.059083099365234375e+02,9.927880859375000000e+01
+-9.445242309570312500e+01,1.253308563232421875e+02,-1.191015472412109375e+02,9.833372497558593750e+01,-4.784445953369140625e+01,4.664404296875000000e+01,-2.272537231445312500e+01,-1.256185607910156250e+02
+-1.015063018798828125e+02,9.903826904296875000e+01,-2.011074829101562500e+01,4.990921020507812500e+00,-1.195664048194885254e+00,-1.579566955566406250e+00,6.589109802246093750e+01,-1.080894927978515625e+02
diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/add/kernel.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/add/kernel.txt
new file mode 100644
index 0000000..3961796
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/add/kernel.txt
@@ -0,0 +1,17 @@
+# 1,4,4,8
+7.206729125976562500e+01,7.762304687500000000e+01,-7.801148986816406250e+01,-3.279899597167968750e+01,-3.281203460693359375e+01,4.526036071777343750e+01,1.080221099853515625e+02,-4.926947021484375000e+01
+1.214063110351562500e+02,4.310248184204101562e+01,-2.534054565429687500e+01,-4.714800262451171875e+01,3.566201782226562500e+01,1.475189208984375000e+01,8.790599060058593750e+01,3.113238525390625000e+01
+2.757206726074218750e+01,9.667665100097656250e+01,-1.555439758300781250e+01,7.675241088867187500e+01,-3.137766265869140625e+01,7.716320800781250000e+01,1.087682495117187500e+02,1.146668701171875000e+02
+9.014279174804687500e+01,2.376081848144531250e+01,3.309667968750000000e+01,1.097281341552734375e+02,-7.206906890869140625e+01,1.277008056640625000e+01,1.199069671630859375e+02,-4.403179931640625000e+01
+-1.938705444335937500e+01,-5.568097686767578125e+01,-7.269144439697265625e+01,9.460415649414062500e+01,-2.219662475585937500e+01,6.548899841308593750e+01,1.033119506835937500e+02,4.380880737304687500e+01
+-9.079716491699218750e+01,-7.181563568115234375e+01,5.841765880584716797e+00,1.155065612792968750e+02,-5.521842956542968750e+00,-5.773645019531250000e+01,5.913287353515625000e+01,7.079254150390625000e+01
+5.248806762695312500e+01,7.888586425781250000e+01,7.365135192871093750e+01,4.194131469726562500e+01,1.048916320800781250e+02,-6.166264343261718750e+01,8.936982727050781250e+01,4.464817810058593750e+01
+3.714846801757812500e+01,7.748014831542968750e+01,9.133015441894531250e+01,-2.577056884765625000e+00,7.319335937500000000e+00,-1.151926803588867188e+02,4.999467468261718750e+01,1.210927124023437500e+02
+-4.962999725341796875e+01,1.540466308593750000e+01,7.622338867187500000e+01,-1.218276443481445312e+02,1.004155273437500000e+02,1.104693603515625000e+01,-3.938659667968750000e+01,-4.596747589111328125e+01
+8.642079162597656250e+01,-3.542127990722656250e+01,1.208909606933593750e+02,-6.531406402587890625e+01,-9.495910644531250000e+00,-1.270257949829101562e+02,2.849827575683593750e+01,1.113162689208984375e+02
+2.672088623046875000e+01,1.042509613037109375e+02,1.162126464843750000e+02,8.939810180664062500e+01,3.087474060058593750e+01,9.776967620849609375e+01,-1.378107452392578125e+01,-1.244079132080078125e+02
+-1.016086807250976562e+02,9.447311401367187500e+01,3.661959838867187500e+01,-8.664979553222656250e+01,-7.529193115234375000e+01,4.999642944335937500e+01,6.983352184295654297e+00,4.055065917968750000e+01
+9.580595397949218750e+01,9.963572692871093750e+01,-7.723803710937500000e+01,-6.269092559814453125e+01,-7.430888366699218750e+01,-8.858144378662109375e+01,-1.248384170532226562e+02,9.718597412109375000e+01
+-3.876348876953125000e+01,1.225116882324218750e+02,-5.063705921173095703e+00,3.252441883087158203e+00,6.319960021972656250e+01,-9.489593505859375000e+01,2.160478210449218750e+01,1.838577270507812500e+01
+1.143972625732421875e+02,-6.992826843261718750e+01,2.026805114746093750e+01,1.951538085937500000e+01,-1.215406799316406250e+02,3.057399988174438477e-01,-5.957047271728515625e+01,-6.521572875976562500e+01
+-6.931716918945312500e+01,-7.156071472167968750e+01,-2.351660919189453125e+01,5.988192749023437500e+01,1.015448608398437500e+02,-5.560777282714843750e+01,-8.696453857421875000e+01,-6.869393920898437500e+01
diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/add_s16/input.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/add_s16/input.txt
new file mode 100644
index 0000000..f2902eb
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/add_s16/input.txt
@@ -0,0 +1,17 @@
+# 1,4,4,8
+-9.079525390625000000e+03,2.064835546875000000e+04,-2.259560156250000000e+04,2.079294921875000000e+04,-2.136208789062500000e+04,-1.937610156250000000e+04,-3.269643359375000000e+04,-5.105984375000000000e+03
+-2.980064648437500000e+04,-2.159199218750000000e+04,4.030812500000000000e+03,-6.140292968750000000e+02,-2.234023828125000000e+04,-1.626913476562500000e+04,-1.295953710937500000e+04,1.518757812500000000e+03
+2.180444921875000000e+04,1.776383203125000000e+04,-1.697196093750000000e+04,-2.542497851562500000e+04,2.223205078125000000e+04,3.293941406250000000e+03,-1.414907226562500000e+04,1.225985937500000000e+04
+2.947183984375000000e+04,-1.136223242187500000e+04,2.756738281250000000e+03,-1.572518945312500000e+04,2.270083984375000000e+04,4.338339843750000000e+03,-9.487113281250000000e+03,-8.780263671875000000e+03
+9.803382812500000000e+03,1.046065234375000000e+04,1.408824609375000000e+04,-2.808096093750000000e+04,-1.423136914062500000e+04,2.328106640625000000e+04,-1.637939843750000000e+04,2.065651953125000000e+04
+-8.372460937500000000e+01,-5.054376953125000000e+03,-2.159832812500000000e+04,-8.347935546875000000e+03,-2.524527148437500000e+04,-1.336928906250000000e+04,7.721703125000000000e+03,-3.313300781250000000e+02
+3.246500390625000000e+04,-5.503087890625000000e+03,-1.652899023437500000e+04,3.107313671875000000e+04,5.627023437500000000e+03,1.666355859375000000e+04,2.707944531250000000e+04,2.323585546875000000e+04
+2.862367968750000000e+04,-5.446712890625000000e+03,-1.377110156250000000e+04,2.801305078125000000e+04,-6.471230468750000000e+02,2.540804687500000000e+03,2.545875000000000000e+03,-9.142298828125000000e+03
+2.996301953125000000e+04,2.687830859375000000e+04,-2.485566796875000000e+04,2.173520703125000000e+04,2.579554296875000000e+04,1.922597265625000000e+04,-2.404469531250000000e+04,3.844480468750000000e+03
+1.797230468750000000e+04,-1.486748437500000000e+04,-1.068930468750000000e+04,2.421027539062500000e+04,2.109536718750000000e+04,1.265168359375000000e+04,-1.248634765625000000e+04,-5.862738281250000000e+03
+-8.676210937500000000e+03,-5.523666015625000000e+03,-1.855750585937500000e+04,6.297933593750000000e+03,9.717687500000000000e+03,-7.208500000000000000e+03,-4.163576171875000000e+03,-1.354507421875000000e+04
+1.848515625000000000e+04,7.531996093750000000e+03,2.911646093750000000e+04,-1.695522656250000000e+04,-2.668137500000000000e+04,-1.033607617187500000e+04,1.683922656250000000e+04,2.731626953125000000e+04
+-1.629326757812500000e+04,-5.270310546875000000e+03,1.882841406250000000e+04,1.289742968750000000e+04,2.894542578125000000e+04,9.207265625000000000e+02,4.623945312500000000e+03,-1.920060546875000000e+04
+-2.503443750000000000e+04,-1.970680078125000000e+04,-1.017802343750000000e+04,-1.756417773437500000e+04,-3.641914062500000000e+03,1.925619921875000000e+04,9.496691406250000000e+03,2.083481640625000000e+04
+1.777793359375000000e+04,-6.373394531250000000e+03,1.066639062500000000e+04,-2.271009765625000000e+04,6.487472656250000000e+03,-2.858315820312500000e+04,2.349269531250000000e+03,-3.056509570312500000e+04
+-2.536204296875000000e+04,-2.154826562500000000e+04,3.162791406250000000e+04,-8.802865234375000000e+03,-3.153237890625000000e+04,1.109749609375000000e+04,-7.745958984375000000e+03,-1.160646484375000000e+03
diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/add_s16/kernel.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/add_s16/kernel.txt
new file mode 100644
index 0000000..e01cab9
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/add_s16/kernel.txt
@@ -0,0 +1,17 @@
+# 1,4,4,8
+-2.510621875000000000e+04,-8.289482421875000000e+03,-1.004274609375000000e+04,1.211134765625000000e+04,2.726040625000000000e+04,4.021187500000000000e+03,-1.736442968750000000e+04,3.379667968750000000e+03
+5.150546875000000000e+02,-1.722215234375000000e+04,6.376589843750000000e+03,8.065445312500000000e+03,-1.617891796875000000e+04,-2.077441015625000000e+04,-2.789935546875000000e+04,-2.434612109375000000e+04
+2.390995703125000000e+04,-1.405164453125000000e+04,4.294128906250000000e+03,2.591995703125000000e+04,-2.936564648437500000e+04,1.356523046875000000e+04,-1.808466015625000000e+04,8.300250000000000000e+03
+1.956511718750000000e+04,-3.172256250000000000e+04,1.644530468750000000e+04,-2.475911328125000000e+04,-1.322433593750000000e+03,2.335518359375000000e+04,3.142789062500000000e+03,3.228850781250000000e+04
+-1.646988281250000000e+04,-1.530034375000000000e+04,8.058761718750000000e+03,2.689437890625000000e+04,-2.077601953125000000e+04,-1.135917773437500000e+04,-2.272833984375000000e+04,1.609108984375000000e+04
+-3.111657226562500000e+04,3.138480859375000000e+04,-2.444791015625000000e+03,-2.954947851562500000e+04,-1.540992187500000000e+04,-1.874251953125000000e+04,-3.189831835937500000e+04,-3.276591406250000000e+04
+2.404804687500000000e+02,-8.225742187500000000e+02,1.046314453125000000e+04,1.268813281250000000e+04,-2.960290039062500000e+04,-3.183902929687500000e+04,2.186680078125000000e+04,-3.173025781250000000e+04
+-4.092718750000000000e+03,2.163746484375000000e+04,-2.644751953125000000e+04,-3.250920703125000000e+04,2.340273828125000000e+04,2.503908593750000000e+04,3.030681250000000000e+04,-3.101775390625000000e+04
+3.055777734375000000e+04,6.341121093750000000e+03,-5.359464843750000000e+03,2.628112890625000000e+04,-1.890782812500000000e+04,-1.919164453125000000e+04,-6.493166015625000000e+03,2.822343750000000000e+04
+5.650789062500000000e+03,2.273074218750000000e+03,3.781004150390625000e+03,4.078617187500000000e+03,-3.150773828125000000e+04,2.127483203125000000e+04,-2.997192578125000000e+04,-2.671396679687500000e+04
+-4.630195312500000000e+03,-6.975160156250000000e+03,-8.764160156250000000e+02,-2.242787695312500000e+04,-2.371823046875000000e+04,-3.140927148437500000e+04,-3.152499414062500000e+04,-1.458019140625000000e+04
+1.876931640625000000e+04,1.524257812500000000e+03,2.807634375000000000e+04,-2.026568359375000000e+04,1.982835937500000000e+03,-3.155298828125000000e+04,2.618632812500000000e+03,1.168980468750000000e+03
+-1.691392187500000000e+04,3.104346484375000000e+04,1.788054687500000000e+04,-2.306708593750000000e+04,2.814215625000000000e+04,1.514268359375000000e+04,-2.964392822265625000e+03,2.067519921875000000e+04
+3.525765625000000000e+03,-3.185146679687500000e+04,5.391292968750000000e+03,6.632703125000000000e+03,-2.413116406250000000e+04,1.372278125000000000e+04,1.524700000000000000e+04,2.658232031250000000e+04
+-2.227673828125000000e+04,-6.355027343750000000e+03,1.518551953125000000e+04,1.752178125000000000e+04,2.853054296875000000e+04,-1.196411523437500000e+04,-6.326130859375000000e+03,-1.093160742187500000e+04
+3.191719140625000000e+04,2.933204296875000000e+04,-1.924806640625000000e+04,-6.136812500000000000e+03,2.344466406250000000e+04,2.127325390625000000e+04,1.861765234375000000e+04,6.950957031250000000e+03
diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul/input.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul/input.txt
new file mode 100644
index 0000000..3e8ed64
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul/input.txt
@@ -0,0 +1,21 @@
+# 1,5,4,8
+6.087265014648437500e+00,4.634196472167968750e+01,2.089966058731079102e+00,-1.591230773925781250e+01,-6.946073913574218750e+01,1.575343322753906250e+01,1.107122802734375000e+02,1.175067749023437500e+02
+-4.076422119140625000e+01,-4.779721832275390625e+01,-9.678024291992187500e+00,-1.763525390625000000e+01,1.265813293457031250e+02,-1.080362701416015625e+02,8.487304687500000000e+00,-4.212655639648437500e+01
+-9.519744873046875000e+00,-1.702813720703125000e+01,-6.921460032463073730e-01,-1.264048156738281250e+02,1.116436996459960938e+02,1.142576599121093750e+01,1.058777313232421875e+02,9.793907165527343750e+01
+-5.287223815917968750e+00,-1.031986083984375000e+02,4.567663574218750000e+01,-2.745009613037109375e+01,6.937374877929687500e+01,-1.074696197509765625e+02,1.659455871582031250e+01,-5.155500030517578125e+01
+1.225355682373046875e+02,3.871371459960937500e+01,-7.985780334472656250e+01,8.569786071777343750e+01,-4.570547485351562500e+01,2.623266601562500000e+01,6.994493103027343750e+01,3.592803955078125000e+00
+7.499845886230468750e+01,-3.264853954315185547e+00,-9.696961975097656250e+01,-8.154697418212890625e+01,1.242909317016601562e+02,7.681944274902343750e+01,1.141309051513671875e+02,-1.198213195800781250e+02
+1.118294219970703125e+02,1.540080261230468750e+01,3.581042480468750000e+01,-7.329165649414062500e+01,-1.510517120361328125e+01,-2.156777191162109375e+01,3.607449340820312500e+01,-7.490602111816406250e+01
+-2.859937286376953125e+01,3.708404064178466797e+00,5.488592529296875000e+01,-5.864614868164062500e+01,8.717007446289062500e+01,-8.396488952636718750e+01,1.024245910644531250e+02,4.356243896484375000e+01
+-1.126940026879310608e-01,-8.146022033691406250e+01,5.181336975097656250e+01,-9.854276275634765625e+01,-9.841476440429687500e+00,-5.197341918945312500e+00,7.240777587890625000e+01,1.021828842163085938e+02
+6.944224548339843750e+01,7.731994152069091797e+00,-3.599164581298828125e+01,-4.081647491455078125e+01,-1.188762054443359375e+02,-6.660906982421875000e+01,2.065304565429687500e+01,1.003328552246093750e+02
+-7.838453674316406250e+01,5.980604553222656250e+01,6.418049621582031250e+01,-9.981350708007812500e+01,-2.877880859375000000e+01,3.104080200195312500e+01,6.499343872070312500e+01,8.385769653320312500e+01
+-4.646399974822998047e+00,1.077828216552734375e+02,-6.764379882812500000e+01,1.130384826660156250e+02,-2.629809570312500000e+01,1.142396392822265625e+02,3.708171081542968750e+01,-5.075689697265625000e+01
+-2.185798645019531250e+01,-1.192111968994140625e+01,-1.044805679321289062e+02,-5.409135437011718750e+01,1.236510467529296875e+02,-9.662140655517578125e+01,-1.060680007934570312e+02,-5.500813293457031250e+01
+-1.237120437622070312e+02,1.238904876708984375e+02,7.293537902832031250e+01,-5.333233642578125000e+01,8.378683471679687500e+01,-1.104943389892578125e+02,9.256475830078125000e+01,-1.205067749023437500e+02
+-7.697004699707031250e+01,-2.728594970703125000e+01,5.983824157714843750e+01,9.902799987792968750e+01,-4.200209975242614746e-01,7.821754455566406250e+01,9.404795837402343750e+01,1.249092864990234375e+02
+-1.036167678833007812e+02,-5.795334625244140625e+01,-1.503917694091796875e+01,-7.366282653808593750e+01,1.146158523559570312e+02,4.718167114257812500e+01,-2.744146728515625000e+01,-7.087094879150390625e+01
+1.061934661865234375e+02,9.054618835449218750e+01,-6.263049840927124023e-01,-9.296089172363281250e+01,-4.776609039306640625e+01,1.142856140136718750e+02,6.502186584472656250e+01,-1.208296051025390625e+02
+6.298899841308593750e+01,1.053328094482421875e+02,-1.000544433593750000e+02,-3.328774261474609375e+01,4.162991333007812500e+01,-1.166616821289062500e+02,-1.814725494384765625e+01,5.189924621582031250e+01
+-8.104315185546875000e+01,-1.097107315063476562e+02,-7.609074401855468750e+01,7.721604919433593750e+01,3.400953674316406250e+01,-2.119123077392578125e+01,1.236600341796875000e+02,5.729858398437500000e+01
+-4.588986968994140625e+01,6.198348617553710938e+01,-1.117596435546875000e+01,7.594029235839843750e+01,-7.848075866699218750e+00,-9.412262725830078125e+01,-5.613299560546875000e+01,-4.313046264648437500e+01
diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul/kernel.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul/kernel.txt
new file mode 100644
index 0000000..416626e
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul/kernel.txt
@@ -0,0 +1,21 @@
+# 1,5,4,8
+3.217739868164062500e+01,-4.575654983520507812e+01,-1.250932769775390625e+02,1.135259246826171875e+02,1.870326232910156250e+01,4.811854553222656250e+01,4.280308532714843750e+01,-1.320864868164062500e+01
+8.949476623535156250e+01,-3.795236968994140625e+01,2.203830033540725708e-01,-1.026309814453125000e+02,-1.138934860229492188e+02,1.262885894775390625e+02,-1.092149658203125000e+02,-8.937467956542968750e+01
+5.905468750000000000e+01,1.048391265869140625e+02,-1.242777557373046875e+02,-7.588436889648437500e+01,7.236830139160156250e+01,-9.981600189208984375e+01,8.246325683593750000e+01,9.635452270507812500e+01
+-7.473950195312500000e+01,9.809680175781250000e+01,9.104861450195312500e+01,-4.299919128417968750e+00,2.549674987792968750e+01,1.863145446777343750e+01,-1.093729705810546875e+02,-2.607852172851562500e+01
+-4.541124725341796875e+01,-4.975766754150390625e+01,-5.976505279541015625e+01,1.020122070312500000e+02,-6.062745666503906250e+01,-1.135064544677734375e+02,-1.030823974609375000e+02,3.759553527832031250e+01
+6.611544799804687500e+01,4.390705871582031250e+01,8.441696166992187500e+01,6.809455871582031250e+01,6.209817504882812500e+01,-6.231644153594970703e+00,-9.188059997558593750e+01,4.051831054687500000e+01
+-5.956570434570312500e+01,1.831413269042968750e+01,-1.913550567626953125e+01,-1.693329620361328125e+01,6.862281799316406250e+01,5.921721935272216797e+00,-3.916234588623046875e+01,-1.090182266235351562e+02
+-3.356804656982421875e+01,1.147797088623046875e+02,-1.196589279174804688e+02,4.634918212890625000e+00,-3.748310852050781250e+01,-6.430694103240966797e+00,3.020614624023437500e+01,-6.272321319580078125e+01
+-9.786624908447265625e+01,-1.989810943603515625e+01,-1.861423492431640625e+01,-4.597468566894531250e+01,2.524142456054687500e+01,-1.041804504394531250e+01,-5.897865295410156250e+01,9.512591552734375000e+01
+4.394152832031250000e+01,-6.969282531738281250e+01,-3.005688476562500000e+01,-9.578435516357421875e+01,-5.227052307128906250e+01,1.192088317871093750e+02,-1.004997482299804688e+02,-5.553445434570312500e+01
+-3.174073791503906250e+01,-1.032137145996093750e+02,-1.198178863525390625e+02,2.824850463867187500e+01,-2.728929138183593750e+01,-8.447923278808593750e+01,8.743931579589843750e+01,-3.851726531982421875e+01
+7.478394031524658203e+00,-8.185469818115234375e+01,-4.803730010986328125e+01,-3.908844757080078125e+01,-1.059177703857421875e+02,2.568099975585937500e+00,1.097543029785156250e+02,-1.218703536987304688e+02
+-6.512222290039062500e+01,-4.518241882324218750e+00,-7.977529907226562500e+01,-4.495685577392578125e+01,-9.700138854980468750e+01,-9.754153442382812500e+01,1.235915832519531250e+02,6.547801208496093750e+01
+-2.471759033203125000e+01,-1.922925567626953125e+01,1.023207855224609375e+02,2.611119079589843750e+01,-1.144902954101562500e+02,-8.321784973144531250e+01,2.100730705261230469e+01,4.209094238281250000e+01
+-6.505221557617187500e+01,-1.287672424316406250e+01,-9.540896606445312500e+01,-7.592190551757812500e+01,6.253384399414062500e+01,-2.609190368652343750e+01,9.872793579101562500e+01,-8.309507751464843750e+01
+1.092861785888671875e+02,1.240314178466796875e+02,-4.161978149414062500e+01,-3.673020172119140625e+01,-7.146414184570312500e+01,3.408689880371093750e+01,4.019821166992187500e+01,5.090278625488281250e+01
+1.034992065429687500e+02,4.370433044433593750e+01,1.106397247314453125e+02,2.916382074356079102e+00,-1.993814849853515625e+01,-1.844695281982421875e+01,1.091971893310546875e+02,6.092024230957031250e+01
+6.083335876464843750e+01,-1.325509643554687500e+01,-3.023486328125000000e+01,5.027542114257812500e+00,5.767028808593750000e+01,1.196073608398437500e+02,-4.072114562988281250e+01,-9.284222412109375000e+01
+1.542956542968750000e+01,2.754859924316406250e+01,8.004570770263671875e+01,-1.157010345458984375e+02,9.206977844238281250e+01,8.940296936035156250e+01,-1.523937988281250000e+01,7.872830200195312500e+01
+-3.596887969970703125e+01,-5.636372375488281250e+01,2.250534057617187500e+01,-6.072342681884765625e+01,1.269192047119140625e+02,2.537686157226562500e+01,1.922425842285156250e+01,4.299319458007812500e+01
diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul_s16/input.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul_s16/input.txt
new file mode 100644
index 0000000..e85694d
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul_s16/input.txt
@@ -0,0 +1,21 @@
+# 1,4,5,8
+-1.425103515625000000e+03,2.724908593750000000e+04,-7.046689453125000000e+03,3.719925781250000000e+03,-1.967012890625000000e+04,-2.690407421875000000e+04,-2.594553320312500000e+04,2.021718750000000000e+03
+1.392343750000000000e+04,2.192769531250000000e+04,-3.343753906250000000e+03,-1.949542187500000000e+04,-8.453089843750000000e+03,-1.351201171875000000e+04,9.689523437500000000e+03,8.383769531250000000e+03
+2.571485937500000000e+04,3.158718750000000000e+03,6.739875000000000000e+03,7.599640625000000000e+03,1.506513671875000000e+04,1.666317578125000000e+04,-2.332417578125000000e+04,2.980700781250000000e+04
+4.226511718750000000e+03,-1.883247851562500000e+04,-1.854366992187500000e+04,9.583074218750000000e+03,-1.749661718750000000e+04,-5.655492187500000000e+03,3.274915625000000000e+04,2.420670703125000000e+04
+2.057914843750000000e+04,2.856615429687500000e+04,-3.074000781250000000e+04,6.501425781250000000e+03,1.871196484375000000e+04,1.566367187500000000e+03,-2.764669531250000000e+04,-1.365868164062500000e+04
+-1.518945507812500000e+04,-3.163897070312500000e+04,-5.636765625000000000e+03,1.310699609375000000e+04,2.548141406250000000e+04,8.109695312500000000e+03,1.609133984375000000e+04,3.034515234375000000e+04
+2.585478515625000000e+04,-1.406016015625000000e+04,-2.045400976562500000e+04,7.801171875000000000e+03,1.260697265625000000e+04,8.043085937500000000e+02,2.842230859375000000e+04,2.205817187500000000e+04
+-4.720560546875000000e+03,2.435938671875000000e+04,2.020263671875000000e+04,-2.363878906250000000e+04,2.352510937500000000e+04,2.559359375000000000e+04,1.056709765625000000e+04,-7.187898437500000000e+03
+-2.620789648437500000e+04,-2.513364843750000000e+04,-2.550296289062500000e+04,-2.147159375000000000e+04,1.157153515625000000e+04,-8.240796875000000000e+03,-3.169450000000000000e+04,1.099613671875000000e+04
+-4.015791015625000000e+03,2.448114843750000000e+04,-1.993103906250000000e+04,-3.094414062500000000e+02,-9.149316406250000000e+02,-3.018190625000000000e+04,2.310589062500000000e+04,-1.069234375000000000e+04
+-6.850175781250000000e+03,-5.861050781250000000e+03,3.156734765625000000e+04,-9.949003906250000000e+03,-2.072949609375000000e+04,1.264927734375000000e+04,-1.933557226562500000e+04,-7.511267089843750000e+03
+6.989882812500000000e+02,-2.016032421875000000e+04,-2.656232812500000000e+04,2.920930468750000000e+04,1.385375781250000000e+04,1.086737500000000000e+04,2.469232031250000000e+04,-2.150264843750000000e+04
+-3.044261328125000000e+04,2.828403125000000000e+04,-1.451576367187500000e+04,-1.543800585937500000e+04,-2.524538085937500000e+04,2.741479296875000000e+04,-4.386527343750000000e+03,-2.135996289062500000e+04
+-1.337448437500000000e+04,-6.476855468750000000e+02,8.589453125000000000e+03,-1.510911328125000000e+04,2.095779687500000000e+04,1.100132421875000000e+04,4.636781250000000000e+03,-3.099037109375000000e+04
+5.427675781250000000e+03,3.042142578125000000e+04,-4.869300781250000000e+03,1.268807031250000000e+04,1.068471093750000000e+04,-3.059081445312500000e+04,-3.049953515625000000e+04,-2.132600390625000000e+04
+-1.625230664062500000e+04,-2.833001171875000000e+04,-1.370808789062500000e+04,2.979532031250000000e+04,-1.282239843750000000e+04,-3.746115234375000000e+03,2.388173046875000000e+04,-3.151986328125000000e+04
+-2.632854296875000000e+04,5.485324218750000000e+03,1.691496093750000000e+04,-4.177771484375000000e+03,-5.471197265625000000e+03,-3.216432031250000000e+04,1.795335937500000000e+04,6.862597656250000000e+03
+-1.270100976562500000e+04,1.277389843750000000e+04,-2.745739453125000000e+04,-9.879919921875000000e+03,-2.054106835937500000e+04,1.300287890625000000e+04,1.573573046875000000e+04,-1.924201171875000000e+04
+-1.955396679687500000e+04,2.350113476562500000e+04,1.203774609375000000e+04,-2.216824804687500000e+04,2.762613867187500000e+04,1.861830078125000000e+04,2.143993554687500000e+04,-6.484220703125000000e+03
+2.451112500000000000e+04,6.074679687500000000e+03,-1.673687695312500000e+04,-5.000000000000000000e-01,-1.350646484375000000e+04,2.484089453125000000e+04,3.024713281250000000e+04,-1.898581250000000000e+04
diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul_s16/kernel.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul_s16/kernel.txt
new file mode 100644
index 0000000..44cb8cb
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul_s16/kernel.txt
@@ -0,0 +1,21 @@
+# 1,4,5,8
+-4.777974609375000000e+03,-3.022644726562500000e+04,3.051593750000000000e+03,4.274800781250000000e+03,3.067806250000000000e+04,3.673921875000000000e+03,1.718764453125000000e+04,-3.046211328125000000e+04
+7.490152343750000000e+03,1.482325000000000000e+04,-9.932605468750000000e+03,-2.892601953125000000e+04,-7.409101562500000000e+02,7.841324218750000000e+03,-1.220482226562500000e+04,5.028820312500000000e+03
+-2.685716796875000000e+03,-3.229366406250000000e+04,2.755517187500000000e+04,-2.232371484375000000e+04,3.135405468750000000e+04,-2.100906250000000000e+03,1.976414453125000000e+04,1.086859375000000000e+04
+-2.691685546875000000e+04,-2.816652343750000000e+04,-3.250991015625000000e+04,1.354970703125000000e+04,2.720939843750000000e+04,-1.721851953125000000e+04,3.098962109375000000e+04,-1.985275976562500000e+04
+-1.329346875000000000e+04,-1.365750976562500000e+04,2.165435546875000000e+04,-3.271517968750000000e+04,5.469042968750000000e+03,5.795738281250000000e+03,-2.592745703125000000e+04,-2.421553710937500000e+04
+2.024035546875000000e+04,1.745229101562500000e+04,-2.459751953125000000e+03,-1.473367187500000000e+03,-5.458197265625000000e+03,1.650673437500000000e+04,-1.516569921875000000e+04,2.526665234375000000e+04
+-1.614822265625000000e+04,3.262424609375000000e+04,1.546023437500000000e+03,2.737675390625000000e+04,2.449608593750000000e+04,2.759233593750000000e+04,1.447634765625000000e+04,-3.042522265625000000e+04
+3.148691796875000000e+04,2.878721093750000000e+04,6.741296875000000000e+03,3.191499609375000000e+04,5.728968750000000000e+03,2.654271875000000000e+04,4.854316406250000000e+03,2.049345312500000000e+04
+-8.639166015625000000e+03,2.560458593750000000e+04,-1.394436523437500000e+04,-2.895786914062500000e+04,3.090774609375000000e+04,-1.290558593750000000e+03,3.628796875000000000e+03,1.451579296875000000e+04
+2.470129687500000000e+04,-1.155723925781250000e+04,2.789051953125000000e+04,-2.129849609375000000e+04,-9.051681640625000000e+03,1.586535937500000000e+04,7.718070312500000000e+03,-7.519947265625000000e+03
+-1.088936523437500000e+04,2.536461328125000000e+04,2.352063281250000000e+04,-2.222727734375000000e+04,-9.340599609375000000e+03,-3.074339843750000000e+04,3.072170312500000000e+04,2.295575000000000000e+04
+-2.855066406250000000e+03,-9.062822265625000000e+03,-1.171989843750000000e+04,4.407031250000000000e+01,-1.095287890625000000e+04,4.721859375000000000e+03,-1.592970312500000000e+04,-2.097570312500000000e+03
+-2.296621093750000000e+03,-2.095112695312500000e+04,7.745070312500000000e+03,1.284000781250000000e+04,-1.214282910156250000e+04,-3.136181054687500000e+04,-2.446296289062500000e+04,1.189535156250000000e+03
+7.879519531250000000e+03,-1.398351367187500000e+04,3.205658984375000000e+04,1.532867968750000000e+04,-4.250740234375000000e+03,6.809074218750000000e+03,-6.571464843750000000e+02,1.408371484375000000e+04
+-4.874878906250000000e+03,1.943416406250000000e+04,-1.250314453125000000e+04,2.886201953125000000e+04,6.276917968750000000e+03,-3.115597851562500000e+04,1.462097265625000000e+04,1.351455859375000000e+04
+-3.782425781250000000e+03,-3.008590039062500000e+04,-3.033045898437500000e+04,1.503886328125000000e+04,2.230409765625000000e+04,-1.056054101562500000e+04,1.364308984375000000e+04,-9.474941406250000000e+03
+-3.764076171875000000e+03,-2.037296289062500000e+04,1.263115234375000000e+04,1.373478906250000000e+04,-1.410587109375000000e+04,2.712390625000000000e+04,1.946813281250000000e+04,2.658819531250000000e+04
+8.712945312500000000e+03,2.198334375000000000e+04,-1.218856445312500000e+04,6.357152343750000000e+03,-7.507048828125000000e+03,2.581895703125000000e+04,-1.851900781250000000e+04,-3.242390625000000000e+04
+2.034826171875000000e+04,-2.546758007812500000e+04,-2.071638671875000000e+04,-1.694490625000000000e+04,-1.941093750000000000e+04,-2.470357617187500000e+04,2.161869531250000000e+04,-1.076089062500000000e+04
+1.648766406250000000e+04,-1.072070312500000000e+04,-1.696540625000000000e+04,9.039542968750000000e+03,-2.013055859375000000e+04,2.089603125000000000e+04,-2.788040234375000000e+04,1.449826171875000000e+04
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/config_data.h
new file mode 100644
index 0000000..3e77310
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/config_data.h
@@ -0,0 +1,15 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#define ADD_DST_SIZE 128
+#define ADD_OUT_ACTIVATION_MIN -128
+#define ADD_OUT_ACTIVATION_MAX 127
+#define ADD_INPUT1_OFFSET 128
+#define ADD_INPUT2_OFFSET 128
+#define ADD_OUTPUT_MULT 1073741824
+#define ADD_OUTPUT_SHIFT -19
+#define ADD_OUTPUT_OFFSET -128
+#define ADD_LEFT_SHIFT 20
+#define ADD_INPUT1_SHIFT 0
+#define ADD_INPUT2_SHIFT 0
+#define ADD_INPUT1_MULT 1073741824
+#define ADD_INPUT2_MULT 1073741824
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/input1_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/input1_data.h
new file mode 100644
index 0000000..efa875a
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/input1_data.h
@@ -0,0 +1,12 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int8_t add_input1[128] = {
+ -92, -108, 69, -105, 33, 9, 125, -17, -120, 8, 109, -52, 68, -113, -97, 92, 73, -58, 72,
+ 56, -28, -106, -56, 101, -50, -5, -89, -24, 54, -31, 15, 117, -68, 37, 72, 41, -68, 113,
+ -10, 83, 3, 62, 125, 57, 124, -119, -65, 93, -122, 120, 105, -87, 101, -75, 64, -88, -62,
+ 122, -20, 64, -41, -51, -98, 25, 3, 84, 14, -33, 111, -37, -74, -11, 34, 121, 35, 30,
+ -41, -77, -44, -109, -11, 39, 31, 60, 121, 41, -49, 50, 38, -20, -123, 33, 102, 80, 126,
+ 111, -7, 41, 16, 120, 65, -67, 10, -88, -113, 107, 68, -120, -71, -102, 105, 99, -94, 125,
+ -119, 98, -47, 46, -22, -125, -101, 99, -20, 4, -1, -1, 65, -108};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/input2_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/input2_data.h
new file mode 100644
index 0000000..7ac5e2e
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/input2_data.h
@@ -0,0 +1,11 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int8_t add_input2[128] = {
+ 72, 77, -78, -32, -32, 45, 108, -49, 121, 43, -25, -47, 35, 14, 87, 31, 27, 96, -15, 76, -31, 77,
+ 108, 114, 90, 23, 33, 109, -72, 12, 119, -44, -19, -55, -72, 94, -22, 65, 103, 43, -90, -71, 5, 115,
+ -5, -57, 59, 70, 52, 78, 73, 41, 104, -61, 89, 44, 37, 77, 91, -2, 7, -115, 49, 121, -49, 15,
+ 76, -121, 100, 11, -39, -45, 86, -35, 120, -65, -9, -127, 28, 111, 26, 104, 116, 89, 30, 97, -13, -124,
+ -101, 94, 36, -86, -75, 49, 6, 40, 95, 99, -77, -62, -74, -88, -124, 97, -38, 122, -5, 3, 63, -94,
+ 21, 18, 114, -69, 20, 19, -121, 0, -59, -65, -69, -71, -23, 59, 101, -55, -86, -68};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/input_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/input_data.h
new file mode 100644
index 0000000..fd82139
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/input_data.h
@@ -0,0 +1,12 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int8_t add_input[128] = {-19, 39, -65, -80, 29, 69, 98, -96, -124, -88, -50, -86, -89, -123, 81, 44,
+ 71, -17, -114, -121, -11, 88, -4, -46, -36, 75, 32, -74, 8, -57, -79, 94,
+ -81, -112, 62, 77, 1, -73, 51, -23, -112, -69, -80, -40, -42, -77, -96, 118,
+ -63, -71, -108, -16, -73, -119, 3, -104, 6, 51, 16, -98, -100, 68, -125, -97,
+ 122, -92, 2, 85, -47, 93, 108, -31, -27, 123, 86, 16, 38, 62, 28, 7,
+ 95, 71, 116, -65, 87, -109, -73, -44, 85, -9, 99, 21, 51, -9, -16, -7,
+ 24, 109, -52, 111, -83, -17, 5, -37, 25, -62, -20, 84, 109, -120, -54, 42,
+ 13, 71, 28, -112, -1, 116, -86, -58, 115, -89, 38, -98, -113, -114, 106, 24};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/output_ref_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/output_ref_data.h
new file mode 100644
index 0000000..e942c3a
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/output_ref_data.h
@@ -0,0 +1,11 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int8_t add_output_ref[128] = {
+ -10, -15, -4, -68, 1, 27, 117, -33, 1, 26, 42, -49, 52, -49, -5, 62, 50, 19, 29, 66, -29, -14,
+ 26, 108, 20, 9, -28, 43, -9, -9, 67, 37, -43, -9, 0, 68, -45, 89, 47, 63, -43, -4, 65, 86,
+ 60, -88, -3, 82, -35, 99, 89, -23, 103, -68, 77, -22, -12, 100, 36, 31, -17, -83, -24, 73, -23, 50,
+ 45, -77, 106, -13, -56, -28, 60, 43, 78, -17, -25, -102, -8, 1, 8, 72, 74, 75, 76, 69, -31, -37,
+ -31, 37, -43, -26, 14, 65, 66, 76, 44, 70, -30, 29, -4, -77, -57, 5, -75, 115, 32, -58, -4, -98,
+ 63, 59, 10, 28, -49, 59, -84, 23, -40, -95, -85, 14, -21, 32, 50, -28, -10, -88};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/test_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/test_data.h
new file mode 100644
index 0000000..a649371
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/test_data.h
@@ -0,0 +1,5 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#include "config_data.h"
+#include "input1_data.h"
+#include "input2_data.h"
+#include "output_ref_data.h"
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/config_data.h
new file mode 100644
index 0000000..f651ac3
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/config_data.h
@@ -0,0 +1,15 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#define ADD_S16_DST_SIZE 128
+#define ADD_S16_OUT_ACTIVATION_MIN -32768
+#define ADD_S16_OUT_ACTIVATION_MAX 32767
+#define ADD_S16_INPUT1_OFFSET 0
+#define ADD_S16_INPUT2_OFFSET 0
+#define ADD_S16_OUTPUT_MULT 1073741824
+#define ADD_S16_OUTPUT_SHIFT -14
+#define ADD_S16_OUTPUT_OFFSET 0
+#define ADD_S16_LEFT_SHIFT 15
+#define ADD_S16_INPUT1_SHIFT 0
+#define ADD_S16_INPUT2_SHIFT 0
+#define ADD_S16_INPUT1_MULT 1073741824
+#define ADD_S16_INPUT2_MULT 1073741824
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/input1_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/input1_data.h
new file mode 100644
index 0000000..efae5da
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/input1_data.h
@@ -0,0 +1,15 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int16_t add_s16_input1[128] = {
+ -9079, 20648, -22595, 20792, -21362, -19376, -32696, -5105, -29800, -21591, 4030, -614, -22340,
+ -16269, -12959, 1518, 21804, 17763, -16971, -25424, 22232, 3293, -14149, 12259, 29471, -11362,
+ 2756, -15725, 22700, 4338, -9487, -8780, 9803, 10460, 14088, -28080, -14231, 23281, -16379,
+ 20656, -83, -5054, -21598, -8347, -25245, -13369, 7721, -331, 32465, -5503, -16528, 31073,
+ 5627, 16663, 27079, 23235, 28623, -5446, -13771, 28013, -647, 2540, 2545, -9142, 29963,
+ 26878, -24855, 21735, 25795, 19225, -24044, 3844, 17972, -14867, -10689, 24210, 21095, 12651,
+ -12486, -5862, -8676, -5523, -18557, 6297, 9717, -7208, -4163, -13545, 18485, 7531, 29116,
+ -16955, -26681, -10336, 16839, 27316, -16293, -5270, 18828, 12897, 28945, 920, 4623, -19200,
+ -25034, -19706, -10178, -17564, -3641, 19256, 9496, 20834, 17777, -6373, 10666, -22710, 6487,
+ -28583, 2349, -30565, -25362, -21548, 31627, -8802, -31532, 11097, -7745, -1160};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/input2_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/input2_data.h
new file mode 100644
index 0000000..fe0ba1d
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/input2_data.h
@@ -0,0 +1,15 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int16_t add_s16_input2[128] = {
+ -25106, -8289, -10042, 12111, 27260, 4021, -17364, 3379, 515, -17222, 6376, 8065, -16178,
+ -20774, -27899, -24346, 23909, -14051, 4294, 25919, -29365, 13565, -18084, 8300, 19565, -31722,
+ 16445, -24759, -1322, 23355, 3142, 32288, -16469, -15300, 8058, 26894, -20776, -11359, -22728,
+ 16091, -31116, 31384, -2444, -29549, -15409, -18742, -31898, -32765, 240, -822, 10463, 12688,
+ -29602, -31839, 21866, -31730, -4092, 21637, -26447, -32509, 23402, 25039, 30306, -31017, 30557,
+ 6341, -5359, 26281, -18907, -19191, -6493, 28223, 5650, 2273, 3781, 4078, -31507, 21274,
+ -29971, -26713, -4630, -6975, -876, -22427, -23718, -31409, -31524, -14580, 18769, 1524, 28076,
+ -20265, 1982, -31552, 2618, 1168, -16913, 31043, 17880, -23067, 28142, 15142, -2964, 20675,
+ 3525, -31851, 5391, 6632, -24131, 13722, 15247, 26582, -22276, -6355, 15185, 17521, 28530,
+ -11964, -6326, -10931, 31917, 29332, -19248, -6136, 23444, 21273, 18617, 6950};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/output_ref_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/output_ref_data.h
new file mode 100644
index 0000000..eacff7b
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/output_ref_data.h
@@ -0,0 +1,15 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int16_t add_s16_output_ref[128] = {
+ -17093, 6180, -16319, 16452, 2949, -7678, -25030, -863, -14643, -19407, 5203, 3726, -19259,
+ -18522, -20429, -11414, 22857, 1856, -6339, 248, -3567, 8429, -16117, 10280, 24518, -21542,
+ 9601, -20242, 10689, 13847, -3173, 11754, -3333, -2420, 11073, -593, -17504, 5961, -19554,
+ 18374, -15600, 13165, -12021, -18948, -20327, -16056, -12089, -16548, 16353, -3163, -3033, 21881,
+ -11988, -7588, 24473, -4248, 12266, 8096, -20109, -2248, 11378, 13790, 16426, -20080, 30260,
+ 16610, -15107, 24008, 3444, 17, -15269, 16034, 11811, -6297, -3454, 14144, -5206, 16963,
+ -21229, -16288, -6653, -6249, -9717, -8065, -7001, -19309, -17844, -14063, 18627, 4528, 28596,
+ -18610, -12350, -20944, 9729, 14242, -16603, 12887, 18354, -5085, 28544, 8031, 830, 738,
+ -10755, -25779, -2394, -5466, -13886, 16489, 12372, 23708, -2250, -6364, 12926, -2595, 17509,
+ -20274, -1989, -20748, 3278, 3892, 6190, -7469, -4044, 16185, 5436, 2895};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/test_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/test_data.h
new file mode 100644
index 0000000..a649371
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/test_data.h
@@ -0,0 +1,5 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#include "config_data.h"
+#include "input1_data.h"
+#include "input2_data.h"
+#include "output_ref_data.h"
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/config_data.h
new file mode 100644
index 0000000..d0fd23e
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/config_data.h
@@ -0,0 +1,10 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#define MUL_DST_SIZE 160
+#define MUL_OUT_ACTIVATION_MIN -128
+#define MUL_OUT_ACTIVATION_MAX 127
+#define MUL_INPUT1_OFFSET 128
+#define MUL_INPUT2_OFFSET 128
+#define MUL_OUTPUT_MULT 1077952640
+#define MUL_OUTPUT_SHIFT -7
+#define MUL_OUTPUT_OFFSET -128
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/input1_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/input1_data.h
new file mode 100644
index 0000000..831a687
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/input1_data.h
@@ -0,0 +1,13 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int8_t mul_input1[160] = {
+ 6, 46, 2, -15, -69, 15, 110, 117, -40, -47, -9, -17, 126, -108, 8, -42, -9, -17, 0, -126,
+ 111, 11, 105, 97, -5, -103, 45, -27, 69, -107, 16, -51, 122, 38, -79, 85, -45, 26, 69, 3,
+ 74, -3, -96, -81, 124, 76, 114, -119, 111, 15, 35, -73, -15, -21, 36, -74, -28, 3, 54, -58,
+ 87, -83, 102, 43, 0, -81, 51, -98, -9, -5, 72, 102, 69, 7, -35, -40, -118, -66, 20, 100,
+ -78, 59, 64, -99, -28, 31, 64, 83, -4, 107, -67, 113, -26, 114, 37, -50, -21, -11, -104, -54,
+ 123, -96, -106, -55, -123, 123, 72, -53, 83, -110, 92, -120, -76, -27, 59, 99, 0, 78, 94, 124,
+ -103, -57, -15, -73, 114, 47, -27, -70, 106, 90, 0, -92, -47, 114, 65, -120, 62, 105, -100, -33,
+ 41, -116, -18, 51, -81, -109, -76, 77, 34, -21, 123, 57, -45, 61, -11, 75, -7, -94, -56, -43};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/input2_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/input2_data.h
new file mode 100644
index 0000000..be6665f
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/input2_data.h
@@ -0,0 +1,13 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int8_t mul_input2[160] = {
+ 32, -45, -125, 113, 18, 48, 42, -13, 89, -37, 0, -102, -113, 126, -109, -89, 59, 104, -124, -75,
+ 72, -99, 82, 96, -74, 98, 91, -4, 25, 18, -109, -26, -45, -49, -59, 102, -60, -113, -103, 37,
+ 66, 43, 84, 68, 62, -6, -91, 40, -59, 18, -19, -16, 68, 5, -39, -109, -33, 114, -119, 4,
+ -37, -6, 30, -62, -97, -19, -18, -45, 25, -10, -58, 95, 43, -69, -30, -95, -52, 119, -100, -55,
+ -31, -103, -119, 28, -27, -84, 87, -38, 7, -81, -48, -39, -105, 2, 109, -121, -65, -4, -79, -44,
+ -97, -97, 123, 65, -24, -19, 102, 26, -114, -83, 21, 42, -65, -12, -95, -75, 62, -26, 98, -83,
+ 109, 124, -41, -36, -71, 34, 40, 50, 103, 43, 110, 2, -19, -18, 109, 60, 60, -13, -30, 5,
+ 57, 119, -40, -92, 15, 27, 80, -115, 92, 89, -15, 78, -35, -56, 22, -60, 126, 25, 19, 42};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/output_ref_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/output_ref_data.h
new file mode 100644
index 0000000..81a63d7
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/output_ref_data.h
@@ -0,0 +1,14 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int8_t mul_output_ref[160] = {
+ -44, -71, -126, -21, -94, -29, 31, -18, -53, -99, -68, -117, -113, -108, -118, -115, -41, -27,
+ -126, -128, 59, -112, 64, 70, -102, -106, 21, -79, -10, -116, -117, -97, -47, -77, -115, 64,
+ -106, -119, -109, -43, 26, -44, -101, -92, 60, -30, -93, -122, -63, -46, -58, -104, -41, -72,
+ -71, -124, -91, -4, -122, -92, -51, -106, 15, -84, -112, -108, -51, -118, -57, -71, -73, 73,
+ 4, -97, -92, -117, -125, -68, -112, -63, -109, -110, -121, -110, -88, -101, 34, -54, -62, -85,
+ -109, -44, -119, -5, 25, -126, -102, -71, -123, -104, -97, -124, -106, -73, -126, -21, 52, -83,
+ -116, -125, 1, -123, -115, -82, -104, -81, -33, -46, 69, -84, -105, -58, -89, -108, -74, -17,
+ -61, -88, 84, 18, -9, -110, -93, -24, 51, -122, 12, -23, -117, -78, -5, -116, -90, -103,
+ -102, -116, -86, -118, 12, -37, -17, 21, -98, -75, -59, -74, -7, -108, -86, -71};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/test_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/test_data.h
new file mode 100644
index 0000000..a649371
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/test_data.h
@@ -0,0 +1,5 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#include "config_data.h"
+#include "input1_data.h"
+#include "input2_data.h"
+#include "output_ref_data.h"
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/config_data.h
new file mode 100644
index 0000000..6b32b0e
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/config_data.h
@@ -0,0 +1,10 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#define MUL_S16_DST_SIZE 160
+#define MUL_S16_OUT_ACTIVATION_MIN -32768
+#define MUL_S16_OUT_ACTIVATION_MAX 32767
+#define MUL_S16_INPUT1_OFFSET 0
+#define MUL_S16_INPUT2_OFFSET 0
+#define MUL_S16_OUTPUT_MULT 1073774592
+#define MUL_S16_OUTPUT_SHIFT -14
+#define MUL_S16_OUTPUT_OFFSET 0
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/input1_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/input1_data.h
new file mode 100644
index 0000000..36a89b2
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/input1_data.h
@@ -0,0 +1,17 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int16_t mul_s16_input1[160] = {
+ -1425, 27249, -7046, 3719, -19670, -26904, -25945, 2021, 13923, 21927, -3343, -19495, -8453, -13512,
+ 9689, 8383, 25714, 3158, 6739, 7599, 15065, 16663, -23324, 29807, 4226, -18832, -18543, 9583,
+ -17496, -5655, 32749, 24206, 20579, 28566, -30740, 6501, 18711, 1566, -27646, -13658, -15189, -31638,
+ -5636, 13106, 25481, 8109, 16091, 30345, 25854, -14060, -20454, 7801, 12606, 804, 28422, 22058,
+ -4720, 24359, 20202, -23638, 23525, 25593, 10567, -7187, -26207, -25133, -25502, -21471, 11571, -8240,
+ -31694, 10996, -4015, 24481, -19931, -309, -914, -30181, 23105, -10692, -6850, -5861, 31567, -9949,
+ -20729, 12649, -19335, -7511, 698, -20160, -26562, 29209, 13853, 10867, 24692, -21502, -30442, 28284,
+ -14515, -15438, -25245, 27414, -4386, -21359, -13374, -647, 8589, -15109, 20957, 11001, 4636, -30990,
+ 5427, 30421, -4869, 12688, 10684, -30590, -30499, -21326, -16252, -28330, -13708, 29795, -12822, -3746,
+ 23881, -31519, -26328, 5485, 16914, -4177, -5471, -32164, 17953, 6862, -12701, 12773, -27457, -9879,
+ -20541, 13002, 15735, -19242, -19553, 23501, 12037, -22168, 27626, 18618, 21439, -6484, 24511, 6074,
+ -16736, 0, -13506, 24840, 30247, -18985};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/input2_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/input2_data.h
new file mode 100644
index 0000000..7cc4cf6
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/input2_data.h
@@ -0,0 +1,17 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int16_t mul_s16_input2[160] = {
+ -4777, -30226, 3051, 4274, 30678, 3673, 17187, -30462, 7490, 14823, -9932, -28926, -740, 7841,
+ -12204, 5028, -2685, -32293, 27555, -22323, 31354, -2100, 19764, 10868, -26916, -28166, -32509, 13549,
+ 27209, -17218, 30989, -19852, -13293, -13657, 21654, -32715, 5469, 5795, -25927, -24215, 20240, 17452,
+ -2459, -1473, -5458, 16506, -15165, 25266, -16148, 32624, 1546, 27376, 24496, 27592, 14476, -30425,
+ 31486, 28787, 6741, 31914, 5728, 26542, 4854, 20493, -8639, 25604, -13944, -28957, 30907, -1290,
+ 3628, 14515, 24701, -11557, 27890, -21298, -9051, 15865, 7718, -7519, -10889, 25364, 23520, -22227,
+ -9340, -30743, 30721, 22955, -2855, -9062, -11719, 44, -10952, 4721, -15929, -2097, -2296, -20951,
+ 7745, 12840, -12142, -31361, -24462, 1189, 7879, -13983, 32056, 15328, -4250, 6809, -657, 14083,
+ -4874, 19434, -12503, 28862, 6276, -31155, 14620, 13514, -3782, -30085, -30330, 15038, 22304, -10560,
+ 13643, -9474, -3764, -20372, 12631, 13734, -14105, 27123, 19468, 26588, 8712, 21983, -12188, 6357,
+ -7507, 25818, -18519, -32423, 20348, -25467, -20716, -16944, -19410, -24703, 21618, -10760, 16487, -10720,
+ -16965, 9039, -20130, 20896, -27880, 14498};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/output_ref_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/output_ref_data.h
new file mode 100644
index 0000000..379788d
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/output_ref_data.h
@@ -0,0 +1,17 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int16_t mul_s16_output_ref[160] = {
+ 208, -25136, -656, 485, -18416, -3016, -13609, -1879, 3183, 9919, 1013, 17210, 191, -3233,
+ -3609, 1286, -2107, -3112, 5667, -5177, 14415, -1068, -14068, 9886, -3471, 16188, 18397, 3963,
+ -14528, 2972, 30972, -14665, -8349, -11906, -20314, -6491, 3123, 277, 21875, 10093, -9382, -16851,
+ 423, -589, -4244, 4085, -7447, 23398, -12741, -13999, -965, 6518, 9424, 677, 12556, -20481,
+ -4535, 21400, 4156, -23023, 4112, 20731, 1565, -4495, 6909, -19639, 10852, 18974, 10914, 324,
+ -3509, 4871, -3027, -8635, -16964, 201, 252, -14613, 5442, 2453, 2276, -4537, 22659, 6749,
+ 5909, -11868, -18128, -5262, -61, 5575, 9500, 39, -4630, 1566, -12004, 1376, 2133, -18085,
+ -3431, -6049, 9355, -26238, 3274, -775, -3216, 276, 8403, -7068, -2718, 2286, -93, -13319,
+ -807, 18043, 1858, 11176, 2046, 29085, -13608, -8795, 1876, 26011, 12688, 13674, -8728, 1207,
+ 9943, 9113, 3024, -3410, 6520, -1751, 2355, -26624, 10666, 5568, -3377, 8569, 10213, -1917,
+ 4706, 10245, -8893, 19040, -12142, -18265, -7610, 11463, -16365, -14036, 14144, 2129, 12333, -1987,
+ 8665, 0, 8297, 15841, -25736, -8400};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/test_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/test_data.h
new file mode 100644
index 0000000..a649371
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/test_data.h
@@ -0,0 +1,5 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#include "config_data.h"
+#include "input1_data.h"
+#include "input2_data.h"
+#include "output_ref_data.h"
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s16/CMakeLists.txt b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s16/CMakeLists.txt
new file mode 100644
index 0000000..6442420
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s16/CMakeLists.txt
@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2022 Arm Limited or its affiliates.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_cmsis_nn_unit_test_executable(test_arm_elementwise_add_s16)
+# Sources compiled into this test: the Unity test file and the runner under Unity/TestRunner.
+target_sources(test_arm_elementwise_add_s16
+    PRIVATE Unity/unity_test_arm_elementwise_add_s16.c
+            Unity/TestRunner/unity_test_arm_elementwise_add_s16_runner.c)
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s16/Unity/unity_test_arm_elementwise_add_s16.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s16/Unity/unity_test_arm_elementwise_add_s16.c
new file mode 100644
index 0000000..51709c7
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s16/Unity/unity_test_arm_elementwise_add_s16.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../test_arm_elementwise_add_s16.c"
+#include "unity.h"
+
+#ifdef USING_FVP_CORSTONE_300
+extern void uart_init(void);
+#endif
+
+/* Called from the autogenerated file before EACH test; the name must be
+ * exactly setUp.
+ */
+void setUp(void)
+{
+#if defined(USING_FVP_CORSTONE_300)
+    uart_init(); /* only compiled in when USING_FVP_CORSTONE_300 is defined */
+#endif
+}
+
+/* This function is called from the autogenerated file, so the name must be
+ * exactly tearDown. The body is empty: this file does no per-test cleanup.
+ */
+void tearDown(void) {}
+
+void test_add_s16_arm_elementwise_add_s16(void) { add_s16_arm_elementwise_add_s16(); } /* body: ../test_arm_elementwise_add_s16.c */
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s16/test_arm_elementwise_add_s16.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s16/test_arm_elementwise_add_s16.c
new file mode 100644
index 0000000..a9af146
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s16/test_arm_elementwise_add_s16.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_nnfunctions.h"
+#include "unity.h"
+
+#include "../TestData/add_s16/test_data.h"
+#include "../Utils/validate.h"
+
+void add_s16_arm_elementwise_add_s16(void)
+{
+    /* Runs arm_elementwise_add_s16 on the add_s16 test data and checks it against add_s16_output_ref. */
+    const int16_t *lhs = add_s16_input1;
+    const int16_t *rhs = add_s16_input2;
+
+    /* Per-input offset, multiplier and shift in call order, then the left shift. */
+    const int32_t lhs_offset = ADD_S16_INPUT1_OFFSET;
+    const int32_t lhs_mult = ADD_S16_INPUT1_MULT;
+    const int32_t lhs_shift = ADD_S16_INPUT1_SHIFT;
+    const int32_t rhs_offset = ADD_S16_INPUT2_OFFSET;
+    const int32_t rhs_mult = ADD_S16_INPUT2_MULT;
+    const int32_t rhs_shift = ADD_S16_INPUT2_SHIFT;
+    const int32_t lshift = ADD_S16_LEFT_SHIFT;
+
+    /* Zero-initialised destination, its offset/multiplier/shift and the activation range. */
+    int16_t output[ADD_S16_DST_SIZE] = {0};
+    const int32_t dst_offset = ADD_S16_OUTPUT_OFFSET;
+    const int32_t dst_mult = ADD_S16_OUTPUT_MULT;
+    const int32_t dst_shift = ADD_S16_OUTPUT_SHIFT;
+    const int32_t act_min = ADD_S16_OUT_ACTIVATION_MIN;
+    const int32_t act_max = ADD_S16_OUT_ACTIVATION_MAX;
+
+    const arm_status status = arm_elementwise_add_s16(lhs,
+                                                      rhs,
+                                                      lhs_offset,
+                                                      lhs_mult,
+                                                      lhs_shift,
+                                                      rhs_offset,
+                                                      rhs_mult,
+                                                      rhs_shift,
+                                                      lshift,
+                                                      output,
+                                                      dst_offset,
+                                                      dst_mult,
+                                                      dst_shift,
+                                                      act_min,
+                                                      act_max,
+                                                      ADD_S16_DST_SIZE);
+
+    /* Pass only if the call reports success and validate_s16 accepts the output. */
+    TEST_ASSERT_EQUAL(ARM_MATH_SUCCESS, status);
+    TEST_ASSERT_TRUE(validate_s16(output, add_s16_output_ref, ADD_S16_DST_SIZE));
+}
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s8/CMakeLists.txt b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s8/CMakeLists.txt
new file mode 100644
index 0000000..a87c4df
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s8/CMakeLists.txt
@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2022 Arm Limited or its affiliates.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_cmsis_nn_unit_test_executable(test_arm_elementwise_add_s8)
+# Sources compiled into this test: the Unity test file and the runner under Unity/TestRunner.
+target_sources(test_arm_elementwise_add_s8
+    PRIVATE Unity/unity_test_arm_elementwise_add_s8.c
+            Unity/TestRunner/unity_test_arm_elementwise_add_s8_runner.c)
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s8/Unity/unity_test_arm_elementwise_add_s8.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s8/Unity/unity_test_arm_elementwise_add_s8.c
new file mode 100644
index 0000000..2971660
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s8/Unity/unity_test_arm_elementwise_add_s8.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../test_arm_elementwise_add_s8.c"
+#include "unity.h"
+
+#ifdef USING_FVP_CORSTONE_300
+extern void uart_init(void);
+#endif
+
+/* Called from the autogenerated file before EACH test; the name must be
+ * exactly setUp.
+ */
+void setUp(void)
+{
+#if defined(USING_FVP_CORSTONE_300)
+    uart_init(); /* only compiled in when USING_FVP_CORSTONE_300 is defined */
+#endif
+}
+
+/* This function is called from the autogenerated file, so the name must be
+ * exactly tearDown. The body is empty: this file does no per-test cleanup.
+ */
+void tearDown(void) {}
+
+void test_add_arm_elementwise_add_s8(void) { add_arm_elementwise_add_s8(); } /* body: ../test_arm_elementwise_add_s8.c */
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s8/test_arm_elementwise_add_s8.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s8/test_arm_elementwise_add_s8.c
new file mode 100644
index 0000000..d62a567
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s8/test_arm_elementwise_add_s8.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_nnfunctions.h"
+#include "unity.h"
+
+#include "../TestData/add/test_data.h"
+#include "../Utils/validate.h"
+
+void add_arm_elementwise_add_s8(void)
+{
+    /* Runs arm_elementwise_add_s8 on the add test data and checks it against add_output_ref. */
+    const int8_t *lhs = add_input1;
+    const int8_t *rhs = add_input2;
+
+    /* Per-input offset, multiplier and shift in call order, then the left shift. */
+    const int32_t lhs_offset = ADD_INPUT1_OFFSET;
+    const int32_t lhs_mult = ADD_INPUT1_MULT;
+    const int32_t lhs_shift = ADD_INPUT1_SHIFT;
+    const int32_t rhs_offset = ADD_INPUT2_OFFSET;
+    const int32_t rhs_mult = ADD_INPUT2_MULT;
+    const int32_t rhs_shift = ADD_INPUT2_SHIFT;
+    const int32_t lshift = ADD_LEFT_SHIFT;
+
+    /* Zero-initialised destination, its offset/multiplier/shift and the activation range. */
+    int8_t output[ADD_DST_SIZE] = {0};
+    const int32_t dst_offset = ADD_OUTPUT_OFFSET;
+    const int32_t dst_mult = ADD_OUTPUT_MULT;
+    const int32_t dst_shift = ADD_OUTPUT_SHIFT;
+    const int32_t act_min = ADD_OUT_ACTIVATION_MIN;
+    const int32_t act_max = ADD_OUT_ACTIVATION_MAX;
+
+    const arm_status status = arm_elementwise_add_s8(lhs,
+                                                     rhs,
+                                                     lhs_offset,
+                                                     lhs_mult,
+                                                     lhs_shift,
+                                                     rhs_offset,
+                                                     rhs_mult,
+                                                     rhs_shift,
+                                                     lshift,
+                                                     output,
+                                                     dst_offset,
+                                                     dst_mult,
+                                                     dst_shift,
+                                                     act_min,
+                                                     act_max,
+                                                     ADD_DST_SIZE);
+
+    /* Pass only if the call reports success and validate accepts the output. */
+    TEST_ASSERT_EQUAL(ARM_MATH_SUCCESS, status);
+    TEST_ASSERT_TRUE(validate(output, add_output_ref, ADD_DST_SIZE));
+}
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s16/CMakeLists.txt b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s16/CMakeLists.txt
new file mode 100644
index 0000000..f3f5bbc
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s16/CMakeLists.txt
@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2022 Arm Limited or its affiliates.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_cmsis_nn_unit_test_executable(test_arm_elementwise_mul_s16)
+# Sources compiled into this test: the Unity test file and the runner under Unity/TestRunner.
+target_sources(test_arm_elementwise_mul_s16
+    PRIVATE Unity/unity_test_arm_elementwise_mul_s16.c
+            Unity/TestRunner/unity_test_arm_elementwise_mul_s16_runner.c)
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s16/Unity/unity_test_arm_elementwise_mul_s16.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s16/Unity/unity_test_arm_elementwise_mul_s16.c
new file mode 100644
index 0000000..940dd59
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s16/Unity/unity_test_arm_elementwise_mul_s16.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../test_arm_elementwise_mul_s16.c"
+#include "unity.h"
+
+#ifdef USING_FVP_CORSTONE_300
+extern void uart_init(void);
+#endif
+
+/* Called from the autogenerated file before EACH test; the name must be
+ * exactly setUp.
+ */
+void setUp(void)
+{
+#if defined(USING_FVP_CORSTONE_300)
+    uart_init(); /* only compiled in when USING_FVP_CORSTONE_300 is defined */
+#endif
+}
+
+/* This function is called from the autogenerated file, so the name must be
+ * exactly tearDown. The body is empty: this file does no per-test cleanup.
+ */
+void tearDown(void) {}
+
+void test_mul_s16_arm_elementwise_mul_s16(void) { mul_s16_arm_elementwise_mul_s16(); } /* body: ../test_arm_elementwise_mul_s16.c */
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s16/test_arm_elementwise_mul_s16.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s16/test_arm_elementwise_mul_s16.c
new file mode 100644
index 0000000..4b71dbc
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s16/test_arm_elementwise_mul_s16.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_nnfunctions.h"
+#include "unity.h"
+
+#include "../TestData/mul_s16/test_data.h"
+#include "../Utils/validate.h"
+
+void mul_s16_arm_elementwise_mul_s16(void)
+{
+    /* Runs arm_elementwise_mul_s16 on the mul_s16 test data and checks it against mul_s16_output_ref. */
+    const int16_t *lhs = mul_s16_input1;
+    const int16_t *rhs = mul_s16_input2;
+
+    const int32_t lhs_offset = MUL_S16_INPUT1_OFFSET;
+    const int32_t rhs_offset = MUL_S16_INPUT2_OFFSET;
+
+    /* Zero-initialised destination, its offset/multiplier/shift and the activation range. */
+    int16_t output[MUL_S16_DST_SIZE] = {0};
+    const int32_t dst_offset = MUL_S16_OUTPUT_OFFSET;
+    const int32_t dst_mult = MUL_S16_OUTPUT_MULT;
+    const int32_t dst_shift = MUL_S16_OUTPUT_SHIFT;
+    const int32_t act_min = MUL_S16_OUT_ACTIVATION_MIN;
+    const int32_t act_max = MUL_S16_OUT_ACTIVATION_MAX;
+
+    const arm_status status = arm_elementwise_mul_s16(lhs,
+                                                      rhs,
+                                                      lhs_offset,
+                                                      rhs_offset,
+                                                      output,
+                                                      dst_offset,
+                                                      dst_mult,
+                                                      dst_shift,
+                                                      act_min,
+                                                      act_max,
+                                                      MUL_S16_DST_SIZE);
+
+    /* Pass only if the call reports success and validate_s16 accepts the output. */
+    TEST_ASSERT_EQUAL(ARM_MATH_SUCCESS, status);
+    TEST_ASSERT_TRUE(validate_s16(output, mul_s16_output_ref, MUL_S16_DST_SIZE));
+}
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s8/CMakeLists.txt b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s8/CMakeLists.txt
new file mode 100644
index 0000000..438f261
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s8/CMakeLists.txt
@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2022 Arm Limited or its affiliates.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_cmsis_nn_unit_test_executable(test_arm_elementwise_mul_s8)
+# Sources compiled into this test: the Unity test file and the runner under Unity/TestRunner.
+target_sources(test_arm_elementwise_mul_s8
+    PRIVATE Unity/unity_test_arm_elementwise_mul_s8.c
+            Unity/TestRunner/unity_test_arm_elementwise_mul_s8_runner.c)
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s8/Unity/unity_test_arm_elementwise_mul_s8.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s8/Unity/unity_test_arm_elementwise_mul_s8.c
new file mode 100644
index 0000000..26cdbff
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s8/Unity/unity_test_arm_elementwise_mul_s8.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../test_arm_elementwise_mul_s8.c"
+#include "unity.h"
+
+#ifdef USING_FVP_CORSTONE_300
+extern void uart_init(void);
+#endif
+
+/* Called from the autogenerated file before EACH test; the name must be
+ * exactly setUp.
+ */
+void setUp(void)
+{
+#if defined(USING_FVP_CORSTONE_300)
+    uart_init(); /* only compiled in when USING_FVP_CORSTONE_300 is defined */
+#endif
+}
+
+/* This function is called from the autogenerated file, so the name must be
+ * exactly tearDown. The body is empty: this file does no per-test cleanup.
+ */
+void tearDown(void) {}
+
+void test_mul_arm_elementwise_mul_s8(void) { mul_arm_elementwise_mul_s8(); } /* body: ../test_arm_elementwise_mul_s8.c */
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s8/test_arm_elementwise_mul_s8.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s8/test_arm_elementwise_mul_s8.c
new file mode 100644
index 0000000..dac9e81
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s8/test_arm_elementwise_mul_s8.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_nnfunctions.h"
+#include "unity.h"
+
+#include "../TestData/mul/test_data.h"
+#include "../Utils/validate.h"
+
+void mul_arm_elementwise_mul_s8(void)
+{
+    /* Runs arm_elementwise_mul_s8 on the mul test data and checks it against mul_output_ref. */
+    const int8_t *lhs = mul_input1;
+    const int8_t *rhs = mul_input2;
+
+    const int32_t lhs_offset = MUL_INPUT1_OFFSET;
+    const int32_t rhs_offset = MUL_INPUT2_OFFSET;
+
+    /* Zero-initialised destination, its offset/multiplier/shift and the activation range. */
+    int8_t output[MUL_DST_SIZE] = {0};
+    const int32_t dst_offset = MUL_OUTPUT_OFFSET;
+    const int32_t dst_mult = MUL_OUTPUT_MULT;
+    const int32_t dst_shift = MUL_OUTPUT_SHIFT;
+    const int32_t act_min = MUL_OUT_ACTIVATION_MIN;
+    const int32_t act_max = MUL_OUT_ACTIVATION_MAX;
+
+    const arm_status status = arm_elementwise_mul_s8(lhs,
+                                                     rhs,
+                                                     lhs_offset,
+                                                     rhs_offset,
+                                                     output,
+                                                     dst_offset,
+                                                     dst_mult,
+                                                     dst_shift,
+                                                     act_min,
+                                                     act_max,
+                                                     MUL_DST_SIZE);
+
+    /* Pass only if the call reports success and validate accepts the output. */
+    TEST_ASSERT_EQUAL(ARM_MATH_SUCCESS, status);
+    TEST_ASSERT_TRUE(validate(output, mul_output_ref, MUL_DST_SIZE));
+}
diff --git a/CMSIS/NN/Tests/UnitTest/generate_test_data.py b/CMSIS/NN/Tests/UnitTest/generate_test_data.py
index e018c47..9202b25 100755
--- a/CMSIS/NN/Tests/UnitTest/generate_test_data.py
+++ b/CMSIS/NN/Tests/UnitTest/generate_test_data.py
@@ -57,8 +57,8 @@
parser.add_argument('-a', '--regenerate-all', action='store_true', help="Regenerate and store all data.")
parser.add_argument('-t', '--testtype', type=str, default=None, choices=['conv', 'depthwise_conv', 'avgpool',
'maxpool', 'fully_connected', 'softmax',
- 'svdf'],
- help='Type of test.')
+ 'svdf', 'add', 'mul'],
+ help='Type of test. These are the operators that have unit tests.')
parser.add_argument('--run-all-testsets', action='store_true', help="Run the script for all existing test "
"sets. Regenerate all, partially all or no input data (output may still change, depending on"
" changes in script) depending on regenerate flags. If used together with the -t flag, only"
@@ -363,24 +363,31 @@
significand_q31 = round(significand * (1 << 31))
return significand_q31, shift
- def get_convolving_calib_data_func(self):
+ def get_convolving_calib_data_func(self, n_inputs):  # n_inputs: number of tensors in the calibration sample
def representative_data_gen():
- # testset = np.random.rand(self.batches, self.y_input, self.x_input, self.input_ch).astype('float32')
- testset = np.ones((self.batches, self.y_input, self.x_input, self.input_ch), dtype=np.float32)
- yield [testset]
+ representative_testsets = []
+ if n_inputs > 0:
+ for i in range(n_inputs):
+ representative_testsets.append(np.ones((self.batches, self.y_input, self.x_input, self.input_ch),
+ dtype=np.float32))
+ yield representative_testsets  # list of all-ones float32 tensors
+ else:
+ raise RuntimeError("Invalid number of representative test sets: {}. Must be more than 0".
+ format(n_inputs))  # report the offending count rather than the test type
return representative_data_gen
- def convert_and_interpret(self, model, input_data, inttype):
+ def convert_and_interpret(self, model, inttype, input_data=None):
"""
Compile and convert a model to Tflite format, run interpreter and allocate tensors.
"""
model.compile(loss=tf.keras.losses.categorical_crossentropy,
optimizer=tf.keras.optimizers.Adam(),
metrics=['accuracy'])
+ n_inputs = len(model.inputs)
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
- converter.representative_dataset = self.get_convolving_calib_data_func()
+ converter.representative_dataset = self.get_convolving_calib_data_func(n_inputs)
if self.is_int16xint8:
converter.target_spec.supported_ops = [
tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8]
@@ -398,14 +405,15 @@
model_path=str(self.model_path_tflite), experimental_op_resolver_type=OpResolverType.BUILTIN_REF)
interpreter.allocate_tensors()
- input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
-
- (self.input_scale, self.input_zero_point) = input_details[0]['quantization']
(self.output_scale, self.output_zero_point) = output_details[0]['quantization']
- # Set input tensors
- interpreter.set_tensor(input_details[0]["index"], tf.cast(input_data, inttype))
+ if input_data is not None:
+ input_details = interpreter.get_input_details()
+ (self.input_scale, self.input_zero_point) = input_details[0]['quantization']
+
+ # Set input tensors
+ interpreter.set_tensor(input_details[0]["index"], tf.cast(input_data, inttype))
return interpreter
@@ -511,8 +519,7 @@
input_shape=input_shape[1:], dilation_rate=(self.dilation_y, self.dilation_x))
model.add(depthwise_layer)
depthwise_layer.set_weights([weights, biases])
-
- interpreter = self.convert_and_interpret(model, input_data, inttype)
+ interpreter = self.convert_and_interpret(model, inttype, input_data)
all_layers_details = interpreter.get_tensor_details()
filter_layer = all_layers_details[1]
@@ -580,7 +587,7 @@
else:
raise RuntimeError("Wrong test type")
- interpreter = self.convert_and_interpret(model, input_data, inttype)
+ interpreter = self.convert_and_interpret(model, inttype, input_data)
output_details = interpreter.get_output_details()
self.set_output_dims_and_padding(output_details[0]['shape'][2], output_details[0]['shape'][1])
@@ -671,7 +678,7 @@
model.add(fully_connected_layer)
fully_connected_layer.set_weights([weights, biases])
- interpreter = self.convert_and_interpret(model, input_data, inttype)
+ interpreter = self.convert_and_interpret(model, inttype, input_data)
all_layers_details = interpreter.get_tensor_details()
if self.is_int16xint8:
@@ -766,7 +773,7 @@
input_shape = (self.y_input, self.x_input)
model.add(tf.keras.layers.Softmax(input_shape=input_shape[1:]))
- interpreter = self.convert_and_interpret(model, input_data, tf.int8)
+ interpreter = self.convert_and_interpret(model, tf.int8, input_data)
self.calc_softmax_params()
@@ -989,6 +996,110 @@
return generated_json_file
+class AddMulSettings(TestSettings):
+ """Test-data generator for the elementwise 'add' and 'mul' operators (int8, or int16 when int16xint8 is set)."""
+ def __init__(self, dataset, testtype, args, channels=1, x_in=4, y_in=4, decimal_input=6, randmin=INT8_MIN,
+ randmax=INT8_MAX, out_activation_min=INT8_MIN, out_activation_max=INT8_MAX, int16xint8=False):
+ super().__init__(dataset, testtype, args, in_ch=channels, out_ch=channels, x_in=x_in, y_in=y_in, w_x=1, w_y=1,
+ stride_x=1, stride_y=1, pad=False, randmin=randmin, randmax=randmax, batches=1,
+ generate_bias=False, relu6=False, out_activation_min=out_activation_min,
+ out_activation_max=out_activation_max, int16xint8=int16xint8)
+ # Output dimensions are set equal to the input dimensions.
+ self.x_input = self.x_output = x_in
+ self.y_input = self.y_output = y_in
+ self.decimal_input = decimal_input
+ # left_shift feeds the 'add' output scale and is only written to the config header for 'add'.
+ self.left_shift = 15 if self.is_int16xint8 else 20
+ # NOTE(review): the input_data1/input_data2 arguments are overwritten below and thus never used.
+ def generate_data(self, input_data1=None, input_data2=None):
+ input_shape = (1, self.y_input, self.x_input, self.input_ch)
+
+ input_data1 = self.get_randomized_data(list(input_shape),
+ self.inputs_table_file,
+ regenerate=self.regenerate_new_input,
+ decimals=self.decimal_input)
+ input_data2 = self.get_randomized_data(list(input_shape),
+ self.kernel_table_file,
+ regenerate=self.regenerate_new_weights,
+ decimals=self.decimal_input)
+ # C array type name and the matching TensorFlow dtype for the generated data.
+ if self.is_int16xint8:
+ inttype = "int16_t"
+ inttype_tf = tf.int16
+ else:
+ inttype = "int8_t"
+ inttype_tf = tf.int8
+
+ # Create a one-layer functional Keras model as add/mul cannot use a sequential Keras model.
+ input1 = tf.keras.layers.Input(shape=input_shape[1:])
+ input2 = tf.keras.layers.Input(shape=input_shape[1:])
+ if self.test_type == 'add':
+ layer = tf.keras.layers.Add()([input1, input2])
+ elif self.test_type == 'mul':
+ layer = tf.keras.layers.Multiply()([input1, input2])
+ else:
+ raise RuntimeError("Wrong test type")
+ out = tf.keras.layers.Lambda(function=lambda x: x)(layer)
+ model = tf.keras.models.Model(inputs=[input1, input2], outputs=out)
+ # No input data is passed here; both input tensors are set explicitly afterwards.
+ interpreter = self.convert_and_interpret(model, inttype_tf)
+
+ input_details = interpreter.get_input_details()
+ interpreter.set_tensor(input_details[0]["index"], tf.cast(input_data1, inttype_tf))
+ interpreter.set_tensor(input_details[1]["index"], tf.cast(input_data2, inttype_tf))
+
+ # Calculate multipliers, shifts and offsets.
+ (input1_scale, self.input1_zero_point) = input_details[0]['quantization']
+ (input2_scale, self.input2_zero_point) = input_details[1]['quantization']
+ self.input1_zero_point = -self.input1_zero_point  # stored negated; written out as INPUT1_OFFSET
+ self.input2_zero_point = -self.input2_zero_point  # stored negated; written out as INPUT2_OFFSET
+ double_max_input_scale = max(input1_scale, input2_scale) * 2
+ (self.input1_mult, self.input1_shift) = self.quantize_scale(input1_scale/double_max_input_scale)
+ (self.input2_mult, self.input2_shift) = self.quantize_scale(input2_scale/double_max_input_scale)
+ # self.output_scale is set by convert_and_interpret().
+ if self.test_type == 'add':
+ actual_output_scale = double_max_input_scale / ((1 << self.left_shift) * self.output_scale)
+ elif self.test_type == 'mul':
+ actual_output_scale = input1_scale * input2_scale / self.output_scale
+ (self.output_mult, self.output_shift) = self.quantize_scale(actual_output_scale)
+
+ # Generate reference.
+ interpreter.invoke()
+ output_details = interpreter.get_output_details()
+ output_data = interpreter.get_tensor(output_details[0]["index"])
+ self.generate_c_array("input1", input_data1, datatype=inttype)
+ self.generate_c_array("input2", input_data2, datatype=inttype)
+ self.generate_c_array("output_ref", np.clip(output_data, self.out_activation_min, self.out_activation_max),
+ datatype=inttype)
+
+ self.write_c_config_header()
+ self.write_c_header_wrapper()
+ # Calls the base-class writer first, then appends (mode "a") the add/mul specific defines to the config header.
+ def write_c_config_header(self):
+ super().write_c_config_header(write_common_parameters=False)
+
+ filename = self.config_data
+ filepath = self.headers_dir + filename
+ prefix = self.testdataset.upper()
+
+ with open(filepath, "a") as f:
+ f.write("#define {}_DST_SIZE {}\n".format(prefix,
+ self.batches * self.y_input * self.x_input * self.input_ch))
+ f.write("#define {}_OUT_ACTIVATION_MIN {}\n".format(prefix, self.out_activation_min))
+ f.write("#define {}_OUT_ACTIVATION_MAX {}\n".format(prefix, self.out_activation_max))
+ f.write("#define {}_INPUT1_OFFSET {}\n".format(prefix, self.input1_zero_point))
+ f.write("#define {}_INPUT2_OFFSET {}\n".format(prefix, self.input2_zero_point))
+ f.write("#define {}_OUTPUT_MULT {}\n".format(prefix, self.output_mult))
+ f.write("#define {}_OUTPUT_SHIFT {}\n".format(prefix, self.output_shift))
+ f.write("#define {}_OUTPUT_OFFSET {}\n".format(prefix, self.output_zero_point))
+ if self.test_type == 'add':
+ f.write("#define {}_LEFT_SHIFT {}\n".format(prefix, self.left_shift))
+ f.write("#define {}_INPUT1_SHIFT {}\n".format(prefix, self.input1_shift))
+ f.write("#define {}_INPUT2_SHIFT {}\n".format(prefix, self.input2_shift))
+ f.write("#define {}_INPUT1_MULT {}\n".format(prefix, self.input1_mult))
+ f.write("#define {}_INPUT2_MULT {}\n".format(prefix, self.input2_mult))
+
+
def load_all_testdatasets():
"""
Add all new testdata sets here
@@ -1235,21 +1346,34 @@
dataset = 'svdf'
ALL_TESTDATA_SETS[dataset] = SVDFSettings(dataset, type_of_test, args, batches=2, number_inputs=2, rank=8,
memory_size=8, input_size=3, number_units=3)
- type_of_test = 'svdf'
dataset = 'svdf_1'
ALL_TESTDATA_SETS[dataset] = SVDFSettings(dataset, type_of_test, args, batches=3, number_inputs=2, rank=1,
memory_size=2, input_size=7, number_units=5)
-
- type_of_test = 'svdf'
dataset = 'svdf_2'
ALL_TESTDATA_SETS[dataset] = SVDFSettings(dataset, type_of_test, args, batches=3, number_inputs=2, rank=2,
memory_size=2, input_size=7, number_units=5, generate_bias=False)
-
- type_of_test = 'svdf'
dataset = 'svdf_3'
ALL_TESTDATA_SETS[dataset] = SVDFSettings(dataset, type_of_test, args, batches=1, number_inputs=2, rank=1,
memory_size=2, input_size=20, number_units=12, generate_bias=False)
+ type_of_test = 'add'
+ dataset = 'add'
+ ALL_TESTDATA_SETS[dataset] = AddMulSettings(dataset, type_of_test, args, channels=8, x_in=4, y_in=4,
+ randmin=INT8_MIN, randmax=INT8_MAX)
+ dataset = 'add_s16'
+ ALL_TESTDATA_SETS[dataset] = AddMulSettings(dataset, type_of_test, args, channels=8, x_in=4, y_in=4,
+ randmin=INT16_MIN, randmax=INT16_MAX, out_activation_min=INT16_MIN,
+ out_activation_max=INT16_MAX, int16xint8=True)
+
+ type_of_test = 'mul'
+ dataset = 'mul'
+ ALL_TESTDATA_SETS[dataset] = AddMulSettings(dataset, type_of_test, args, channels=8, x_in=4, y_in=5,
+ randmin=INT8_MIN, randmax=INT8_MAX)
+ dataset = 'mul_s16'
+ ALL_TESTDATA_SETS[dataset] = AddMulSettings(dataset, type_of_test, args, channels=8, x_in=5, y_in=4,
+ randmin=INT16_MIN, randmax=INT16_MAX, out_activation_min=INT16_MIN,
+ out_activation_max=INT16_MAX, int16xint8=True)
+
if __name__ == '__main__':
if version.parse(tf.__version__) < REQUIRED_MINIMUM_TENSORFLOW_VERSION:
@@ -1295,6 +1419,8 @@
generator = SoftmaxSettings(testdataset, test_type, args)
elif args.testtype == 'svdf':
generator = SVDFSettings(testdataset, test_type, args)
+ elif args.testtype == 'add' or args.testtype == 'mul':
+ generator = AddMulSettings(testdataset, test_type, args)
else:
raise RuntimeError("Please specify type of test with -t")
generator.generate_data()