CMSIS-NN: Add dilation support for generic depthwise conv (#1384)

* CMSIS-NN: Add dilation support for generic depthwise conv

Change-Id: I5731a3de681c7618e6d4408d117748bb5f384ae8

* CMSIS-NN: Also call wrapper in depthwise dilation unit test
diff --git a/ARM.CMSIS.pdsc b/ARM.CMSIS.pdsc
index 8f88f40..b6692de 100644
--- a/ARM.CMSIS.pdsc
+++ b/ARM.CMSIS.pdsc
@@ -14,7 +14,8 @@
       CMSIS-NN: 3.1.0 (see revision history for details)
        - Support for int16 convolution and fully connected for reference implementation
        - Support for DSP extension optimization for int16 convolution and fully connected
-       - Support for dilation for int8 convolution
+       - Support dilation for int8 convolution
+       - Support dilation for int8 depthwise convolution
       CMSIS-RTOS2:
         - RTX 5.5.4 (see revision history for details)
     </release>
diff --git a/CMSIS/DoxyGen/NN/src/history.txt b/CMSIS/DoxyGen/NN/src/history.txt
index 8807595..208429f 100644
--- a/CMSIS/DoxyGen/NN/src/history.txt
+++ b/CMSIS/DoxyGen/NN/src/history.txt
@@ -20,6 +20,7 @@
       <li> Added fully_connected int16 DSP implementation </li>
       <li> Added unit tests for int16 fully_connected kernel </li>
       <li> Added dilation support for int8 conv kernel </li>
+      <li> Added dilation support for int8 depthwise conv kernel </li>
       </ul>
     </td>
   </tr>
diff --git a/CMSIS/NN/README.md b/CMSIS/NN/README.md
index 39aaa13..e31efe8 100644
--- a/CMSIS/NN/README.md
+++ b/CMSIS/NN/README.md
@@ -27,9 +27,9 @@
 ||arm_convolve_s8()|CONV| None |4 * (ker_x * ker_y * input_ch + delta)| Yes | Yes |delta - MVE only|
 ||arm_convolve_1x1_s8_fast() | CONV | dilation = 1 <br/> ker_x = 1, ker_y = 1 <br/> pad = 0<br/> stride = 1<br/> input_ch % 4 = 0| No | Yes |Yes ||
 ||arm_convolve_1_x_n_s8() | CONV | dilation = 1 <br/> output_y % 4 = 0 | 4 * ker_x * ker_y * input_ch |Yes |Yes||
-|| arm_depthwise_conv_wrapper_s8()| DEPTHWISE_CONV | dilation = 1|n.a.| Yes| Yes| The additional memory required depends on the optimal convolution function called|
+|| arm_depthwise_conv_wrapper_s8()| DEPTHWISE_CONV | None |n.a.| Yes| Yes| The additional memory required depends on the optimal convolution function called|
 || arm_depthwise_conv_3x3_s8() | DEPTHWISE_CONV | dilation = 1 <br/> depth_multiplier = 1 <br/> pad_x <= 1 | No|No|No| Preferred function for 3x3 kernel size for DSP extension. </br> For MVE, use arm_depthwise_conv_s8_opt()||
-| | arm_depthwise_conv_s8() | DEPTHWISE_CONV | dilation = 1  | No|No|No||
+| | arm_depthwise_conv_s8() | DEPTHWISE_CONV | None | No|No|No||
 || arm_depthwise_conv_s8_opt()| DEPTHWISE_CONV | dilation = 1 <br/> depth_multiplier = 1 | DSP: 2 * ker_x * ker_y * input_ch <br/> MVE: 2 * DSP + 4 | Yes| Yes| Best case is when channels are multiple of 4 or <br/>at the least >= 4 |
 ||arm_convolve_wrapper_s16()|CONV|dilation = 1|n.a.| Yes | No |The additional memory required depends on the optimal convolution function called|
 ||arm_convolve_s16()|CONV|dilation = 1|No| No | No ||
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c
index 82c7b2d..6836a16 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c
@@ -21,8 +21,8 @@
  * Title:        arm_depthwise_conv_s8.c
  * Description:  s8 version of depthwise convolution.
  *
- * $Date:        05. Nov 2021
- * $Revision:    V.2.6.0
+ * $Date:        20. Dec 2021
+ * $Revision:    V.2.7.0
  *
  * Target Processor:  Cortex-M CPUs
  *
@@ -166,7 +166,10 @@
                                       const int32_t output_offset,
                                       const int32_t input_offset,
                                       const int32_t output_activation_min,
-                                      const int32_t output_activation_max)
+                                      const int32_t output_activation_max,
+                                      const uint16_t dilation_x,
+                                      const uint16_t dilation_y)
+
 {
     (void)output_ch;
     int i_out = 0;
@@ -186,12 +189,16 @@
                     {
                         const int idx_out_ch = i_ch_mult + i_input_ch * ch_mult;
                         int32_t acc_0 = 0;
-                        /* Condition for kernel start dimension: (base_idx_<x,y> + ker_<x,y>_start) >= 0 */
-                        const int ker_y_start = MAX(0, -base_idx_y);
-                        const int ker_x_start = MAX(0, -base_idx_x);
-                        /* Condition for kernel end dimension: (base_idx_<x,y> + ker_<x,y>_end) < input_<x,y> */
-                        const int ker_y_end = MIN(kernel_y, input_y - base_idx_y);
-                        const int ker_x_end = MIN(kernel_x, input_x - base_idx_x);
+                        /* Clamp the kernel range to taps k with 0 <= base_idx + dilation * k < input size. */
+                        const int32_t start_y_max = (-base_idx_y + dilation_y - 1) / dilation_y;
+                        const int32_t ker_y_start = MAX(0, start_y_max);
+                        const int32_t start_x_max = (-base_idx_x + dilation_x - 1) / dilation_x;
+                        const int32_t ker_x_start = MAX(0, start_x_max);
+                        const int32_t end_min_y = (input_y - base_idx_y + dilation_y - 1) / dilation_y;
+                        const int32_t ker_y_end = MIN(kernel_y, end_min_y);
+                        const int32_t end_min_x = (input_x - base_idx_x + dilation_x - 1) / dilation_x;
+                        const int32_t ker_x_end = MIN(kernel_x, end_min_x);
+
                         if (bias)
                         {
                             acc_0 = bias[idx_out_ch];
@@ -199,10 +206,10 @@
 
                         for (int i_ker_y = ker_y_start; i_ker_y < ker_y_end; i_ker_y++)
                         {
-                            const int32_t idx_y = base_idx_y + i_ker_y;
+                            const int32_t idx_y = base_idx_y + dilation_y * i_ker_y;
                             for (int i_ker_x = ker_x_start; i_ker_x < ker_x_end; i_ker_x++)
                             {
-                                const int32_t idx_x = base_idx_x + i_ker_x;
+                                const int32_t idx_x = base_idx_x + dilation_x * i_ker_x;
                                 int32_t idx_0 = (idx_y * input_x + idx_x) * input_ch + i_input_ch;
                                 int32_t ker_idx_0 = (i_ker_y * kernel_x + i_ker_x) * (input_ch * ch_mult) + idx_out_ch;
 
@@ -245,11 +252,15 @@
                                  const cmsis_nn_dims *output_dims,
                                  q7_t *output)
 {
+    const uint16_t dilation_x = dw_conv_params->dilation.w;
+    const uint16_t dilation_y = dw_conv_params->dilation.h;
+
     (void)dw_conv_params->dilation;
     (void)bias_dims;
     (void)ctx;
 
-    if (dw_conv_params->ch_mult % 4 == 0 && input_dims->n == 1)
+    if (dw_conv_params->ch_mult % 4 == 0 && input_dims->n == 1 && dw_conv_params->dilation.w == 1 &&
+        dw_conv_params->dilation.h == 1)
     {
         depthwise_conv_s8_mult_4(input,
                                  input_dims->w,
@@ -300,7 +311,9 @@
                                   dw_conv_params->output_offset,
                                   dw_conv_params->input_offset,
                                   dw_conv_params->activation.min,
-                                  dw_conv_params->activation.max);
+                                  dw_conv_params->activation.max,
+                                  dilation_x,
+                                  dilation_y);
     }
 
     /* Return to application */
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c
index 8870b5f..23c8e46 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c
@@ -22,8 +22,8 @@
  * Description:  Wrapper API to select appropriate depthwise conv API based
  *               on dimensions.
  *
- * $Date:        11. May 2021
- * $Revision:    V.1.0.3
+ * $Date:        20. Dec 2021
+ * $Revision:    V.1.4.0
  *
  * Target Processor:  Cortex-M CPUs
  *
@@ -59,7 +59,8 @@
                                          q7_t *output)
 {
     arm_status status = ARM_MATH_SUCCESS;
-    if (1 == dw_conv_params->ch_mult && input_dims->n == 1)
+    if (1 == dw_conv_params->ch_mult && input_dims->n == 1 && dw_conv_params->dilation.w == 1 &&
+        dw_conv_params->dilation.h == 1)
     {
 #if !defined(ARM_MATH_MVEI)
         if ((filter_dims->w == 3) && (filter_dims->h == 3) && (dw_conv_params->padding.h <= 1) &&
@@ -120,7 +121,8 @@
     (void)dw_conv_params;
     int32_t size = 0;
 
-    if (input_dims->c == output_dims->c && input_dims->n == 1)
+    if (input_dims->c == output_dims->c && input_dims->n == 1 && dw_conv_params->dilation.w == 1 &&
+        dw_conv_params->dilation.h == 1)
     {
         size = arm_depthwise_conv_s8_opt_get_buffer_size(input_dims, filter_dims);
     }
diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/depthwise_dilation/bias.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/depthwise_dilation/bias.txt
new file mode 100644
index 0000000..2871f59
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/depthwise_dilation/bias.txt
@@ -0,0 +1,2 @@
+# 9
+4.900000000000000000e+01,1.500000000000000000e+01,6.000000000000000000e+01,-9.800000000000000000e+01,4.600000000000000000e+01,1.230000000000000000e+02,-8.900000000000000000e+01,-2.000000000000000000e+00,-1.230000000000000000e+02
diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/depthwise_dilation/input.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/depthwise_dilation/input.txt
new file mode 100644
index 0000000..06c6a92
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/depthwise_dilation/input.txt
@@ -0,0 +1,31 @@
+# 1,5,6,3
+7.200000000000000000e+01,2.800000000000000000e+01,-3.000000000000000000e+01
+-1.100000000000000000e+02,1.300000000000000000e+01,-9.600000000000000000e+01
+3.400000000000000000e+01,-9.500000000000000000e+01,9.400000000000000000e+01
+9.500000000000000000e+01,-9.800000000000000000e+01,-9.100000000000000000e+01
+2.100000000000000000e+01,1.000000000000000000e+01,-1.090000000000000000e+02
+-6.900000000000000000e+01,2.400000000000000000e+01,1.050000000000000000e+02
+-1.700000000000000000e+01,5.900000000000000000e+01,-3.000000000000000000e+01
+-1.140000000000000000e+02,-8.100000000000000000e+01,0.000000000000000000e+00
+1.040000000000000000e+02,-8.000000000000000000e+00,-3.000000000000000000e+01
+-5.800000000000000000e+01,-4.000000000000000000e+01,-7.100000000000000000e+01
+-6.300000000000000000e+01,9.500000000000000000e+01,5.300000000000000000e+01
+-9.000000000000000000e+00,1.020000000000000000e+02,-5.300000000000000000e+01
+4.700000000000000000e+01,5.900000000000000000e+01,-7.900000000000000000e+01
+-8.500000000000000000e+01,1.240000000000000000e+02,-7.900000000000000000e+01
+4.400000000000000000e+01,3.800000000000000000e+01,-1.250000000000000000e+02
+5.900000000000000000e+01,-2.900000000000000000e+01,-1.040000000000000000e+02
+-4.400000000000000000e+01,2.500000000000000000e+01,-8.000000000000000000e+00
+-9.500000000000000000e+01,-1.210000000000000000e+02,1.600000000000000000e+01
+1.170000000000000000e+02,1.300000000000000000e+01,4.700000000000000000e+01
+-1.500000000000000000e+01,-3.800000000000000000e+01,4.700000000000000000e+01
+-1.120000000000000000e+02,9.200000000000000000e+01,6.800000000000000000e+01
+-2.300000000000000000e+01,1.900000000000000000e+01,2.200000000000000000e+01
+1.040000000000000000e+02,-5.000000000000000000e+00,-1.170000000000000000e+02
+8.200000000000000000e+01,1.300000000000000000e+01,-6.900000000000000000e+01
+9.000000000000000000e+00,-9.900000000000000000e+01,-8.500000000000000000e+01
+3.800000000000000000e+01,-3.600000000000000000e+01,-8.000000000000000000e+00
+5.400000000000000000e+01,8.800000000000000000e+01,3.400000000000000000e+01
+-1.200000000000000000e+02,-1.230000000000000000e+02,1.700000000000000000e+01
+2.100000000000000000e+01,5.500000000000000000e+01,-1.150000000000000000e+02
+1.040000000000000000e+02,6.600000000000000000e+01,-1.100000000000000000e+01
diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/depthwise_dilation/kernel.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/depthwise_dilation/kernel.txt
new file mode 100644
index 0000000..178f1ff
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/depthwise_dilation/kernel.txt
@@ -0,0 +1,37 @@
+# 4,3,3,3
+1.030000000000000000e+02,6.000000000000000000e+00,-8.200000000000000000e+01
+1.500000000000000000e+01,-5.400000000000000000e+01,9.000000000000000000e+00
+9.300000000000000000e+01,3.500000000000000000e+01,-1.000000000000000000e+02
+1.160000000000000000e+02,3.800000000000000000e+01,-7.800000000000000000e+01
+2.100000000000000000e+01,3.100000000000000000e+01,-4.100000000000000000e+01
+1.090000000000000000e+02,-4.600000000000000000e+01,-6.800000000000000000e+01
+-3.100000000000000000e+01,2.200000000000000000e+01,1.500000000000000000e+01
+-5.300000000000000000e+01,-2.000000000000000000e+00,7.000000000000000000e+00
+-6.100000000000000000e+01,-4.100000000000000000e+01,-8.600000000000000000e+01
+1.220000000000000000e+02,0.000000000000000000e+00,7.200000000000000000e+01
+-6.600000000000000000e+01,-1.280000000000000000e+02,-1.100000000000000000e+02
+-1.090000000000000000e+02,1.080000000000000000e+02,-5.600000000000000000e+01
+-2.100000000000000000e+01,2.700000000000000000e+01,-9.100000000000000000e+01
+4.200000000000000000e+01,-4.100000000000000000e+01,9.300000000000000000e+01
+-1.110000000000000000e+02,1.800000000000000000e+01,9.700000000000000000e+01
+1.140000000000000000e+02,-7.000000000000000000e+01,-1.800000000000000000e+01
+-2.300000000000000000e+01,-3.300000000000000000e+01,-3.700000000000000000e+01
+2.700000000000000000e+01,-3.300000000000000000e+01,-4.000000000000000000e+01
+1.010000000000000000e+02,-3.500000000000000000e+01,9.000000000000000000e+00
+4.000000000000000000e+00,1.200000000000000000e+02,-6.700000000000000000e+01
+3.900000000000000000e+01,-7.000000000000000000e+00,-7.000000000000000000e+00
+-3.600000000000000000e+01,3.500000000000000000e+01,-1.080000000000000000e+02
+1.100000000000000000e+02,5.000000000000000000e+00,-3.100000000000000000e+01
+-1.030000000000000000e+02,1.170000000000000000e+02,-3.800000000000000000e+01
+-1.270000000000000000e+02,2.700000000000000000e+01,4.700000000000000000e+01
+4.000000000000000000e+00,-8.100000000000000000e+01,-8.200000000000000000e+01
+-7.900000000000000000e+01,-1.000000000000000000e+01,5.100000000000000000e+01
+9.900000000000000000e+01,7.000000000000000000e+01,1.220000000000000000e+02
+-8.200000000000000000e+01,1.020000000000000000e+02,5.400000000000000000e+01
+5.700000000000000000e+01,-8.000000000000000000e+01,-4.000000000000000000e+01
+0.000000000000000000e+00,-7.900000000000000000e+01,8.000000000000000000e+00
+3.400000000000000000e+01,8.200000000000000000e+01,1.050000000000000000e+02
+9.800000000000000000e+01,9.900000000000000000e+01,1.010000000000000000e+02
+-1.230000000000000000e+02,-5.100000000000000000e+01,-7.900000000000000000e+01
+-8.400000000000000000e+01,-6.800000000000000000e+01,-2.200000000000000000e+01
+-4.100000000000000000e+01,-1.250000000000000000e+02,-1.700000000000000000e+01
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_2/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_2/config_data.h
index 45f8a3a..e575bbf 100644
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_2/config_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_2/config_data.h
@@ -20,3 +20,5 @@
 #define DEPTHWISE_2_CH_MULT 3
 #define DEPTHWISE_2_INPUT_OFFSET 128
 #define DEPTHWISE_2_OUTPUT_OFFSET 11
+#define DEPTHWISE_2_DILATION_X 1
+#define DEPTHWISE_2_DILATION_Y 1
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/biases_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/biases_data.h
new file mode 100644
index 0000000..1e5e060
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/biases_data.h
@@ -0,0 +1,5 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int32_t depthwise_dilation_biases[9] = {12495, 6149, 15927, -28852, 11638, 36212, -25966, -518, -39439};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/config_data.h
new file mode 100644
index 0000000..5302717
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/config_data.h
@@ -0,0 +1,24 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#define DEPTHWISE_DILATION_OUT_CH 9
+#define DEPTHWISE_DILATION_IN_CH 3
+#define DEPTHWISE_DILATION_INPUT_W 6
+#define DEPTHWISE_DILATION_INPUT_H 5
+#define DEPTHWISE_DILATION_DST_SIZE 81
+#define DEPTHWISE_DILATION_INPUT_SIZE 90
+#define DEPTHWISE_DILATION_OUT_ACTIVATION_MIN -70
+#define DEPTHWISE_DILATION_OUT_ACTIVATION_MAX 127
+#define DEPTHWISE_DILATION_INPUT_BATCHES 1
+#define DEPTHWISE_DILATION_FILTER_X 3
+#define DEPTHWISE_DILATION_FILTER_Y 4
+#define DEPTHWISE_DILATION_STRIDE_X 2
+#define DEPTHWISE_DILATION_STRIDE_Y 2
+#define DEPTHWISE_DILATION_PAD_X 1
+#define DEPTHWISE_DILATION_PAD_Y 4
+#define DEPTHWISE_DILATION_OUTPUT_W 3
+#define DEPTHWISE_DILATION_OUTPUT_H 3
+#define DEPTHWISE_DILATION_CH_MULT 3
+#define DEPTHWISE_DILATION_INPUT_OFFSET 128
+#define DEPTHWISE_DILATION_OUTPUT_OFFSET -4
+#define DEPTHWISE_DILATION_DILATION_X 2
+#define DEPTHWISE_DILATION_DILATION_Y 3
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/input_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/input_data.h
new file mode 100644
index 0000000..4ccd143
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/input_data.h
@@ -0,0 +1,10 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const q7_t depthwise_dilation_input[90] = {
+    72,  28,  -30, -110, 13,  -96, 34,   -95, 94,   95,   -98,  -91,  21,  10, -109, -69, 24,   105,
+    -17, 59,  -30, -114, -81, 0,   104,  -8,  -30,  -58,  -40,  -71,  -63, 95, 53,   -9,  102,  -53,
+    47,  59,  -79, -85,  124, -79, 44,   38,  -125, 59,   -29,  -104, -44, 25, -8,   -95, -121, 16,
+    117, 13,  47,  -15,  -38, 47,  -112, 92,  68,   -23,  19,   22,   104, -5, -117, 82,  13,   -69,
+    9,   -99, -85, 38,   -36, -8,  54,   88,  34,   -120, -123, 17,   21,  55, -115, 104, 66,   -11};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/output_mult_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/output_mult_data.h
new file mode 100644
index 0000000..1226536
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/output_mult_data.h
@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int32_t depthwise_dilation_output_mult[9] =
+    {1253719158, 1559745097, 1204360159, 1085898488, 1263590957, 1085898488, 1095770288, 1233975558, 1994104433};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/output_ref_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/output_ref_data.h
new file mode 100644
index 0000000..fb5f03a
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/output_ref_data.h
@@ -0,0 +1,9 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const q7_t depthwise_dilation_output_ref[81] = {
+    -19, 8,  18,  0,   2,   14,  -38, 2,   -41, 3,   7,  -7,  -19, 44,  8,   -43, -3, -33, 30,  -6,  11,
+    -31, 23, 24,  -46, 14,  -46, 11,  2,   -9,  -21, 4,  29,  -70, 10,  -25, 11,  -6, 18,  -36, -16, 0,
+    -70, 27, -45, 9,   13,  -22, -4,  -13, 38,  -54, 18, -41, 16,  2,   2,   -31, 5,  29,  -46, -10, -33,
+    80,  -2, -13, -42, -24, 31,  -70, 6,   -66, 54,  11, -26, -33, -15, 21,  -23, 4,  -65};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/output_shift_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/output_shift_data.h
new file mode 100644
index 0000000..6352872
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/output_shift_data.h
@@ -0,0 +1,5 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int32_t depthwise_dilation_output_shift[9] = {-9, -10, -9, -9, -9, -9, -9, -9, -10};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/test_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/test_data.h
new file mode 100644
index 0000000..99f0472
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/test_data.h
@@ -0,0 +1,8 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#include "biases_data.h"
+#include "config_data.h"
+#include "input_data.h"
+#include "output_mult_data.h"
+#include "output_ref_data.h"
+#include "output_shift_data.h"
+#include "weights_data.h"
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/weights_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/weights_data.h
new file mode 100644
index 0000000..56de55d
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_dilation/weights_data.h
@@ -0,0 +1,11 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const q7_t depthwise_dilation_weights[108] = {
+    103,  10,   -85, 17,  -54, 10,  106,  36,  -126, 116,  61,   -81,  24,  31,   -47,  125,  -47,  -86,
+    -31,  35,   16,  -61, -2,  8,   -70,  -42, -108, 122,  0,    75,   -76, -127, -127, -125, 110,  -70,
+    -21,  43,   -95, 48,  -41, 107, -127, 18,  122,  114,  -113, -19,  -27, -33,  -43,  31,   -34,  -50,
+    101,  -56,  9,   5,   119, -77, 45,   -7,  -9,   -36,  56,   -112, 127, 5,    -36,  -118, 119,  -48,
+    -127, 43,   49,  5,   -80, -95, -90,  -10, 64,   99,   113,  127,  -95, 101,  62,   65,   -81,  -50,
+    0,    -127, 8,   39,  81,  121, 112,  101, 127,  -123, -82,  -82,  -97, -67,  -25,  -47,  -127, -21};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_eq_in_out_ch/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_eq_in_out_ch/config_data.h
index f76cdd8..a70b1a8 100644
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_eq_in_out_ch/config_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_eq_in_out_ch/config_data.h
@@ -20,3 +20,5 @@
 #define DEPTHWISE_EQ_IN_OUT_CH_CH_MULT 1
 #define DEPTHWISE_EQ_IN_OUT_CH_INPUT_OFFSET 128
 #define DEPTHWISE_EQ_IN_OUT_CH_OUTPUT_OFFSET -1
+#define DEPTHWISE_EQ_IN_OUT_CH_DILATION_X 1
+#define DEPTHWISE_EQ_IN_OUT_CH_DILATION_Y 1
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_kernel_3x3/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_kernel_3x3/config_data.h
index 74d4044..db79d41 100644
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_kernel_3x3/config_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_kernel_3x3/config_data.h
@@ -20,3 +20,5 @@
 #define DEPTHWISE_KERNEL_3X3_CH_MULT 1
 #define DEPTHWISE_KERNEL_3X3_INPUT_OFFSET 128
 #define DEPTHWISE_KERNEL_3X3_OUTPUT_OFFSET 0
+#define DEPTHWISE_KERNEL_3X3_DILATION_X 1
+#define DEPTHWISE_KERNEL_3X3_DILATION_Y 1
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_mult_batches/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_mult_batches/config_data.h
index ed03049..701d2f7 100644
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_mult_batches/config_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_mult_batches/config_data.h
@@ -20,3 +20,5 @@
 #define DEPTHWISE_MULT_BATCHES_CH_MULT 1
 #define DEPTHWISE_MULT_BATCHES_INPUT_OFFSET 128
 #define DEPTHWISE_MULT_BATCHES_OUTPUT_OFFSET 14
+#define DEPTHWISE_MULT_BATCHES_DILATION_X 1
+#define DEPTHWISE_MULT_BATCHES_DILATION_Y 1
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_null_bias_0/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_null_bias_0/config_data.h
index b93fe16..c4ed9fc 100644
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_null_bias_0/config_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_null_bias_0/config_data.h
@@ -20,3 +20,5 @@
 #define DEPTHWISE_NULL_BIAS_0_CH_MULT 1
 #define DEPTHWISE_NULL_BIAS_0_INPUT_OFFSET 128
 #define DEPTHWISE_NULL_BIAS_0_OUTPUT_OFFSET -15
+#define DEPTHWISE_NULL_BIAS_0_DILATION_X 1
+#define DEPTHWISE_NULL_BIAS_0_DILATION_Y 1
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_null_bias_1/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_null_bias_1/config_data.h
index 15748d6..a033554 100644
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_null_bias_1/config_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_null_bias_1/config_data.h
@@ -20,3 +20,5 @@
 #define DEPTHWISE_NULL_BIAS_1_CH_MULT 4
 #define DEPTHWISE_NULL_BIAS_1_INPUT_OFFSET 128
 #define DEPTHWISE_NULL_BIAS_1_OUTPUT_OFFSET 7
+#define DEPTHWISE_NULL_BIAS_1_DILATION_X 1
+#define DEPTHWISE_NULL_BIAS_1_DILATION_Y 1
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_out_activation/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_out_activation/config_data.h
index 78e6f1f..2164372 100644
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_out_activation/config_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/depthwise_out_activation/config_data.h
@@ -20,3 +20,5 @@
 #define DEPTHWISE_OUT_ACTIVATION_CH_MULT 1
 #define DEPTHWISE_OUT_ACTIVATION_INPUT_OFFSET 128
 #define DEPTHWISE_OUT_ACTIVATION_OUTPUT_OFFSET 127
+#define DEPTHWISE_OUT_ACTIVATION_DILATION_X 1
+#define DEPTHWISE_OUT_ACTIVATION_DILATION_Y 1
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_depthwise_conv_3x3_s8/test_arm_depthwise_conv_3x3_s8.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_depthwise_conv_3x3_s8/test_arm_depthwise_conv_3x3_s8.c
index 7094fe1..c4a4378 100644
--- a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_depthwise_conv_3x3_s8/test_arm_depthwise_conv_3x3_s8.c
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_depthwise_conv_3x3_s8/test_arm_depthwise_conv_3x3_s8.c
@@ -56,6 +56,9 @@
     dw_conv_params.padding.h = DEPTHWISE_KERNEL_3X3_PAD_Y;
     dw_conv_params.stride.w = DEPTHWISE_KERNEL_3X3_STRIDE_X;
     dw_conv_params.stride.h = DEPTHWISE_KERNEL_3X3_STRIDE_Y;
+    dw_conv_params.dilation.w = DEPTHWISE_KERNEL_3X3_DILATION_X;
+    dw_conv_params.dilation.h = DEPTHWISE_KERNEL_3X3_DILATION_Y;
+
     dw_conv_params.ch_mult = DEPTHWISE_KERNEL_3X3_CH_MULT;
 
     dw_conv_params.input_offset = DEPTHWISE_KERNEL_3X3_INPUT_OFFSET;
@@ -136,6 +139,9 @@
     dw_conv_params.padding.h = DEPTHWISE_KERNEL_3X3_PAD_Y;
     dw_conv_params.stride.w = DEPTHWISE_KERNEL_3X3_STRIDE_X;
     dw_conv_params.stride.h = DEPTHWISE_KERNEL_3X3_STRIDE_Y;
+    dw_conv_params.dilation.w = DEPTHWISE_KERNEL_3X3_DILATION_X;
+    dw_conv_params.dilation.h = DEPTHWISE_KERNEL_3X3_DILATION_Y;
+
     dw_conv_params.ch_mult = DEPTHWISE_KERNEL_3X3_CH_MULT;
 
     dw_conv_params.input_offset = DEPTHWISE_KERNEL_3X3_INPUT_OFFSET;
@@ -216,6 +222,9 @@
     dw_conv_params.padding.h = DEPTHWISE_KERNEL_3X3_PAD_Y;
     dw_conv_params.stride.w = DEPTHWISE_KERNEL_3X3_STRIDE_X;
     dw_conv_params.stride.h = DEPTHWISE_KERNEL_3X3_STRIDE_Y;
+    dw_conv_params.dilation.w = DEPTHWISE_KERNEL_3X3_DILATION_X;
+    dw_conv_params.dilation.h = DEPTHWISE_KERNEL_3X3_DILATION_Y;
+
     dw_conv_params.ch_mult = DEPTHWISE_KERNEL_3X3_CH_MULT;
 
     dw_conv_params.input_offset = DEPTHWISE_KERNEL_3X3_INPUT_OFFSET;
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8/Unity/unity_test_arm_depthwise_conv_s8.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8/Unity/unity_test_arm_depthwise_conv_s8.c
index eab9a58..d60e771 100644
--- a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8/Unity/unity_test_arm_depthwise_conv_s8.c
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8/Unity/unity_test_arm_depthwise_conv_s8.c
@@ -57,3 +57,5 @@
 void test_depthwise_null_bias_0_arm_depthwise_conv_s8(void) { depthwise_null_bias_0_arm_depthwise_conv_s8(); }
 
 void test_depthwise_null_bias_1_arm_depthwise_conv_s8(void) { depthwise_null_bias_1_arm_depthwise_conv_s8(); }
+
+void test_depthwise_dilation_arm_depthwise_conv_s8(void) { depthwise_dilation_arm_depthwise_conv_s8(); }
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8/test_arm_depthwise_conv_s8.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8/test_arm_depthwise_conv_s8.c
index 6b18b1e..35de2ae 100644
--- a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8/test_arm_depthwise_conv_s8.c
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8/test_arm_depthwise_conv_s8.c
@@ -21,6 +21,7 @@
 
 #include "../TestData/basic/test_data.h"
 #include "../TestData/depthwise_2/test_data.h"
+#include "../TestData/depthwise_dilation/test_data.h"
 #include "../TestData/depthwise_mult_batches/test_data.h"
 #include "../TestData/depthwise_null_bias_0/test_data.h"
 #include "../TestData/depthwise_null_bias_1/test_data.h"
@@ -72,6 +73,9 @@
     dw_conv_params.padding.h = BASIC_PAD_Y;
     dw_conv_params.stride.w = BASIC_STRIDE_X;
     dw_conv_params.stride.h = BASIC_STRIDE_Y;
+    dw_conv_params.dilation.w = BASIC_DILATION_X;
+    dw_conv_params.dilation.h = BASIC_DILATION_Y;
+
     dw_conv_params.ch_mult = 1;
 
     dw_conv_params.input_offset = BASIC_INPUT_OFFSET;
@@ -152,6 +156,9 @@
     dw_conv_params.padding.h = STRIDE2PAD1_PAD_Y;
     dw_conv_params.stride.w = STRIDE2PAD1_STRIDE_X;
     dw_conv_params.stride.h = STRIDE2PAD1_STRIDE_Y;
+    dw_conv_params.dilation.w = STRIDE2PAD1_DILATION_X;
+    dw_conv_params.dilation.h = STRIDE2PAD1_DILATION_Y;
+
     dw_conv_params.ch_mult = 1;
 
     dw_conv_params.input_offset = STRIDE2PAD1_INPUT_OFFSET;
@@ -232,6 +239,9 @@
     dw_conv_params.padding.h = DEPTHWISE_2_PAD_Y;
     dw_conv_params.stride.w = DEPTHWISE_2_STRIDE_X;
     dw_conv_params.stride.h = DEPTHWISE_2_STRIDE_Y;
+    dw_conv_params.dilation.w = DEPTHWISE_2_DILATION_X;
+    dw_conv_params.dilation.h = DEPTHWISE_2_DILATION_Y;
+
     dw_conv_params.ch_mult = DEPTHWISE_2_CH_MULT;
 
     dw_conv_params.input_offset = DEPTHWISE_2_INPUT_OFFSET;
@@ -314,6 +324,8 @@
     dw_conv_params.stride.w = DEPTHWISE_OUT_ACTIVATION_STRIDE_X;
     dw_conv_params.stride.h = DEPTHWISE_OUT_ACTIVATION_STRIDE_Y;
     dw_conv_params.ch_mult = DEPTHWISE_OUT_ACTIVATION_CH_MULT;
+    dw_conv_params.dilation.w = DEPTHWISE_OUT_ACTIVATION_DILATION_X;
+    dw_conv_params.dilation.h = DEPTHWISE_OUT_ACTIVATION_DILATION_Y;
 
     dw_conv_params.input_offset = DEPTHWISE_OUT_ACTIVATION_INPUT_OFFSET;
     dw_conv_params.output_offset = DEPTHWISE_OUT_ACTIVATION_OUTPUT_OFFSET;
@@ -392,6 +404,9 @@
     dw_conv_params.padding.h = DEPTHWISE_MULT_BATCHES_PAD_Y;
     dw_conv_params.stride.w = DEPTHWISE_MULT_BATCHES_STRIDE_X;
     dw_conv_params.stride.h = DEPTHWISE_MULT_BATCHES_STRIDE_Y;
+    dw_conv_params.dilation.w = DEPTHWISE_MULT_BATCHES_DILATION_X;
+    dw_conv_params.dilation.h = DEPTHWISE_MULT_BATCHES_DILATION_Y;
+
     dw_conv_params.ch_mult = DEPTHWISE_MULT_BATCHES_CH_MULT;
 
     dw_conv_params.input_offset = DEPTHWISE_MULT_BATCHES_INPUT_OFFSET;
@@ -471,6 +486,9 @@
     dw_conv_params.padding.h = DEPTHWISE_NULL_BIAS_0_PAD_Y;
     dw_conv_params.stride.w = DEPTHWISE_NULL_BIAS_0_STRIDE_X;
     dw_conv_params.stride.h = DEPTHWISE_NULL_BIAS_0_STRIDE_Y;
+    dw_conv_params.dilation.w = DEPTHWISE_NULL_BIAS_0_DILATION_X;
+    dw_conv_params.dilation.h = DEPTHWISE_NULL_BIAS_0_DILATION_Y;
+
     dw_conv_params.ch_mult = DEPTHWISE_NULL_BIAS_0_CH_MULT;
 
     dw_conv_params.input_offset = DEPTHWISE_NULL_BIAS_0_INPUT_OFFSET;
@@ -531,6 +549,9 @@
     dw_conv_params.padding.h = DEPTHWISE_NULL_BIAS_1_PAD_Y;
     dw_conv_params.stride.w = DEPTHWISE_NULL_BIAS_1_STRIDE_X;
     dw_conv_params.stride.h = DEPTHWISE_NULL_BIAS_1_STRIDE_Y;
+    dw_conv_params.dilation.w = DEPTHWISE_NULL_BIAS_1_DILATION_X;
+    dw_conv_params.dilation.h = DEPTHWISE_NULL_BIAS_1_DILATION_Y;
+
     dw_conv_params.ch_mult = DEPTHWISE_NULL_BIAS_1_CH_MULT;
 
     dw_conv_params.input_offset = DEPTHWISE_NULL_BIAS_1_INPUT_OFFSET;
@@ -558,4 +579,84 @@
     free(ctx.buf);
     TEST_ASSERT_EQUAL(expected, result);
     TEST_ASSERT_TRUE(validate(output, depthwise_null_bias_1_output_ref, DEPTHWISE_NULL_BIAS_1_DST_SIZE));
-}
\ No newline at end of file
+}
+
+/* Dilated depthwise conv: checks the generic s8 kernel and then the wrapper against the same reference data. */
+void depthwise_dilation_arm_depthwise_conv_s8(void)
+{
+    const arm_status expected = ARM_MATH_SUCCESS;
+    q7_t output[DEPTHWISE_DILATION_DST_SIZE] = {0};
+    /* Separate buffer for the wrapper run, so its check cannot pass on results left over from the first run. */
+    q7_t wrapper_output[DEPTHWISE_DILATION_DST_SIZE] = {0};
+
+    cmsis_nn_context ctx = {.buf = NULL, .size = 0}; /* neither call is handed a scratch buffer */
+    cmsis_nn_dw_conv_params dw_conv_params;
+    cmsis_nn_per_channel_quant_params quant_params;
+    cmsis_nn_dims input_dims;
+    cmsis_nn_dims filter_dims = {0};
+    cmsis_nn_dims bias_dims = {0}; /* never assigned below; zeroed so no indeterminate values are passed on */
+    cmsis_nn_dims output_dims = {0};
+
+    const q31_t *bias_data = get_bias_address(depthwise_dilation_biases, DEPTHWISE_DILATION_OUT_CH);
+    const q7_t *kernel_data = depthwise_dilation_weights;
+    const q7_t *input_data = depthwise_dilation_input;
+
+    input_dims.n = DEPTHWISE_DILATION_INPUT_BATCHES;
+    input_dims.w = DEPTHWISE_DILATION_INPUT_W;
+    input_dims.h = DEPTHWISE_DILATION_INPUT_H;
+    input_dims.c = DEPTHWISE_DILATION_IN_CH;
+    filter_dims.w = DEPTHWISE_DILATION_FILTER_X;
+    filter_dims.h = DEPTHWISE_DILATION_FILTER_Y;
+    output_dims.w = DEPTHWISE_DILATION_OUTPUT_W;
+    output_dims.h = DEPTHWISE_DILATION_OUTPUT_H;
+    output_dims.c = DEPTHWISE_DILATION_OUT_CH;
+
+    dw_conv_params.padding.w = DEPTHWISE_DILATION_PAD_X;
+    dw_conv_params.padding.h = DEPTHWISE_DILATION_PAD_Y;
+    dw_conv_params.stride.w = DEPTHWISE_DILATION_STRIDE_X;
+    dw_conv_params.stride.h = DEPTHWISE_DILATION_STRIDE_Y;
+    dw_conv_params.dilation.w = DEPTHWISE_DILATION_DILATION_X;
+    dw_conv_params.dilation.h = DEPTHWISE_DILATION_DILATION_Y;
+
+    dw_conv_params.ch_mult = DEPTHWISE_DILATION_CH_MULT;
+
+    dw_conv_params.input_offset = DEPTHWISE_DILATION_INPUT_OFFSET;
+    dw_conv_params.output_offset = DEPTHWISE_DILATION_OUTPUT_OFFSET;
+    dw_conv_params.activation.min = DEPTHWISE_DILATION_OUT_ACTIVATION_MIN;
+    dw_conv_params.activation.max = DEPTHWISE_DILATION_OUT_ACTIVATION_MAX;
+    quant_params.multiplier = (int32_t *)depthwise_dilation_output_mult;
+    quant_params.shift = (int32_t *)depthwise_dilation_output_shift;
+
+    arm_status result = arm_depthwise_conv_s8(&ctx,
+                                              &dw_conv_params,
+                                              &quant_params,
+                                              &input_dims,
+                                              input_data,
+                                              &filter_dims,
+                                              kernel_data,
+                                              &bias_dims,
+                                              bias_data,
+                                              &output_dims,
+                                              output);
+
+    free(ctx.buf);
+    TEST_ASSERT_EQUAL(expected, result);
+    TEST_ASSERT_TRUE(validate(output, depthwise_dilation_output_ref, DEPTHWISE_DILATION_DST_SIZE));
+
+    const int32_t buf_size =
+        arm_depthwise_conv_wrapper_s8_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+    TEST_ASSERT_EQUAL(0, buf_size);
+    result = arm_depthwise_conv_wrapper_s8(&ctx,
+                                           &dw_conv_params,
+                                           &quant_params,
+                                           &input_dims,
+                                           input_data,
+                                           &filter_dims,
+                                           kernel_data,
+                                           &bias_dims,
+                                           bias_data,
+                                           &output_dims,
+                                           wrapper_output);
+    TEST_ASSERT_EQUAL(expected, result);
+    TEST_ASSERT_TRUE(validate(wrapper_output, depthwise_dilation_output_ref, DEPTHWISE_DILATION_DST_SIZE));
+}
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8_opt/test_arm_depthwise_conv_s8_opt.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8_opt/test_arm_depthwise_conv_s8_opt.c
index 44f4b6e..b00a2b4 100644
--- a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8_opt/test_arm_depthwise_conv_s8_opt.c
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8_opt/test_arm_depthwise_conv_s8_opt.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -59,6 +59,9 @@
     dw_conv_params.padding.h = BASIC_PAD_Y;
     dw_conv_params.stride.w = BASIC_STRIDE_X;
     dw_conv_params.stride.h = BASIC_STRIDE_Y;
+    dw_conv_params.dilation.w = BASIC_DILATION_X;
+    dw_conv_params.dilation.h = BASIC_DILATION_Y;
+
     dw_conv_params.ch_mult = 1;
 
     dw_conv_params.input_offset = BASIC_INPUT_OFFSET;
@@ -119,6 +122,9 @@
     dw_conv_params.padding.h = STRIDE2PAD1_PAD_Y;
     dw_conv_params.stride.w = STRIDE2PAD1_STRIDE_X;
     dw_conv_params.stride.h = STRIDE2PAD1_STRIDE_Y;
+    dw_conv_params.dilation.w = STRIDE2PAD1_DILATION_X;
+    dw_conv_params.dilation.h = STRIDE2PAD1_DILATION_Y;
+
     dw_conv_params.ch_mult = 1;
 
     dw_conv_params.input_offset = STRIDE2PAD1_INPUT_OFFSET;
@@ -179,6 +185,9 @@
     dw_conv_params.padding.h = DEPTHWISE_EQ_IN_OUT_CH_PAD_Y;
     dw_conv_params.stride.w = DEPTHWISE_EQ_IN_OUT_CH_STRIDE_X;
     dw_conv_params.stride.h = DEPTHWISE_EQ_IN_OUT_CH_STRIDE_Y;
+    dw_conv_params.dilation.w = DEPTHWISE_EQ_IN_OUT_CH_DILATION_X;
+    dw_conv_params.dilation.h = DEPTHWISE_EQ_IN_OUT_CH_DILATION_Y;
+
     dw_conv_params.ch_mult = 1;
 
     dw_conv_params.input_offset = DEPTHWISE_EQ_IN_OUT_CH_INPUT_OFFSET;
diff --git a/CMSIS/NN/Tests/UnitTest/generate_test_data.py b/CMSIS/NN/Tests/UnitTest/generate_test_data.py
index 363ebd8..ea99e53 100755
--- a/CMSIS/NN/Tests/UnitTest/generate_test_data.py
+++ b/CMSIS/NN/Tests/UnitTest/generate_test_data.py
@@ -419,7 +419,8 @@
         super().__init__(dataset, testtype, args, in_ch, out_ch, x_in, y_in, w_x, w_y, stride_x, stride_y, pad,
                          randmin, randmax, batches, generate_bias=generate_bias, relu6=relu6,
                          out_activation_min=out_activation_min, out_activation_max=out_activation_max,
-                         int16xint8=int16xint8, bias_min=bias_min, bias_max=bias_max, dilation_x=dilation_x, dilation_y=dilation_y)
+                         int16xint8=int16xint8, bias_min=bias_min, bias_max=bias_max, dilation_x=dilation_x,
+                         dilation_y=dilation_y)
 
         self.scaling_factors = []
 
@@ -501,7 +502,8 @@
         if self.test_type == 'conv':
             conv_layer = tf.keras.layers.Conv2D(self.output_ch, kernel_size=(self.filter_y, self.filter_x),
                                                 strides=(self.stride_y, self.stride_x),
-                                                padding=self.padding, input_shape=input_shape[1:], dilation_rate=(self.dilation_y, self.dilation_x))
+                                                padding=self.padding, input_shape=input_shape[1:],
+                                                dilation_rate=(self.dilation_y, self.dilation_x))
             model.add(conv_layer)
             conv_layer.set_weights([weights, biases])
         elif self.test_type == 'depthwise_conv':
@@ -509,7 +511,7 @@
                 kernel_size=(self.filter_y, self.filter_x),
                 strides=(self.stride_y, self.stride_x),
                 padding=self.padding, depth_multiplier=self.channel_multiplier,
-                input_shape=input_shape[1:])
+                input_shape=input_shape[1:], dilation_rate=(self.dilation_y, self.dilation_x))
             model.add(depthwise_layer)
             depthwise_layer.set_weights([weights, biases])
 
@@ -1026,8 +1028,8 @@
                                               w_y=3, stride_x=1, stride_y=1, pad=True, out_activation_min=-61,
                                               out_activation_max=107)
     dataset = 'conv_dilation_golden'
-    ALL_TESTDATA_SETS[dataset] = ConvSettings(dataset, type_of_test, args, in_ch=1, batches=2, out_ch=3, x_in=6, y_in=4, w_x=2,
-                                              w_y=2, stride_x=1, stride_y=1, pad=True, out_activation_min=-128,
+    ALL_TESTDATA_SETS[dataset] = ConvSettings(dataset, type_of_test, args, in_ch=1, batches=2, out_ch=3, x_in=6, y_in=4,
+                                              w_x=2, w_y=2, stride_x=1, stride_y=1, pad=True, out_activation_min=-128,
                                               out_activation_max=127, dilation_x=3, dilation_y=2)
     dataset = 'conv_2x2_dilation'
     ALL_TESTDATA_SETS[dataset] = ConvSettings(dataset, type_of_test, args, in_ch=2, out_ch=2, x_in=10, y_in=10, w_x=3,
@@ -1088,7 +1090,13 @@
     dataset = 'depthwise_null_bias_1'
     ALL_TESTDATA_SETS[dataset] = ConvSettings(dataset, type_of_test, args, in_ch=2, out_ch=8, x_in=4, y_in=5, w_x=2,
                                               w_y=2, stride_x=1, stride_y=1, pad=True, generate_bias=False,
-                                              batches=1)                                                                                         
+                                              batches=1)
+    dataset = 'depthwise_dilation'
+    ALL_TESTDATA_SETS[dataset] = ConvSettings(dataset, type_of_test, args, in_ch=3, out_ch=9, x_in=6, y_in=5, w_x=3,
+                                              w_y=4, stride_x=2, stride_y=2, pad=True,
+                                              out_activation_min=-70, out_activation_max=127, dilation_x=2,
+                                              dilation_y=3)
+
     type_of_test = 'fully_connected'
     dataset = 'fully_connected'
     ALL_TESTDATA_SETS[dataset] = FullyConnectedSettings(dataset, type_of_test, args, in_ch=10, out_ch=6, x_in=2, y_in=1,