CMSIS-DSP: Added f16 versions of the distance functions
diff --git a/ARM.CMSIS.pdsc b/ARM.CMSIS.pdsc
index f3c8434..2315ded 100644
--- a/ARM.CMSIS.pdsc
+++ b/ARM.CMSIS.pdsc
@@ -3252,6 +3252,7 @@
<file category="source" name="CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctionsF16.c"/>
<file category="source" name="CMSIS/DSP/Source/SupportFunctions/SupportFunctionsF16.c"/>
<file category="source" name="CMSIS/DSP/Source/FastMathFunctions/FastMathFunctionsF16.c"/>
+ <file category="source" name="CMSIS/DSP/Source/DistanceFunctions/DistanceFunctionsF16.c"/>
<!-- Compute Library for Cortex-A -->
<file category="header" name="CMSIS/DSP/ComputeLibrary/Include/NEMath.h" condition="ARMv7-A Device"/>
diff --git a/CMSIS/DSP/Include/arm_vec_math_f16.h b/CMSIS/DSP/Include/arm_vec_math_f16.h
index bd292c9..0c1f441 100755
--- a/CMSIS/DSP/Include/arm_vec_math_f16.h
+++ b/CMSIS/DSP/Include/arm_vec_math_f16.h
@@ -40,6 +40,39 @@
static const float16_t __logf_rng_f16=0.693147180f16;
+/* fast inverse approximation (3x newton): per-lane 1/x from a linear seed on |x|; lanes equal to 0 give F16INFINITY */
+__STATIC_INLINE f16x8_t vrecip_medprec_f16(
+ f16x8_t x)
+{
+ q15x8_t m;
+ f16x8_t b;
+ any16x8_t xinv;
+ f16x8_t ax = vabsq(x);
+ /* Work on |x| through the float (.f) / integer (.i) views of xinv; the sign is reapplied at the end. */
+ xinv.f = ax;
+ /* 0x7c00 selects the half-precision exponent bits and 0x3c00 is the encoding of 1.0: adding m rescales by a power of two, once before and once after the linear seed. */
+ m = 0x03c00 - (xinv.i & 0x07c00);
+ xinv.i = xinv.i + m;
+ xinv.f = 1.41176471f16 - 0.47058824f16 * xinv.f;
+ xinv.i = xinv.i + m;
+ /* Newton iteration 1 of 3: xinv <- xinv * (2 - xinv * |x|) */
+ b = 2.0f16 - xinv.f * ax;
+ xinv.f = xinv.f * b;
+ /* Newton iteration 2 of 3 */
+ b = 2.0f16 - xinv.f * ax;
+ xinv.f = xinv.f * b;
+ /* Newton iteration 3 of 3 */
+ b = 2.0f16 - xinv.f * ax;
+ xinv.f = xinv.f * b;
+ /* Lanes where x compares equal to 0 are overwritten with F16INFINITY */
+ xinv.f = vdupq_m(xinv.f, F16INFINITY, vcmpeqq(x, 0.0f));
+ /*
+ * restore sign: negate the lanes where x < 0
+ */
+ xinv.f = vnegq_m(xinv.f, xinv.f, vcmpltq(x, 0.0f));
+
+ return xinv.f;
+}
/* fast inverse approximation (4x newton) */
__STATIC_INLINE f16x8_t vrecip_hiprec_f16(
@@ -212,6 +245,12 @@
return (r);
}
+/* Lane-wise power val^n, evaluated as exp(n * log(val)). */
+__STATIC_INLINE f16x8_t vpowq_f16(f16x8_t val, f16x8_t n)
+{
+    const f16x8_t scaledLog = vmulq_f16(n, vlogq_f16(val));
+    return vexpq_f16(scaledLog);
+}
#endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)*/
diff --git a/CMSIS/DSP/Include/dsp/distance_functions_f16.h b/CMSIS/DSP/Include/dsp/distance_functions_f16.h
index 7c05a18..d115308 100755
--- a/CMSIS/DSP/Include/dsp/distance_functions_f16.h
+++ b/CMSIS/DSP/Include/dsp/distance_functions_f16.h
@@ -26,12 +26,144 @@
#ifndef _DISTANCE_FUNCTIONS_F16_H_
#define _DISTANCE_FUNCTIONS_F16_H_
+#include "arm_math_types_f16.h"
+#include "arm_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#include "dsp/statistics_functions_f16.h"
+#include "dsp/basic_math_functions_f16.h"
+#include "dsp/fast_math_functions_f16.h"
+
#ifdef __cplusplus
extern "C"
{
#endif
#if defined(ARM_FLOAT16_SUPPORTED)
+
+/**
+ * @brief Euclidean distance between two vectors
+ * @param[in] pA First vector
+ * @param[in] pB Second vector
+ * @param[in] blockSize vector length
+ * @return distance
+ *
+ */
+
+float16_t arm_euclidean_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
+
+/**
+ * @brief Bray-Curtis distance between two vectors
+ * @param[in] pA First vector
+ * @param[in] pB Second vector
+ * @param[in] blockSize vector length
+ * @return distance
+ *
+ */
+float16_t arm_braycurtis_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
+
+/**
+ * @brief Canberra distance between two vectors
+ *
+ * This function may divide by zero when samples pA[i] and pB[i] are both zero.
+ * The result of the computation will still be correct, so the division by zero
+ * may be ignored.
+ *
+ * @param[in] pA First vector
+ * @param[in] pB Second vector
+ * @param[in] blockSize vector length
+ * @return distance
+ *
+ */
+float16_t arm_canberra_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
+
+
+/**
+ * @brief Chebyshev distance between two vectors
+ * @param[in] pA First vector
+ * @param[in] pB Second vector
+ * @param[in] blockSize vector length
+ * @return distance
+ *
+ */
+float16_t arm_chebyshev_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
+
+
+/**
+ * @brief Cityblock (Manhattan) distance between two vectors
+ * @param[in] pA First vector
+ * @param[in] pB Second vector
+ * @param[in] blockSize vector length
+ * @return distance
+ *
+ */
+float16_t arm_cityblock_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
+
+/**
+ * @brief Correlation distance between two vectors
+ *
+ * The input vectors are modified in place !
+ *
+ * @param[in] pA First vector
+ * @param[in] pB Second vector
+ * @param[in] blockSize vector length
+ * @return distance
+ *
+ */
+float16_t arm_correlation_distance_f16(float16_t *pA,float16_t *pB, uint32_t blockSize);
+
+/**
+ * @brief Cosine distance between two vectors
+ *
+ * @param[in] pA First vector
+ * @param[in] pB Second vector
+ * @param[in] blockSize vector length
+ * @return distance
+ *
+ */
+
+float16_t arm_cosine_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
+
+/**
+ * @brief Jensen-Shannon distance between two vectors
+ *
+ * This function is assuming that elements of second vector are > 0
+ * and 0 only when the corresponding element of first vector is 0.
+ * Otherwise the result of the computation does not make sense
+ * and for speed reasons, the cases returning NaN or Infinity are not
+ * managed.
+ *
+ * When the function is computing x log (x / y) with x = 0 and y = 0,
+ * it will compute the right value (0) but a division by zero will occur
+ * and should be ignored in client code.
+ *
+ * @param[in] pA First vector
+ * @param[in] pB Second vector
+ * @param[in] blockSize vector length
+ * @return distance
+ *
+ */
+
+float16_t arm_jensenshannon_distance_f16(const float16_t *pA,const float16_t *pB,uint32_t blockSize);
+
+/**
+ * @brief Minkowski distance between two vectors
+ *
+ * @param[in] pA First vector
+ * @param[in] pB Second vector
+ * @param[in] order Norm order (>= 2)
+ * @param[in] blockSize vector length
+ * @return distance
+ *
+ */
+
+
+
+float16_t arm_minkowski_distance_f16(const float16_t *pA,const float16_t *pB, int32_t order, uint32_t blockSize);
+
+
#endif /*defined(ARM_FLOAT16_SUPPORTED)*/
#ifdef __cplusplus
}
diff --git a/CMSIS/DSP/Source/DistanceFunctions/CMakeLists.txt b/CMSIS/DSP/Source/DistanceFunctions/CMakeLists.txt
index a965a91..7f6c389 100755
--- a/CMSIS/DSP/Source/DistanceFunctions/CMakeLists.txt
+++ b/CMSIS/DSP/Source/DistanceFunctions/CMakeLists.txt
@@ -7,7 +7,28 @@
file(GLOB SRC "./*_*.c")
-add_library(CMSISDSPDistance STATIC ${SRC})
+add_library(CMSISDSPDistance STATIC)
+
+target_sources(CMSISDSPDistance PRIVATE arm_boolean_distance.c)
+target_sources(CMSISDSPDistance PRIVATE arm_braycurtis_distance_f32.c)
+target_sources(CMSISDSPDistance PRIVATE arm_canberra_distance_f32.c)
+target_sources(CMSISDSPDistance PRIVATE arm_chebyshev_distance_f32.c)
+target_sources(CMSISDSPDistance PRIVATE arm_cityblock_distance_f32.c)
+target_sources(CMSISDSPDistance PRIVATE arm_correlation_distance_f32.c)
+target_sources(CMSISDSPDistance PRIVATE arm_cosine_distance_f32.c)
+target_sources(CMSISDSPDistance PRIVATE arm_dice_distance.c)
+target_sources(CMSISDSPDistance PRIVATE arm_euclidean_distance_f32.c)
+target_sources(CMSISDSPDistance PRIVATE arm_hamming_distance.c)
+target_sources(CMSISDSPDistance PRIVATE arm_jaccard_distance.c)
+target_sources(CMSISDSPDistance PRIVATE arm_jensenshannon_distance_f32.c)
+target_sources(CMSISDSPDistance PRIVATE arm_kulsinski_distance.c)
+target_sources(CMSISDSPDistance PRIVATE arm_minkowski_distance_f32.c)
+target_sources(CMSISDSPDistance PRIVATE arm_rogerstanimoto_distance.c)
+target_sources(CMSISDSPDistance PRIVATE arm_russellrao_distance.c)
+target_sources(CMSISDSPDistance PRIVATE arm_sokalmichener_distance.c)
+target_sources(CMSISDSPDistance PRIVATE arm_sokalsneath_distance.c)
+target_sources(CMSISDSPDistance PRIVATE arm_yule_distance.c)
+
configLib(CMSISDSPDistance ${ROOT})
configDsp(CMSISDSPDistance ${ROOT})
@@ -16,5 +37,16 @@
target_include_directories(CMSISDSPDistance PUBLIC "${DSP}/Include")
target_include_directories(CMSISDSPDistance PRIVATE ".")
+if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16))
+target_sources(CMSISDSPDistance PRIVATE arm_braycurtis_distance_f16.c)
+target_sources(CMSISDSPDistance PRIVATE arm_canberra_distance_f16.c)
+target_sources(CMSISDSPDistance PRIVATE arm_chebyshev_distance_f16.c)
+target_sources(CMSISDSPDistance PRIVATE arm_cityblock_distance_f16.c)
+target_sources(CMSISDSPDistance PRIVATE arm_correlation_distance_f16.c)
+target_sources(CMSISDSPDistance PRIVATE arm_cosine_distance_f16.c)
+target_sources(CMSISDSPDistance PRIVATE arm_euclidean_distance_f16.c)
+target_sources(CMSISDSPDistance PRIVATE arm_jensenshannon_distance_f16.c)
+target_sources(CMSISDSPDistance PRIVATE arm_minkowski_distance_f16.c)
+endif()
-
+
\ No newline at end of file
diff --git a/CMSIS/DSP/Source/DistanceFunctions/DistanceFunctionsF16.c b/CMSIS/DSP/Source/DistanceFunctions/DistanceFunctionsF16.c
new file mode 100755
index 0000000..a0be2d4
--- /dev/null
+++ b/CMSIS/DSP/Source/DistanceFunctions/DistanceFunctionsF16.c
@@ -0,0 +1,36 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: DistanceFunctionsF16.c
+ * Description: Combination of all distance function f16 source files.
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_braycurtis_distance_f16.c"
+#include "arm_canberra_distance_f16.c"
+#include "arm_chebyshev_distance_f16.c"
+#include "arm_cityblock_distance_f16.c"
+#include "arm_correlation_distance_f16.c"
+#include "arm_cosine_distance_f16.c"
+#include "arm_euclidean_distance_f16.c"
+#include "arm_jensenshannon_distance_f16.c"
+#include "arm_minkowski_distance_f16.c"
+
diff --git a/CMSIS/DSP/Source/DistanceFunctions/arm_braycurtis_distance_f16.c b/CMSIS/DSP/Source/DistanceFunctions/arm_braycurtis_distance_f16.c
new file mode 100755
index 0000000..172dae9
--- /dev/null
+++ b/CMSIS/DSP/Source/DistanceFunctions/arm_braycurtis_distance_f16.c
@@ -0,0 +1,141 @@
+
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_braycurtis_distance_f16.c
+ * Description: Bray-Curtis distance between two vectors
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+
+
+
+/**
+ @addtogroup FloatDist
+ @{
+ */
+
+
+/**
+ * @brief Bray-Curtis distance between two vectors
+ * @param[in] pA First vector
+ * @param[in] pB Second vector
+ * @param[in] blockSize vector length
+ * @return distance
+ *
+ */
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+
+float16_t arm_braycurtis_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    /* Per-lane running sums of |a - b| and |a + b| */
+    f16x8_t absDiffAccV = vdupq_n_f16(0.0f);
+    f16x8_t absSumAccV = vdupq_n_f16(0.0f);
+    f16x8_t vecA, vecB, term;
+    float16_t numerator = 0.0f, denominator = 0.0f;
+    uint32_t remaining;
+
+    /*
+     * Main loop: one full vector of 8 half-precision samples per pass
+     */
+    for (remaining = blockSize >> 3; remaining > 0; remaining--) {
+        vecA = vld1q(pA);
+        vecB = vld1q(pB);
+        pA += 8;
+        pB += 8;
+        term = vabdq(vecA, vecB);
+        absDiffAccV = vaddq(absDiffAccV, term);
+
+        term = vabsq_f16(vaddq_f16(vecA, vecB));
+        absSumAccV = vaddq(absSumAccV, term);
+    }
+
+    /*
+     * Tail: the last (blockSize % 8) samples, handled under a lane predicate
+     */
+    remaining = blockSize & 7;
+    if (remaining > 0U) {
+        mve_pred16_t tailPred = vctp16q(remaining);
+
+        vecA = vldrhq_z_f16(pA, tailPred);
+        vecB = vldrhq_z_f16(pB, tailPred);
+
+        term = vabdq(vecA, vecB);
+        absDiffAccV = vaddq_m(absDiffAccV, absDiffAccV, term, tailPred);
+
+        term = vabsq_f16(vaddq_f16(vecA, vecB));
+        absSumAccV = vaddq_m(absSumAccV, absSumAccV, term, tailPred);
+    }
+
+    /* Collapse the lane accumulators into scalars */
+    numerator = vecAddAcrossF16Mve(absDiffAccV);
+    denominator = vecAddAcrossF16Mve(absSumAccV);
+
+    /*
+     * No guard against a zero denominator: it only occurs when every
+     * |pA[i] + pB[i]| is zero, which is assumed not to happen.
+     */
+    return (numerator / denominator);
+}
+#else
+
+float16_t arm_braycurtis_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    float16_t sampleA, sampleB;
+    float16_t sumAbsDiff = 0.0f;
+    float16_t sumAbsSum = 0.0f;
+    uint32_t i;
+
+    /* Accumulate sum |a - b| and sum |a + b| over the blockSize samples */
+    for (i = 0; i < blockSize; i++)
+    {
+        sampleA = pA[i];
+        sampleB = pB[i];
+        sumAbsDiff += fabsf(sampleA - sampleB);
+        sumAbsSum += fabsf(sampleA + sampleB);
+    }
+
+    /* The sum of |a + b| is assumed to be non-zero (it is zero only when
+       every term is zero); no guard is applied. */
+    return(sumAbsDiff / sumAbsSum);
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+/**
+ * @} end of FloatDist group
+ */
+
+/**
+ * @} end of groupDistance group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
diff --git a/CMSIS/DSP/Source/DistanceFunctions/arm_canberra_distance_f16.c b/CMSIS/DSP/Source/DistanceFunctions/arm_canberra_distance_f16.c
new file mode 100755
index 0000000..186ea03
--- /dev/null
+++ b/CMSIS/DSP/Source/DistanceFunctions/arm_canberra_distance_f16.c
@@ -0,0 +1,159 @@
+
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_canberra_distance_f16.c
+ * Description: Canberra distance between two vectors
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+
+/**
+ @addtogroup FloatDist
+ @{
+ */
+
+
+/**
+ * @brief Canberra distance between two vectors
+ *
+ * This function may divide by zero when samples pA[i] and pB[i] are both zero.
+ * The result of the computation will still be correct, so the division by zero
+ * may be ignored.
+ *
+ * @param[in] pA First vector
+ * @param[in] pB Second vector
+ * @param[in] blockSize vector length
+ * @return distance
+ *
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math_f16.h"
+
+float16_t arm_canberra_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    float16_t accum = 0.0f;
+    uint32_t blkCnt;
+    f16x8_t a, b, c, accumV;
+    mve_pred16_t zeroSum;
+
+    accumV = vdupq_n_f16(0.0f);
+
+    blkCnt = blockSize >> 3;
+    while (blkCnt > 0) {
+        a = vld1q(pA);
+        b = vld1q(pB);
+
+        /* c = |a - b| (numerator), a = |a| + |b| (denominator) */
+        c = vabdq(a, b);
+
+        a = vabsq(a);
+        b = vabsq(b);
+        a = vaddq(a, b);
+
+        /*
+         * The denominator is zero when a and b are both zero in a lane.
+         * Record those lanes before the reciprocal overwrites it.
+         */
+        zeroSum = vcmpeqq(a, 0.0f);
+        a = vrecip_hiprec_f16(a);
+
+        /*
+         * Force the result of a division by 0 to 0. It is the behavior of
+         * the sklearn canberra function. The test is made on the denominator
+         * itself, so it does not depend on what the reciprocal returns for 0.
+         */
+        a = vdupq_m_n_f16(a, 0.0f, zeroSum);
+        c = vmulq(c, a);
+        accumV = vaddq(accumV, c);
+
+        pA += 8;
+        pB += 8;
+        blkCnt--;
+    }
+
+    blkCnt = blockSize & 7;
+    if (blkCnt > 0U) {
+        mve_pred16_t p0 = vctp16q(blkCnt);
+
+        a = vldrhq_z_f16(pA, p0);
+        b = vldrhq_z_f16(pB, p0);
+
+        c = vabdq(a, b);
+
+        a = vabsq(a);
+        b = vabsq(b);
+        a = vaddq(a, b);
+
+        /* Same zero-denominator handling as in the main loop */
+        zeroSum = vcmpeqq(a, 0.0f);
+        a = vrecip_hiprec_f16(a);
+        a = vdupq_m_n_f16(a, 0.0f, zeroSum);
+        c = vmulq(c, a);
+        /* Only the lanes enabled by p0 hold real samples */
+        accumV = vaddq_m(accumV, accumV, c, p0);
+    }
+
+    accum = vecAddAcrossF16Mve(accumV);
+
+    return (accum);
+}
+
+
+#else
+float16_t arm_canberra_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    float16_t total=0.0f, valA, valB, absDiff, absSum;
+    uint32_t i;
+
+    for (i = 0; i < blockSize; i++)
+    {
+        valA = pA[i];
+        valB = pB[i];
+        absDiff = fabsf(valA - valB);
+        absSum = fabsf(valA) + fabsf(valB);
+        /* A pair of zeros contributes nothing, which also avoids 0 / 0 */
+        if (!((valA == 0.0f) && (valB == 0.0f)))
+        {
+            total += (absDiff / absSum);
+        }
+    }
+    return(total);
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+/**
+ * @} end of FloatDist group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
diff --git a/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f16.c b/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f16.c
new file mode 100755
index 0000000..f6ab96c
--- /dev/null
+++ b/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f16.c
@@ -0,0 +1,135 @@
+
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_chebyshev_distance_f16.c
+ * Description: Chebyshev distance between two vectors
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+
+/**
+ @addtogroup FloatDist
+ @{
+ */
+
+
+/**
+ * @brief Chebyshev distance between two vectors
+ * @param[in] pA First vector
+ * @param[in] pB Second vector
+ * @param[in] blockSize vector length
+ * @return distance
+ *
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math.h"
+
+float16_t arm_chebyshev_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    f16x8_t laneMaxV = vdupq_n_f16(0.0);   /* per-lane running max of |a - b| */
+    float16_t seed = 0.0;                  /* starting value of the final reduction */
+    f16x8_t va, vb;
+    f16x8_t delta;
+    uint32_t count;
+
+    /*
+     * Full vectors: 8 half-precision samples per iteration
+     */
+    for (count = blockSize >> 3; count > 0U; count--) {
+        va = vld1q(pA);
+        vb = vld1q(pB);
+        pA += 8;
+        pB += 8;
+
+        /*
+         * Keep, in each lane, the larger absolute value of the new
+         * difference and the current maximum.
+         */
+        delta = vsubq(va, vb);
+        laneMaxV = vmaxnmaq(delta, laneMaxV);
+    }
+
+    /*
+     * Tail: remaining (blockSize % 8) samples under a lane predicate
+     */
+    count = blockSize & 7;
+    if (count > 0U) {
+        mve_pred16_t tailPred = vctp16q(count);
+
+        va = vldrhq_z_f16(pA, tailPred);
+        vb = vldrhq_z_f16(pB, tailPred);
+
+        /*
+         * Same per-lane max update, restricted to the active lanes
+         */
+        delta = vsubq(va, vb);
+        laneMaxV = vmaxnmaq_m(laneMaxV, delta, tailPred);
+    }
+
+    /* Reduce the lanes to a single maximum */
+    return vmaxnmavq(seed, laneMaxV);
+}
+
+#else
+float16_t arm_chebyshev_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    float16_t diff, maxVal;
+
+    /* Empty input: nothing to read. Return 0, as the vectorized variant does. */
+    if (blockSize == 0U)
+    {
+        return(0.0f);
+    }
+
+    /* The first sample pair seeds the running maximum of |pA[i] - pB[i]| */
+    maxVal = fabsf(*pA++ - *pB++);
+    blockSize--;
+    while(blockSize > 0)
+    {
+        diff = fabsf(*pA++ - *pB++);
+        if (diff > maxVal)
+        {
+            maxVal = diff;
+        }
+        blockSize --;
+    }
+    return(maxVal);
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+/**
+ * @} end of FloatDist group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
diff --git a/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f16.c b/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f16.c
new file mode 100755
index 0000000..e9810b7
--- /dev/null
+++ b/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f16.c
@@ -0,0 +1,116 @@
+
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cityblock_distance_f16.c
+ * Description: Cityblock (Manhattan) distance between two vectors
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+/**
+ @addtogroup FloatDist
+ @{
+ */
+
+
+/**
+ * @brief Cityblock (Manhattan) distance between two vectors
+ * @param[in] pA First vector
+ * @param[in] pB Second vector
+ * @param[in] blockSize vector length
+ * @return distance
+ *
+ */
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math.h"
+
+float16_t arm_cityblock_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    f16x8_t sumV = vdupq_n_f16(0.0f);   /* per-lane running sum of |a - b| */
+    f16x8_t va, vb, absDiff;
+    uint32_t count;
+
+    /*
+     * Full vectors: 8 half-precision samples per iteration
+     */
+    for (count = blockSize >> 3; count > 0U; count--) {
+        va = vld1q(pA);
+        vb = vld1q(pB);
+        pA += 8;
+        pB += 8;
+
+        absDiff = vabdq(va, vb);
+        sumV = vaddq(sumV, absDiff);
+    }
+
+    /*
+     * Tail: remaining (blockSize % 8) samples. The predicated load zeroes
+     * the unused lanes and the predicated add leaves them untouched.
+     */
+    count = blockSize & 7;
+    if (count > 0U) {
+        mve_pred16_t tailPred = vctp16q(count);
+
+        va = vldrhq_z_f16(pA, tailPred);
+        vb = vldrhq_z_f16(pB, tailPred);
+
+        absDiff = vabdq(va, vb);
+        sumV = vaddq_m(sumV, sumV, absDiff, tailPred);
+    }
+
+    /* Reduce the lane sums to the scalar distance */
+    return vecAddAcrossF16Mve(sumV);
+}
+
+#else
+float16_t arm_cityblock_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    float16_t total = 0.0f;
+    float16_t sampleA, sampleB;
+    uint32_t i;
+
+    /* Sum of |pA[i] - pB[i]| over the blockSize samples */
+    for (i = 0; i < blockSize; i++)
+    {
+        sampleA = pA[i];
+        sampleB = pB[i];
+        total += fabsf(sampleA - sampleB);
+    }
+
+    return(total);
+}
+#endif
+
+/**
+ * @} end of FloatDist group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
diff --git a/CMSIS/DSP/Source/DistanceFunctions/arm_correlation_distance_f16.c b/CMSIS/DSP/Source/DistanceFunctions/arm_correlation_distance_f16.c
new file mode 100755
index 0000000..e3b3a78
--- /dev/null
+++ b/CMSIS/DSP/Source/DistanceFunctions/arm_correlation_distance_f16.c
@@ -0,0 +1,88 @@
+
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_correlation_distance_f16.c
+ * Description: Correlation distance between two vectors
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+
+
+/**
+ @addtogroup FloatDist
+ @{
+ */
+
+
+/**
+ * @brief Correlation distance between two vectors
+ *
+ * The input vectors are modified in place !
+ *
+ * @param[in] pA First vector
+ * @param[in] pB Second vector
+ * @param[in] blockSize vector length
+ * @return distance
+ *
+ */
+
+float16_t arm_correlation_distance_f16(float16_t *pA,float16_t *pB, uint32_t blockSize)
+{
+    float16_t meanA, meanB;
+    float16_t powerA, powerB, dotAB, norm;
+
+    /* Remove the mean from each vector (this overwrites pA and pB) */
+    arm_mean_f16(pA, blockSize, &meanA);
+    arm_mean_f16(pB, blockSize, &meanB);
+    arm_offset_f16(pA, -meanA, pA, blockSize);
+    arm_offset_f16(pB, -meanB, pB, blockSize);
+
+    /* arm_power_f16 of each centered vector, and their dot product */
+    arm_power_f16(pA, blockSize, &powerA);
+    arm_power_f16(pB, blockSize, &powerB);
+    arm_dot_prod_f16(pA, pB, blockSize, &dotAB);
+
+    /* Scale the three quantities by the vector length */
+    dotAB /= blockSize;
+    powerA /= blockSize;
+    powerB /= blockSize;
+
+    /* distance = 1 - dot / sqrt(powerA * powerB) */
+    arm_sqrt_f16(powerA * powerB, &norm);
+    return(1.0f - dotAB / norm);
+}
+
+
+
+/**
+ * @} end of FloatDist group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
diff --git a/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f16.c b/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f16.c
new file mode 100755
index 0000000..207fa97
--- /dev/null
+++ b/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f16.c
@@ -0,0 +1,74 @@
+
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cosine_distance_f16.c
+ * Description: Cosine distance between two vectors
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+
+/**
+ @addtogroup FloatDist
+ @{
+ */
+
+
+
+/**
+ * @brief Cosine distance between two vectors
+ *
+ * @param[in] pA First vector
+ * @param[in] pB Second vector
+ * @param[in] blockSize vector length
+ * @return distance
+ *
+ */
+
+float16_t arm_cosine_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    float16_t powerA, powerB;
+    float16_t dotAB, norm;
+
+    /* Numerator: dot product; denominator: sqrt of the product of powers */
+    arm_dot_prod_f16(pA, pB, blockSize, &dotAB);
+    arm_power_f16(pA, blockSize, &powerA);
+    arm_power_f16(pB, blockSize, &powerB);
+    arm_sqrt_f16(powerA * powerB, &norm);
+
+    return(1.0f - dotAB / norm);
+}
+
+
+
+/**
+ * @} end of FloatDist group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
diff --git a/CMSIS/DSP/Source/DistanceFunctions/arm_euclidean_distance_f16.c b/CMSIS/DSP/Source/DistanceFunctions/arm_euclidean_distance_f16.c
new file mode 100755
index 0000000..97f41aa
--- /dev/null
+++ b/CMSIS/DSP/Source/DistanceFunctions/arm_euclidean_distance_f16.c
@@ -0,0 +1,118 @@
+
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_euclidean_distance_f16.c
+ * Description: Euclidean distance between two vectors
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+
+
+/**
+ @addtogroup FloatDist
+ @{
+ */
+
+
+/**
+ * @brief Euclidean distance between two vectors
+ * @param[in] pA First vector
+ * @param[in] pB Second vector
+ * @param[in] blockSize vector length
+ * @return distance
+ *
+ */
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math.h"
+float16_t arm_euclidean_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    f16x8_t sqSumV = vdupq_n_f16(0.0f);   /* per-lane running sum of (a - b)^2 */
+    f16x8_t va, vb, delta;
+    float16_t distance;
+    uint32_t count;
+
+    /*
+     * Full vectors: 8 half-precision samples per iteration
+     */
+    for (count = blockSize >> 3; count > 0U; count--) {
+        va = vld1q(pA);
+        vb = vld1q(pB);
+        pA += 8;
+        pB += 8;
+
+        /* sqSumV += delta * delta, as one multiply-accumulate */
+        delta = vsubq(va, vb);
+        sqSumV = vfmaq(sqSumV, delta, delta);
+    }
+
+    /*
+     * Tail: remaining (blockSize % 8) samples under a lane predicate
+     */
+    count = blockSize & 7;
+    if (count > 0U) {
+        mve_pred16_t tailPred = vctp16q(count);
+
+        va = vldrhq_z_f16(pA, tailPred);
+        vb = vldrhq_z_f16(pB, tailPred);
+
+        delta = vsubq(va, vb);
+        sqSumV = vfmaq_m(sqSumV, delta, delta, tailPred);
+    }
+
+    /* Square root of the sum of the lane accumulators */
+    arm_sqrt_f16(vecAddAcrossF16Mve(sqSumV), &distance);
+    return (distance);
+}
+
+#else
+float16_t arm_euclidean_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    float16_t sqSum = 0.0f, delta, distance;
+    uint32_t i;
+    /* Sum of squared differences, then its square root */
+    for (i = 0; i < blockSize; i++)
+    {
+        delta = pA[i] - pB[i];
+        sqSum += SQ(delta);
+    }
+    arm_sqrt_f16(sqSum, &distance);
+    return(distance);
+}
+
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+/**
+ * @} end of FloatDist group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
diff --git a/CMSIS/DSP/Source/DistanceFunctions/arm_jensenshannon_distance_f16.c b/CMSIS/DSP/Source/DistanceFunctions/arm_jensenshannon_distance_f16.c
new file mode 100755
index 0000000..83a499c
--- /dev/null
+++ b/CMSIS/DSP/Source/DistanceFunctions/arm_jensenshannon_distance_f16.c
@@ -0,0 +1,164 @@
+
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_jensenshannon_distance_f16.c
+ * Description: Jensen-Shannon distance between two vectors
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+
+/**
+ @addtogroup FloatDist
+ @{
+ */
+
+#if !defined(ARM_MATH_MVEF) || defined(ARM_MATH_AUTOVECTORIZE)
+/// @private
+__STATIC_INLINE float16_t rel_entr(float16_t x, float16_t y)
+{ /* Scalar helper: the term x * log(x / y) summed by the scalar arm_jensenshannon_distance_f16. */
+ return (x * logf(x / y)); /* x / y goes to logf unchecked: no special handling of zero operands */
+}
+#endif
+
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math_f16.h"
+
+float16_t arm_jensenshannon_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    uint32_t blkCnt;                     /* full 8-sample blocks, then the leftover sample count */
+    float16_t tmp;                       /* receives the value written by arm_sqrt_f16 */
+    f16x8_t a, b, t, invT, tmpV, accumV;
+    /* accumV gathers, per lane, a * log(a / t) + b * log(b / t) with t = (a + b) / 2. */
+    accumV = vdupq_n_f16(0.0f);
+
+    blkCnt = blockSize >> 3;
+    while (blkCnt > 0U) {
+        a = vld1q(pA);
+        b = vld1q(pB);
+        /* Mid-point of the two inputs. */
+        t = vaddq(a, b);
+        t = vmulq(t, 0.5f);
+        invT = vrecip_medprec_f16(t);    /* approximate 1 / t, computed once and shared by both terms */
+        tmpV = vmulq(a, invT);
+        tmpV = vlogq_f16(tmpV);
+        accumV = vfmaq(accumV, a, tmpV);
+
+        tmpV = vmulq_f16(b, invT);
+        tmpV = vlogq_f16(tmpV);
+        accumV = vfmaq(accumV, b, tmpV);
+
+        pA += 8;
+        pB += 8;
+        blkCnt--;
+    }
+
+    /*
+     * Tail: the remaining blockSize % 8 samples, processed under the lane
+     * predicate p0 so that only the first blkCnt lanes are accumulated.
+     */
+    blkCnt = blockSize & 7;
+    if (blkCnt > 0U) {
+        mve_pred16_t p0 = vctp16q(blkCnt);
+
+        a = vldrhq_z_f16(pA, p0);
+        b = vldrhq_z_f16(pB, p0);
+
+        t = vaddq(a, b);
+        t = vmulq(t, 0.5f);
+        invT = vrecip_medprec_f16(t);    /* same shared reciprocal as in the main loop */
+        tmpV = vmulq_f16(a, invT);
+        tmpV = vlogq_f16(tmpV);
+        accumV = vfmaq_m_f16(accumV, a, tmpV, p0);
+
+        tmpV = vmulq_f16(b, invT);
+        tmpV = vlogq_f16(tmpV);
+        accumV = vfmaq_m_f16(accumV, b, tmpV, p0);
+
+    }
+    /* Reduce the lanes, halve the total and take the square root. */
+    arm_sqrt_f16(vecAddAcrossF16Mve(accumV) / 2.0f, &tmp);
+    return (tmp);
+}
+
+#else
+
+
+/**
+ * @brief Jensen-Shannon distance between two vectors
+ *
+ * This function assumes that the elements of the second vector are > 0,
+ * and are 0 only when the corresponding element of the first vector is 0.
+ * Otherwise the result of the computation does not make sense
+ * and, for speed reasons, the cases returning NaN or Infinity are not
+ * managed.
+ *
+ * When the function is computing x log (x / y) with x == 0 and y == 0,
+ * it will compute the right result (0) but a division by zero will occur
+ * and should be ignored in client code.
+ *
+ * @param[in] pA First vector
+ * @param[in] pB Second vector
+ * @param[in] blockSize vector length
+ * @return distance
+ *
+ */
+
+
+float16_t arm_jensenshannon_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    /* entrA / entrB accumulate rel_entr of each input against the mid-point. */
+    float16_t entrA = 0.0f;
+    float16_t entrB = 0.0f;
+    float16_t mid, total, dist;
+
+    for (; blockSize > 0U; blockSize--, pA++, pB++)
+    {
+        mid = (*pA + *pB) / 2.0f;
+        entrA += rel_entr(*pA, mid);
+        entrB += rel_entr(*pB, mid);
+    }
+
+    /* Distance = sqrt of half the combined sums. */
+    total = entrA + entrB;
+    arm_sqrt_f16(total/2.0f, &dist);
+    return (dist);
+
+}
+
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+ * @} end of FloatDist group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
diff --git a/CMSIS/DSP/Source/DistanceFunctions/arm_minkowski_distance_f16.c b/CMSIS/DSP/Source/DistanceFunctions/arm_minkowski_distance_f16.c
new file mode 100755
index 0000000..35cf9f6
--- /dev/null
+++ b/CMSIS/DSP/Source/DistanceFunctions/arm_minkowski_distance_f16.c
@@ -0,0 +1,127 @@
+
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_minkowski_distance_f16.c
+ * Description: Minkowski distance between two vectors
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+
+/**
+ @addtogroup FloatDist
+ @{
+ */
+
+
+/**
+ * @brief Minkowski distance between two vectors
+ *
+ * @param[in] pA First vector
+ * @param[in] pB Second vector
+ * @param[in] order Distance order
+ * @param[in] blockSize Number of samples
+ * @return distance
+ *
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math_f16.h"
+
+float16_t arm_minkowski_distance_f16(const float16_t *pA,const float16_t *pB, int32_t order, uint32_t blockSize)
+{
+    uint32_t blkCnt;                 /* full 8-sample blocks, then the leftover sample count */
+    f16x8_t a, b, tmpV, sumV, orderV;
+    /* sumV gathers |a - b|^order per lane; orderV holds the exponent in every lane. */
+    sumV = vdupq_n_f16(0.0f);
+    orderV = vdupq_n_f16(order);
+
+    blkCnt = blockSize >> 3;
+    while (blkCnt > 0U) {
+        a = vld1q(pA);
+        b = vld1q(pB);
+        /* vpowq_f16(v, n) evaluates exp(n * log(v)) lane by lane. */
+        tmpV = vabdq(a, b);
+        tmpV = vpowq_f16(tmpV, orderV);
+        sumV = vaddq(sumV, tmpV);
+
+        pA += 8;
+        pB += 8;
+        blkCnt--;
+    }
+
+    /*
+     * Tail: the remaining blockSize % 8 samples, processed under the lane
+     * predicate p0 so that only the first blkCnt lanes are added to sumV.
+     */
+    blkCnt = blockSize & 7;
+    if (blkCnt > 0U) {
+        mve_pred16_t p0 = vctp16q(blkCnt);
+
+        a = vldrhq_z_f16(pA, p0);
+        b = vldrhq_z_f16(pB, p0);
+
+        tmpV = vabdq(a, b);
+        tmpV = vpowq_f16(tmpV, orderV);
+        sumV = vaddq_m(sumV, sumV, tmpV, p0);
+    }
+    /* Reduce the lanes to a scalar and take the order-th root. */
+    return (powf(vecAddAcrossF16Mve(sumV), (1.0f / (float16_t) order)));
+}
+
+
+#else
+
+
+float16_t arm_minkowski_distance_f16(const float16_t *pA,const float16_t *pB, int32_t order, uint32_t blockSize)
+{
+    /* Running sum of |pA[i] - pB[i]| raised to the power order. */
+    float16_t acc = 0.0f;
+
+    while (blockSize > 0U)
+    {
+        acc += powf(fabsf(*pA++ - *pB++),order);
+        blockSize--;
+    }
+
+    /* The distance is the order-th root of the accumulated sum. */
+    return (powf(acc,(1.0f/order)));
+
+}
+
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+/**
+ * @} end of FloatDist group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
diff --git a/CMSIS/DSP/Testing/CMakeLists.txt b/CMSIS/DSP/Testing/CMakeLists.txt
index e6f4670..c9378ae 100644
--- a/CMSIS/DSP/Testing/CMakeLists.txt
+++ b/CMSIS/DSP/Testing/CMakeLists.txt
@@ -340,6 +340,7 @@
Source/Tests/SupportTestsF16.cpp
Source/Tests/SupportBarTestsF16.cpp
Source/Tests/FastMathF16.cpp
+ Source/Tests/DistanceTestsF16.cpp
)
endif()
endif()
diff --git a/CMSIS/DSP/Testing/Include/Tests/DistanceTestsF16.h b/CMSIS/DSP/Testing/Include/Tests/DistanceTestsF16.h
new file mode 100755
index 0000000..7ba9ded
--- /dev/null
+++ b/CMSIS/DSP/Testing/Include/Tests/DistanceTestsF16.h
@@ -0,0 +1,30 @@
+#include "Test.h"
+#include "Pattern.h"
+
+#include "dsp/distance_functions_f16.h"
+
+class DistanceTestsF16:public Client::Suite
+ { // Test suite for the half-precision (f16) distance functions.
+ public:
+ DistanceTestsF16(Testing::testID_t id);
+ virtual void setUp(Testing::testID_t,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr); // loads the patterns used by the given test
+ virtual void tearDown(Testing::testID_t,Client::PatternMgr *mgr); // dumps the output pattern
+ private:
+ #include "DistanceTestsF16_decl.h"
+ // The test_* methods and *_ID constants used by DistanceTestsF16.cpp are not declared in this file; presumably the header above supplies them.
+ Client::Pattern<float16_t> inputA; // first operand vectors, one pattern after another
+ Client::Pattern<float16_t> inputB; // second operand vectors, same layout as inputA
+ Client::Pattern<int16_t> dims; // [0] = number of patterns, [1] = vector length (as read in setUp)
+
+ Client::LocalPattern<float16_t> output; // one computed distance per pattern
+ Client::LocalPattern<float16_t> tmpA; // scratch copy of an inputA vector (correlation test)
+ Client::LocalPattern<float16_t> tmpB; // scratch copy of an inputB vector (correlation test)
+
+ // Reference patterns are not loaded when we are in dump mode
+ Client::RefPattern<float16_t> ref;
+
+ int vecDim; // length of each input vector
+ int nbPatterns; // number of vector pairs to process
+
+
+ };
diff --git a/CMSIS/DSP/Testing/Source/Tests/DistanceTestsF16.cpp b/CMSIS/DSP/Testing/Source/Tests/DistanceTestsF16.cpp
new file mode 100755
index 0000000..12ec71e
--- /dev/null
+++ b/CMSIS/DSP/Testing/Source/Tests/DistanceTestsF16.cpp
@@ -0,0 +1,303 @@
+#include "DistanceTestsF16.h"
+#include <stdio.h>
+#include "Error.h"
+#include "Test.h"
+
+#define REL_ERROR (2e-3)
+
+#define REL_JS_ERROR (3e-2)
+
+#define REL_MK_ERROR (1e-2)
+
+
+ void DistanceTestsF16::test_braycurtis_distance_f16()
+ {
+     // Input patterns are stored back to back: pattern k starts at element k * vecDim.
+     const float16_t *vecA = inputA.ptr();
+     const float16_t *vecB = inputB.ptr();
+     float16_t *results = output.ptr();
+     const int dim = this->vecDim;
+
+     for (int k = 0; k < this->nbPatterns; ++k)
+     {
+         const int offset = k * dim;
+
+         results[k] = arm_braycurtis_distance_f16(vecA + offset, vecB + offset, dim);
+     }
+
+     // One distance per pattern, checked against ref with the REL_ERROR tolerance.
+     ASSERT_REL_ERROR(output,ref,REL_ERROR);
+ }
+
+ void DistanceTestsF16::test_canberra_distance_f16()
+ {
+     // Input patterns are stored back to back: pattern k starts at element k * vecDim.
+     const float16_t *vecA = inputA.ptr();
+     const float16_t *vecB = inputB.ptr();
+     float16_t *results = output.ptr();
+     const int dim = this->vecDim;
+
+     for (int k = 0; k < this->nbPatterns; ++k)
+     {
+         const int offset = k * dim;
+
+         results[k] = arm_canberra_distance_f16(vecA + offset, vecB + offset, dim);
+     }
+
+     // One distance per pattern, checked against ref with the REL_ERROR tolerance.
+     ASSERT_REL_ERROR(output,ref,REL_ERROR);
+ }
+
+ void DistanceTestsF16::test_chebyshev_distance_f16()
+ {
+     // Input patterns are stored back to back: pattern k starts at element k * vecDim.
+     const float16_t *vecA = inputA.ptr();
+     const float16_t *vecB = inputB.ptr();
+     float16_t *results = output.ptr();
+     const int dim = this->vecDim;
+
+     for (int k = 0; k < this->nbPatterns; ++k)
+     {
+         const int offset = k * dim;
+
+         results[k] = arm_chebyshev_distance_f16(vecA + offset, vecB + offset, dim);
+     }
+
+     // One distance per pattern, checked against ref with the REL_ERROR tolerance.
+     ASSERT_REL_ERROR(output,ref,REL_ERROR);
+ }
+
+ void DistanceTestsF16::test_cityblock_distance_f16()
+ {
+     // Input patterns are stored back to back: pattern k starts at element k * vecDim.
+     const float16_t *vecA = inputA.ptr();
+     const float16_t *vecB = inputB.ptr();
+     float16_t *results = output.ptr();
+     const int dim = this->vecDim;
+
+     for (int k = 0; k < this->nbPatterns; ++k)
+     {
+         const int offset = k * dim;
+
+         results[k] = arm_cityblock_distance_f16(vecA + offset, vecB + offset, dim);
+     }
+
+     // One distance per pattern, checked against ref with the REL_ERROR tolerance.
+     ASSERT_REL_ERROR(output,ref,REL_ERROR);
+ }
+
+ void DistanceTestsF16::test_correlation_distance_f16()
+ {
+     // Each pattern is copied into the tmpA/tmpB scratch buffers before the call.
+     const float16_t *vecA = inputA.ptr();
+     const float16_t *vecB = inputB.ptr();
+     float16_t *scratchA = tmpA.ptr();
+     float16_t *scratchB = tmpB.ptr();
+     float16_t *results = output.ptr();
+     const int dim = this->vecDim;
+
+     for (int k = 0; k < this->nbPatterns; ++k)
+     {
+         const int offset = k * dim;
+
+         memcpy(scratchA, vecA + offset, sizeof(float16_t) * dim);
+         memcpy(scratchB, vecB + offset, sizeof(float16_t) * dim);
+
+         results[k] = arm_correlation_distance_f16(scratchA, scratchB, dim);
+     }
+
+     // The function receives non-const scratch copies, so the loaded input
+     // patterns themselves are never handed to it.
+     ASSERT_REL_ERROR(output,ref,REL_ERROR);
+ }
+
+ void DistanceTestsF16::test_cosine_distance_f16()
+ {
+     // Input patterns are stored back to back: pattern k starts at element k * vecDim.
+     const float16_t *vecA = inputA.ptr();
+     const float16_t *vecB = inputB.ptr();
+     float16_t *results = output.ptr();
+     const int dim = this->vecDim;
+
+     for (int k = 0; k < this->nbPatterns; ++k)
+     {
+         const int offset = k * dim;
+
+         results[k] = arm_cosine_distance_f16(vecA + offset, vecB + offset, dim);
+     }
+
+     // One distance per pattern, checked against ref with the REL_ERROR tolerance.
+     ASSERT_REL_ERROR(output,ref,REL_ERROR);
+ }
+
+ void DistanceTestsF16::test_euclidean_distance_f16()
+ {
+     // Input patterns are stored back to back: pattern k starts at element k * vecDim.
+     const float16_t *vecA = inputA.ptr();
+     const float16_t *vecB = inputB.ptr();
+     float16_t *results = output.ptr();
+     const int dim = this->vecDim;
+
+     for (int k = 0; k < this->nbPatterns; ++k)
+     {
+         const int offset = k * dim;
+
+         results[k] = arm_euclidean_distance_f16(vecA + offset, vecB + offset, dim);
+     }
+
+     // One distance per pattern, checked against ref with the REL_ERROR tolerance.
+     ASSERT_REL_ERROR(output,ref,REL_ERROR);
+ }
+
+ void DistanceTestsF16::test_jensenshannon_distance_f16()
+ {
+     // Uses the dedicated Jensen-Shannon inputs loaded by setUp; patterns are
+     // stored back to back, pattern k starting at element k * vecDim.
+     const float16_t *vecA = inputA.ptr();
+     const float16_t *vecB = inputB.ptr();
+     float16_t *results = output.ptr();
+     const int dim = this->vecDim;
+
+     for (int k = 0; k < this->nbPatterns; ++k)
+     {
+         const int offset = k * dim;
+
+         results[k] = arm_jensenshannon_distance_f16(vecA + offset, vecB + offset, dim);
+     }
+
+     // This test has its own tolerance, REL_JS_ERROR (3e-2), which is wider
+     // than the REL_ERROR (2e-3) used by most of the other distance tests.
+     ASSERT_REL_ERROR(output,ref,REL_JS_ERROR);
+ }
+
+ void DistanceTestsF16::test_minkowski_distance_f16()
+ {
+     // dims[0] and dims[1] hold the pattern count and vector length (see setUp); the per-pattern orders follow.
+     const float16_t *vecA = inputA.ptr();
+     const float16_t *vecB = inputB.ptr();
+     const int16_t *orders = dims.ptr() + 2;
+     float16_t *results = output.ptr();
+     const int dim = this->vecDim;
+
+     for (int k = 0; k < this->nbPatterns; ++k)
+     {
+         const int offset = k * dim;
+
+         results[k] = arm_minkowski_distance_f16(vecA + offset, vecB + offset, orders[k],dim);
+     }
+
+     // Pattern k is evaluated with its own order, orders[k].
+     // This test has its own tolerance, REL_MK_ERROR (1e-2), which is wider
+     // than the REL_ERROR (2e-3) used by most of the other distance tests.
+     ASSERT_REL_ERROR(output,ref,REL_MK_ERROR);
+ }
+
+
+ void DistanceTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
+ {
+ // Loads the input, dimension and reference patterns and creates the output buffer for test `id`.
+ (void)paramsArgs;
+ if ((id != DistanceTestsF16::TEST_MINKOWSKI_DISTANCE_F16_9) && (id != DistanceTestsF16::TEST_JENSENSHANNON_DISTANCE_F16_8))
+ {
+ inputA.reload(DistanceTestsF16::INPUTA_F16_ID,mgr);
+ inputB.reload(DistanceTestsF16::INPUTB_F16_ID,mgr);
+ dims.reload(DistanceTestsF16::DIMS_S16_ID,mgr);
+ // dims layout as read here: [0] = number of patterns, [1] = vector length.
+ const int16_t *dimsp = dims.ptr();
+
+ this->nbPatterns=dimsp[0];
+ this->vecDim=dimsp[1];
+ output.create(this->nbPatterns,DistanceTestsF16::OUT_F16_ID,mgr);
+ }
+ // Per-test part: the reference pattern, plus anything specific to the test. There is no default case.
+ switch(id)
+ {
+ case DistanceTestsF16::TEST_BRAYCURTIS_DISTANCE_F16_1:
+ {
+ ref.reload(DistanceTestsF16::REF1_F16_ID,mgr);
+ }
+ break;
+
+ case DistanceTestsF16::TEST_CANBERRA_DISTANCE_F16_2:
+ {
+ ref.reload(DistanceTestsF16::REF2_F16_ID,mgr);
+ }
+ break;
+
+ case DistanceTestsF16::TEST_CHEBYSHEV_DISTANCE_F16_3:
+ {
+ ref.reload(DistanceTestsF16::REF3_F16_ID,mgr);
+ }
+ break;
+
+ case DistanceTestsF16::TEST_CITYBLOCK_DISTANCE_F16_4:
+ {
+ ref.reload(DistanceTestsF16::REF4_F16_ID,mgr);
+ }
+ break;
+
+ case DistanceTestsF16::TEST_CORRELATION_DISTANCE_F16_5:
+ {
+ ref.reload(DistanceTestsF16::REF5_F16_ID,mgr);
+ tmpA.create(this->vecDim,DistanceTestsF16::TMPA_F16_ID,mgr); // scratch buffer holding one vector
+ tmpB.create(this->vecDim,DistanceTestsF16::TMPB_F16_ID,mgr); // scratch buffer holding one vector
+ }
+ break;
+
+ case DistanceTestsF16::TEST_COSINE_DISTANCE_F16_6:
+ {
+ ref.reload(DistanceTestsF16::REF6_F16_ID,mgr);
+ }
+ break;
+
+ case DistanceTestsF16::TEST_EUCLIDEAN_DISTANCE_F16_7:
+ {
+ ref.reload(DistanceTestsF16::REF7_F16_ID,mgr);
+ }
+ break;
+ // Jensen-Shannon skips the common block above: it loads its own input patterns.
+ case DistanceTestsF16::TEST_JENSENSHANNON_DISTANCE_F16_8:
+ {
+ inputA.reload(DistanceTestsF16::INPUTA_JEN_F16_ID,mgr);
+ inputB.reload(DistanceTestsF16::INPUTB_JEN_F16_ID,mgr);
+ dims.reload(DistanceTestsF16::DIMS_S16_ID,mgr);
+
+ const int16_t *dimsp = dims.ptr();
+
+ this->nbPatterns=dimsp[0];
+ this->vecDim=dimsp[1];
+ output.create(this->nbPatterns,DistanceTestsF16::OUT_F16_ID,mgr);
+
+ ref.reload(DistanceTestsF16::REF8_F16_ID,mgr);
+ }
+ break;
+ // Minkowski skips the common block above: it loads its own dims pattern, read from index 2 onwards by the test as per-pattern orders.
+ case DistanceTestsF16::TEST_MINKOWSKI_DISTANCE_F16_9:
+ {
+ inputA.reload(DistanceTestsF16::INPUTA_F16_ID,mgr);
+ inputB.reload(DistanceTestsF16::INPUTB_F16_ID,mgr);
+ dims.reload(DistanceTestsF16::DIMS_MINKOWSKI_S16_ID,mgr);
+
+ const int16_t *dimsp = dims.ptr();
+
+ this->nbPatterns=dimsp[0];
+ this->vecDim=dimsp[1];
+ output.create(this->nbPatterns,DistanceTestsF16::OUT_F16_ID,mgr);
+
+ ref.reload(DistanceTestsF16::REF9_F16_ID,mgr);
+ }
+ break;
+
+ }
+
+
+
+
+
+ }
+
+ void DistanceTestsF16::tearDown(Testing::testID_t id,Client::PatternMgr *mgr)
+ {
+     static_cast<void>(id);    // the test id plays no part in the tear-down
+     this->output.dump(mgr);   // pass the computed distances to the pattern manager
+ }
diff --git a/CMSIS/DSP/Testing/desc_f16.txt b/CMSIS/DSP/Testing/desc_f16.txt
index 21635bf..7ed73db 100755
--- a/CMSIS/DSP/Testing/desc_f16.txt
+++ b/CMSIS/DSP/Testing/desc_f16.txt
@@ -382,6 +382,51 @@
}
}
+ group Distance Tests {
+ class = DistanceTests
+ folder = Distance
+
+ suite Distance Tests F16 {
+ class = DistanceTestsF16
+ folder = DistanceF16
+
+ Pattern DIMS_S16_ID : Dims1_s16.txt
+ Pattern DIMS_MINKOWSKI_S16_ID : Dims9_s16.txt
+
+ Pattern INPUTA_F16_ID : InputA1_f16.txt
+ Pattern INPUTB_F16_ID : InputB1_f16.txt
+
+ Pattern INPUTA_JEN_F16_ID : InputA8_f16.txt
+ Pattern INPUTB_JEN_F16_ID : InputB8_f16.txt
+
+ Pattern REF1_F16_ID : Ref1_f16.txt
+ Pattern REF2_F16_ID : Ref2_f16.txt
+ Pattern REF3_F16_ID : Ref3_f16.txt
+ Pattern REF4_F16_ID : Ref4_f16.txt
+ Pattern REF5_F16_ID : Ref5_f16.txt
+ Pattern REF6_F16_ID : Ref6_f16.txt
+ Pattern REF7_F16_ID : Ref7_f16.txt
+ Pattern REF8_F16_ID : Ref8_f16.txt
+ Pattern REF9_F16_ID : Ref9_f16.txt
+
+ Output OUT_F16_ID : Output
+ Output TMPA_F16_ID : TmpA
+ Output TMPB_F16_ID : TmpB
+
+ Functions {
+ arm_braycurtis_distance_f16:test_braycurtis_distance_f16
+ arm_canberra_distance_f16:test_canberra_distance_f16
+ arm_chebyshev_distance_f16:test_chebyshev_distance_f16
+ arm_cityblock_distance_f16:test_cityblock_distance_f16
+ arm_correlation_distance_f16:test_correlation_distance_f16
+ arm_cosine_distance_f16:test_cosine_distance_f16
+ arm_euclidean_distance_f16:test_euclidean_distance_f16
+ arm_jensenshannon_distance_f16:test_jensenshannon_distance_f16
+ arm_minkowski_distance_f16:test_minkowski_distance_f16
+ }
+ }
+ }
+
group Filtering Tests {
class = FilteringTests
folder = Filtering