Merge pull request #8705 from daverodgman/ctr-perf

Ctr perf
diff --git a/ChangeLog.d/ctr-perf.txt b/ChangeLog.d/ctr-perf.txt
new file mode 100644
index 0000000..bc04080
--- /dev/null
+++ b/ChangeLog.d/ctr-perf.txt
@@ -0,0 +1,3 @@
+Features
+   * Improve performance of AES-GCM, AES-CTR and CTR-DRBG when
+     hardware accelerated AES is not present (around 13-23% on 64-bit Arm).
diff --git a/library/aes.c b/library/aes.c
index f4b9739..b1a5c3e 100644
--- a/library/aes.c
+++ b/library/aes.c
@@ -53,6 +53,7 @@
 #endif
 
 #include "mbedtls/platform.h"
+#include "ctr.h"
 
 /*
  * This is a convenience shorthand macro to check if we need reverse S-box and
@@ -1441,36 +1442,38 @@
                           const unsigned char *input,
                           unsigned char *output)
 {
-    int c, i;
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    size_t n;
 
-    n = *nc_off;
+    size_t offset = *nc_off;
 
-    if (n > 0x0F) {
+    if (offset > 0x0F) {
         return MBEDTLS_ERR_AES_BAD_INPUT_DATA;
     }
 
-    while (length--) {
-        if (n == 0) {
+    for (size_t i = 0; i < length;) {
+        size_t n = 16;
+        if (offset == 0) {
             ret = mbedtls_aes_crypt_ecb(ctx, MBEDTLS_AES_ENCRYPT, nonce_counter, stream_block);
             if (ret != 0) {
                 goto exit;
             }
-
-            for (i = 16; i > 0; i--) {
-                if (++nonce_counter[i - 1] != 0) {
-                    break;
-                }
-            }
+            mbedtls_ctr_increment_counter(nonce_counter);
+        } else {
+            n -= offset;
         }
-        c = *input++;
-        *output++ = (unsigned char) (c ^ stream_block[n]);
 
-        n = (n + 1) & 0x0F;
+        if (n > (length - i)) {
+            n = (length - i);
+        }
+        mbedtls_xor(&output[i], &input[i], &stream_block[offset], n);
+        // offset might be non-zero for the last block, but in that case, we don't use it again
+        offset = 0;
+        i += n;
     }
 
-    *nc_off = n;
+    // capture offset for future resumption
+    *nc_off = (*nc_off + length) % 16;
+
     ret = 0;
 
 exit:
diff --git a/library/ctr.h b/library/ctr.h
new file mode 100644
index 0000000..aa48fb9
--- /dev/null
+++ b/library/ctr.h
@@ -0,0 +1,35 @@
+/**
+ * \file ctr.h
+ *
+ * \brief    This file contains common functionality for counter algorithms.
+ *
+ *  Copyright The Mbed TLS Contributors
+ *  SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+ */
+
+#ifndef MBEDTLS_CTR_H
+#define MBEDTLS_CTR_H
+
+#include "common.h"
+
+/**
+ * \brief               Increment a big-endian 16-byte value (wraps to zero on overflow).
+ *                      This is quite performance-sensitive for AES-CTR and CTR-DRBG.
+ *
+ * \param n             A 16-byte value to be incremented in place.
+ */
+static inline void mbedtls_ctr_increment_counter(uint8_t n[16])
+{
+    // The 32-bit version seems to perform about the same as a 64-bit version
+    // on 64-bit architectures, so no need to define a 64-bit version.
+    for (int i = 3;; i--) { // i selects a 32-bit word; word 3 (bytes 12..15) is least significant
+        uint32_t x = MBEDTLS_GET_UINT32_BE(n, i << 2);
+        x += 1;
+        MBEDTLS_PUT_UINT32_BE(x, n, i << 2);
+        if (x != 0 || i == 0) { // stop when there is no carry, or the top word has been done
+            break;
+        }
+    }
+}
+
+#endif /* MBEDTLS_CTR_H */
diff --git a/library/ctr_drbg.c b/library/ctr_drbg.c
index da34f95..66d9d28 100644
--- a/library/ctr_drbg.c
+++ b/library/ctr_drbg.c
@@ -14,6 +14,7 @@
 
 #if defined(MBEDTLS_CTR_DRBG_C)
 
+#include "ctr.h"
 #include "mbedtls/ctr_drbg.h"
 #include "mbedtls/platform_util.h"
 #include "mbedtls/error.h"
@@ -333,7 +334,7 @@
 {
     unsigned char tmp[MBEDTLS_CTR_DRBG_SEEDLEN];
     unsigned char *p = tmp;
-    int i, j;
+    int j;
     int ret = 0;
 #if !defined(MBEDTLS_AES_C)
     psa_status_t status;
@@ -346,11 +347,7 @@
         /*
          * Increase counter
          */
-        for (i = MBEDTLS_CTR_DRBG_BLOCKSIZE; i > 0; i--) {
-            if (++ctx->counter[i - 1] != 0) {
-                break;
-            }
-        }
+        mbedtls_ctr_increment_counter(ctx->counter);
 
         /*
          * Crypt counter block
@@ -372,9 +369,7 @@
         p += MBEDTLS_CTR_DRBG_BLOCKSIZE;
     }
 
-    for (i = 0; i < MBEDTLS_CTR_DRBG_SEEDLEN; i++) {
-        tmp[i] ^= data[i];
-    }
+    mbedtls_xor(tmp, tmp, data, MBEDTLS_CTR_DRBG_SEEDLEN);
 
     /*
      * Update key and counter
@@ -617,10 +612,11 @@
 {
     int ret = 0;
     mbedtls_ctr_drbg_context *ctx = (mbedtls_ctr_drbg_context *) p_rng;
-    unsigned char add_input[MBEDTLS_CTR_DRBG_SEEDLEN];
     unsigned char *p = output;
-    unsigned char tmp[MBEDTLS_CTR_DRBG_BLOCKSIZE];
-    int i;
+    struct {
+        unsigned char add_input[MBEDTLS_CTR_DRBG_SEEDLEN];
+        unsigned char tmp[MBEDTLS_CTR_DRBG_BLOCKSIZE];
+    } locals;
     size_t use_len;
 
     if (output_len > MBEDTLS_CTR_DRBG_MAX_REQUEST) {
@@ -631,7 +627,7 @@
         return MBEDTLS_ERR_CTR_DRBG_INPUT_TOO_BIG;
     }
 
-    memset(add_input, 0, MBEDTLS_CTR_DRBG_SEEDLEN);
+    memset(locals.add_input, 0, MBEDTLS_CTR_DRBG_SEEDLEN);
 
     if (ctx->reseed_counter > ctx->reseed_interval ||
         ctx->prediction_resistance) {
@@ -642,30 +638,26 @@
     }
 
     if (add_len > 0) {
-        if ((ret = block_cipher_df(add_input, additional, add_len)) != 0) {
+        if ((ret = block_cipher_df(locals.add_input, additional, add_len)) != 0) {
             goto exit;
         }
-        if ((ret = ctr_drbg_update_internal(ctx, add_input)) != 0) {
+        if ((ret = ctr_drbg_update_internal(ctx, locals.add_input)) != 0) {
             goto exit;
         }
     }
 
     while (output_len > 0) {
         /*
-         * Increase counter
+         * Increase counter (treat it as a 128-bit big-endian integer).
          */
-        for (i = MBEDTLS_CTR_DRBG_BLOCKSIZE; i > 0; i--) {
-            if (++ctx->counter[i - 1] != 0) {
-                break;
-            }
-        }
+        mbedtls_ctr_increment_counter(ctx->counter);
 
         /*
          * Crypt counter block
          */
 #if defined(MBEDTLS_AES_C)
         if ((ret = mbedtls_aes_crypt_ecb(&ctx->aes_ctx, MBEDTLS_AES_ENCRYPT,
-                                         ctx->counter, tmp)) != 0) {
+                                         ctx->counter, locals.tmp)) != 0) {
             goto exit;
         }
 #else
@@ -673,7 +665,7 @@
         size_t tmp_len;
 
         status = psa_cipher_update(&ctx->psa_ctx.operation, ctx->counter, sizeof(ctx->counter),
-                                   tmp, MBEDTLS_CTR_DRBG_BLOCKSIZE, &tmp_len);
+                                   locals.tmp, MBEDTLS_CTR_DRBG_BLOCKSIZE, &tmp_len);
         if (status != PSA_SUCCESS) {
             ret = psa_generic_status_to_mbedtls(status);
             goto exit;
@@ -685,20 +677,19 @@
         /*
          * Copy random block to destination
          */
-        memcpy(p, tmp, use_len);
+        memcpy(p, locals.tmp, use_len);
         p += use_len;
         output_len -= use_len;
     }
 
-    if ((ret = ctr_drbg_update_internal(ctx, add_input)) != 0) {
+    if ((ret = ctr_drbg_update_internal(ctx, locals.add_input)) != 0) {
         goto exit;
     }
 
     ctx->reseed_counter++;
 
 exit:
-    mbedtls_platform_zeroize(add_input, sizeof(add_input));
-    mbedtls_platform_zeroize(tmp, sizeof(tmp));
+    mbedtls_platform_zeroize(&locals, sizeof(locals));
     return ret;
 }
 
diff --git a/library/gcm.c b/library/gcm.c
index 20d55c0..c677ca4 100644
--- a/library/gcm.c
+++ b/library/gcm.c
@@ -401,12 +401,9 @@
 /* Increment the counter. */
 static void gcm_incr(unsigned char y[16])
 {
-    size_t i;
-    for (i = 16; i > 12; i--) {
-        if (++y[i - 1] != 0) {
-            break;
-        }
-    }
+    uint32_t x = MBEDTLS_GET_UINT32_BE(y, 12);
+    x++;
+    MBEDTLS_PUT_UINT32_BE(x, y, 12);
 }
 
 /* Calculate and apply the encryption mask. Process use_len bytes of data,
diff --git a/programs/test/benchmark.c b/programs/test/benchmark.c
index 755a731..6f7f69b 100644
--- a/programs/test/benchmark.c
+++ b/programs/test/benchmark.c
@@ -507,7 +507,7 @@
     char md5, ripemd160, sha1, sha256, sha512,
          sha3_224, sha3_256, sha3_384, sha3_512,
          des3, des,
-         aes_cbc, aes_cfb128, aes_cfb8, aes_gcm, aes_ccm, aes_xts, chachapoly,
+         aes_cbc, aes_cfb128, aes_cfb8, aes_ctr, aes_gcm, aes_ccm, aes_xts, chachapoly,
          aes_cmac, des3_cmac,
          aria, camellia, chacha20,
          poly1305,
@@ -571,6 +571,8 @@
                 todo.aes_cfb128 = 1;
             } else if (strcmp(argv[i], "aes_cfb8") == 0) {
                 todo.aes_cfb8 = 1;
+            } else if (strcmp(argv[i], "aes_ctr") == 0) {
+                todo.aes_ctr = 1;
             } else if (strcmp(argv[i], "aes_xts") == 0) {
                 todo.aes_xts = 1;
             } else if (strcmp(argv[i], "aes_gcm") == 0) {
@@ -774,6 +776,31 @@
         mbedtls_aes_free(&aes);
     }
 #endif
+#if defined(MBEDTLS_CIPHER_MODE_CTR)
+    if (todo.aes_ctr) {
+        int keysize;
+        mbedtls_aes_context aes;
+
+        uint8_t stream_block[16];
+        size_t nc_off;
+
+        mbedtls_aes_init(&aes);
+        for (keysize = 128; keysize <= 256; keysize += 64) {
+            mbedtls_snprintf(title, sizeof(title), "AES-CTR-%d", keysize);
+
+            memset(buf, 0, sizeof(buf));
+            memset(tmp, 0, sizeof(tmp));
+            memset(stream_block, 0, sizeof(stream_block));
+            nc_off = 0;
+
+            CHECK_AND_CONTINUE(mbedtls_aes_setkey_enc(&aes, tmp, keysize));
+
+            TIME_AND_TSC(title, mbedtls_aes_crypt_ctr(&aes, BUFSIZE, &nc_off, tmp, stream_block,
+                                                      buf, buf));
+        }
+        mbedtls_aes_free(&aes);
+    }
+#endif
 #if defined(MBEDTLS_CIPHER_MODE_XTS)
     if (todo.aes_xts) {
         int keysize;
diff --git a/tests/suites/test_suite_aes.ctr.data b/tests/suites/test_suite_aes.ctr.data
new file mode 100644
index 0000000..a148236
--- /dev/null
+++ b/tests/suites/test_suite_aes.ctr.data
@@ -0,0 +1,137 @@
+# Test vectors from NIST Special Publication 800-38A 2001 Edition
+# Recommendation for Block Cipher Modes of Operation
+
+# as below, but corrupt the key to check the test catches it
+AES-CTR NIST 128 bad
+aes_ctr:"00000000000000000000000000000000":"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff":"6bc1bee22e409f96e93d7e117393172aae2d8a571e03ac9c9eb76fac45af8e5130c81c46a35ce411e5fbc1191a0a52eff69f2445df4f9b17ad2b417be66c3710":"874d6191b620e3261bef6864990db6ce9806f66b7970fdff8617187bb9fffdff5ae4df3edbd5d35e5b4f09020db03eab1e031dda2fbe03d1792170a0f3009cee":1
+
+AES-CTR NIST 128
+aes_ctr:"2b7e151628aed2a6abf7158809cf4f3c":"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff":"6bc1bee22e409f96e93d7e117393172aae2d8a571e03ac9c9eb76fac45af8e5130c81c46a35ce411e5fbc1191a0a52eff69f2445df4f9b17ad2b417be66c3710":"874d6191b620e3261bef6864990db6ce9806f66b7970fdff8617187bb9fffdff5ae4df3edbd5d35e5b4f09020db03eab1e031dda2fbe03d1792170a0f3009cee":0
+
+AES-CTR NIST 192
+depends_on:!MBEDTLS_AES_ONLY_128_BIT_KEY_LENGTH
+aes_ctr:"8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b":"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff":"6bc1bee22e409f96e93d7e117393172aae2d8a571e03ac9c9eb76fac45af8e5130c81c46a35ce411e5fbc1191a0a52eff69f2445df4f9b17ad2b417be66c3710":"1abc932417521ca24f2b0459fe7e6e0b090339ec0aa6faefd5ccc2c6f4ce8e941e36b26bd1ebc670d1bd1d665620abf74f78a7f6d29809585a97daec58c6b050":0
+
+AES-CTR NIST 256
+depends_on:!MBEDTLS_AES_ONLY_128_BIT_KEY_LENGTH
+aes_ctr:"603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4":"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff":"6bc1bee22e409f96e93d7e117393172aae2d8a571e03ac9c9eb76fac45af8e5130c81c46a35ce411e5fbc1191a0a52eff69f2445df4f9b17ad2b417be66c3710":"601ec313775789a5b7a7f504bbf3d228f443e3ca4d62b59aca84e990cacaf5c52b0930daa23de94ce87017ba2d84988ddfc9c58db67aada613c2dd08457941a6":0
+
+AES-CTR aes_encrypt_ctr_multipart 1 1
+aes_encrypt_ctr_multipart:1:1
+
+AES-CTR aes_encrypt_ctr_multipart 2 1
+aes_encrypt_ctr_multipart:2:1
+
+AES-CTR aes_encrypt_ctr_multipart 2 2
+aes_encrypt_ctr_multipart:2:2
+
+AES-CTR aes_encrypt_ctr_multipart 4 1
+aes_encrypt_ctr_multipart:4:1
+
+AES-CTR aes_encrypt_ctr_multipart 4 2
+aes_encrypt_ctr_multipart:4:2
+
+AES-CTR aes_encrypt_ctr_multipart 15 1
+aes_encrypt_ctr_multipart:15:1
+
+AES-CTR aes_encrypt_ctr_multipart 15 2
+aes_encrypt_ctr_multipart:15:2
+
+AES-CTR aes_encrypt_ctr_multipart 15 8
+aes_encrypt_ctr_multipart:15:8
+
+AES-CTR aes_encrypt_ctr_multipart 15 15
+aes_encrypt_ctr_multipart:15:15
+
+AES-CTR aes_encrypt_ctr_multipart 16 1
+aes_encrypt_ctr_multipart:16:1
+
+AES-CTR aes_encrypt_ctr_multipart 16 2
+aes_encrypt_ctr_multipart:16:2
+
+AES-CTR aes_encrypt_ctr_multipart 16 8
+aes_encrypt_ctr_multipart:16:8
+
+AES-CTR aes_encrypt_ctr_multipart 16 15
+aes_encrypt_ctr_multipart:16:15
+
+AES-CTR aes_encrypt_ctr_multipart 16 16
+aes_encrypt_ctr_multipart:16:16
+
+AES-CTR aes_encrypt_ctr_multipart 17 1
+aes_encrypt_ctr_multipart:17:1
+
+AES-CTR aes_encrypt_ctr_multipart 17 2
+aes_encrypt_ctr_multipart:17:2
+
+AES-CTR aes_encrypt_ctr_multipart 17 8
+aes_encrypt_ctr_multipart:17:8
+
+AES-CTR aes_encrypt_ctr_multipart 17 15
+aes_encrypt_ctr_multipart:17:15
+
+AES-CTR aes_encrypt_ctr_multipart 17 16
+aes_encrypt_ctr_multipart:17:16
+
+AES-CTR aes_encrypt_ctr_multipart 63 1
+aes_encrypt_ctr_multipart:63:1
+
+AES-CTR aes_encrypt_ctr_multipart 63 2
+aes_encrypt_ctr_multipart:63:2
+
+AES-CTR aes_encrypt_ctr_multipart 63 8
+aes_encrypt_ctr_multipart:63:8
+
+AES-CTR aes_encrypt_ctr_multipart 63 15
+aes_encrypt_ctr_multipart:63:15
+
+AES-CTR aes_encrypt_ctr_multipart 63 16
+aes_encrypt_ctr_multipart:63:16
+
+AES-CTR aes_encrypt_ctr_multipart 63 17
+aes_encrypt_ctr_multipart:63:17
+
+AES-CTR aes_encrypt_ctr_multipart 64 1
+aes_encrypt_ctr_multipart:64:1
+
+AES-CTR aes_encrypt_ctr_multipart 64 2
+aes_encrypt_ctr_multipart:64:2
+
+AES-CTR aes_encrypt_ctr_multipart 64 8
+aes_encrypt_ctr_multipart:64:8
+
+AES-CTR aes_encrypt_ctr_multipart 64 15
+aes_encrypt_ctr_multipart:64:15
+
+AES-CTR aes_encrypt_ctr_multipart 64 16
+aes_encrypt_ctr_multipart:64:16
+
+AES-CTR aes_encrypt_ctr_multipart 64 17
+aes_encrypt_ctr_multipart:64:17
+
+AES-CTR aes_encrypt_ctr_multipart 1024 1
+aes_encrypt_ctr_multipart:1024:1
+
+AES-CTR aes_encrypt_ctr_multipart 1024 10
+aes_encrypt_ctr_multipart:1024:10
+
+AES-CTR aes_encrypt_ctr_multipart 1024 15
+aes_encrypt_ctr_multipart:1024:15
+
+AES-CTR aes_encrypt_ctr_multipart 1024 16
+aes_encrypt_ctr_multipart:1024:16
+
+AES-CTR aes_encrypt_ctr_multipart 1024 63
+aes_encrypt_ctr_multipart:1024:63
+
+AES-CTR aes_encrypt_ctr_multipart 1024 64
+aes_encrypt_ctr_multipart:1024:64
+
+AES-CTR aes_encrypt_ctr_multipart 1024 65
+aes_encrypt_ctr_multipart:1024:65
+
+AES-CTR aes_encrypt_ctr_multipart 1024 1023
+aes_encrypt_ctr_multipart:1024:1023
+
+AES-CTR aes_encrypt_ctr_multipart 1024 1024
+aes_encrypt_ctr_multipart:1024:1024
diff --git a/tests/suites/test_suite_aes.function b/tests/suites/test_suite_aes.function
index 2ca3f7f..9118a98 100644
--- a/tests/suites/test_suite_aes.function
+++ b/tests/suites/test_suite_aes.function
@@ -88,6 +88,124 @@
 }
 /* END_CASE */
 
+/* BEGIN_CASE depends_on:MBEDTLS_CIPHER_MODE_CTR */
+void aes_ctr(data_t *key, data_t *ictr, data_t *pt, data_t *ct, int expected)
+{
+    // Check AES-CTR on each prefix of pt/ct in both directions; expected == 1 demands a mismatch.
+    unsigned char *output = NULL;
+    unsigned char ctr[16];
+    unsigned char stream_block[16];
+    mbedtls_aes_context ctx;
+
+    // Initialise ctx before the sanity checks: a failed check jumps to exit, which frees ctx.
+    mbedtls_aes_init(&ctx);
+    TEST_ASSERT(pt->len == ct->len);
+    TEST_ASSERT(key->len == 16 || key->len == 24 || key->len == 32);
+
+    TEST_CALLOC(output, pt->len);
+
+    // expected result is always success on zero-length input, so skip len == 0 if expecting failure
+    for (size_t len = (expected == 0 ? 0 : 1); len <= pt->len; len++) {
+        for (int i = 0; i < 2; i++) {
+            TEST_ASSERT(mbedtls_aes_setkey_enc(&ctx, key->x, key->len * 8) == 0);
+
+            memcpy(ctr, ictr->x, 16);
+            memset(stream_block, 0, 16);
+            memset(output, 0, pt->len);
+
+            size_t nc_off = 0;
+            if (i == 0) {
+                // encrypt
+                TEST_EQUAL(mbedtls_aes_crypt_ctr(&ctx, len, &nc_off, ctr,
+                                                 stream_block, pt->x, output), 0);
+                TEST_ASSERT(!!memcmp(output, ct->x, len) == expected);
+            } else {
+                // decrypt
+                TEST_EQUAL(mbedtls_aes_crypt_ctr(&ctx, len, &nc_off, ctr,
+                                                 stream_block, ct->x, output), 0);
+                TEST_ASSERT(!!memcmp(output, pt->x, len) == expected);
+            }
+        }
+    }
+
+exit:
+    mbedtls_free(output);
+    mbedtls_aes_free(&ctx);
+}
+/* END_CASE */
+
+/* BEGIN_CASE depends_on:MBEDTLS_CIPHER_MODE_CTR */
+void aes_encrypt_ctr_multipart(int length, int step_size)
+{
+    // Check that encrypting in several calls (step doubling each time) matches a single call.
+    unsigned char key[16];
+    unsigned char ctr_a[16];
+    unsigned char ctr_b[16];
+    unsigned char stream_block_a[16];
+    unsigned char stream_block_b[16];
+    unsigned char *input = NULL;
+    unsigned char *output_a = NULL;
+    unsigned char *output_b = NULL;
+    mbedtls_aes_context ctx;
+    size_t nc_off_a, nc_off_b;
+
+    // Initialise ctx before any check: a failed check jumps to exit, which frees ctx.
+    mbedtls_aes_init(&ctx);
+    TEST_ASSERT(length >= 0);
+    TEST_ASSERT(step_size > 0);
+
+    TEST_CALLOC(input, length);
+    TEST_CALLOC(output_a, length);
+    TEST_CALLOC(output_b, length);
+
+    // set up a random key
+    mbedtls_test_rnd_std_rand(NULL, key, sizeof(key));
+
+    // random input
+    mbedtls_test_rnd_std_rand(NULL, input, length);
+
+    // complete encryption in one call
+    TEST_ASSERT(mbedtls_aes_setkey_enc(&ctx, key, sizeof(key) * 8) == 0);
+    memset(ctr_a, 0, sizeof(ctr_a));
+    memset(stream_block_a, 0, sizeof(stream_block_a));
+    nc_off_a = 0;
+    TEST_EQUAL(mbedtls_aes_crypt_ctr(&ctx, length, &nc_off_a, ctr_a,
+                                     stream_block_a, input, output_a), 0);
+    mbedtls_aes_free(&ctx);
+
+    // encrypt in multiple steps of varying size
+    mbedtls_aes_init(&ctx);
+    TEST_ASSERT(mbedtls_aes_setkey_enc(&ctx, key, sizeof(key) * 8) == 0);
+    memset(ctr_b, 0, sizeof(ctr_b));
+    memset(stream_block_b, 0, sizeof(stream_block_b));
+    nc_off_b = 0;
+    size_t remaining = length;
+    unsigned char *ip = input, *op = output_b;
+    while (remaining != 0) {
+        size_t l = MIN(remaining, (size_t) step_size);
+        step_size *= 2;
+        remaining -= l;
+        TEST_EQUAL(mbedtls_aes_crypt_ctr(&ctx, l, &nc_off_b, ctr_b, stream_block_b, ip, op), 0);
+        ip += l;
+        op += l;
+    }
+
+    // finally, validate that multiple steps produced same result as single-pass
+    TEST_MEMORY_COMPARE(output_a, length, output_b, length);
+    TEST_MEMORY_COMPARE(ctr_a, sizeof(ctr_a), ctr_b, sizeof(ctr_b));
+    TEST_MEMORY_COMPARE(stream_block_a, sizeof(stream_block_a),
+                        stream_block_b, sizeof(stream_block_b));
+    TEST_EQUAL(nc_off_a, nc_off_b);
+
+exit:
+    mbedtls_free(input);
+    mbedtls_free(output_a);
+    mbedtls_free(output_b);
+
+    mbedtls_aes_free(&ctx);
+}
+/* END_CASE */
+
 /* BEGIN_CASE depends_on:!MBEDTLS_BLOCK_CIPHER_NO_DECRYPT */
 void aes_decrypt_ecb(data_t *key_str, data_t *src_str,
                      data_t *dst, int setkey_result)
diff --git a/tests/suites/test_suite_ctr_drbg.data b/tests/suites/test_suite_ctr_drbg.data
index 028a07f..89dfb97 100644
--- a/tests/suites/test_suite_ctr_drbg.data
+++ b/tests/suites/test_suite_ctr_drbg.data
@@ -1105,3 +1105,48 @@
 
 CTR_DRBG self test
 ctr_drbg_selftest:
+
+Increment counter rollover
+ctr_increment_rollover
+
+Increment counter 00
+ctr_increment:"00"
+
+Increment counter ff00
+ctr_increment:"ff00"
+
+Increment counter ff0000
+ctr_increment:"ff0000"
+
+Increment counter ff000000
+ctr_increment:"ff000000"
+
+Increment counter ff00000000
+ctr_increment:"ff00000000"
+
+Increment counter ff0000000000
+ctr_increment:"ff0000000000"
+
+Increment counter ff000000000000
+ctr_increment:"ff000000000000"
+
+Increment counter 01
+ctr_increment:"01"
+
+Increment counter ff01
+ctr_increment:"ff01"
+
+Increment counter ff0001
+ctr_increment:"ff0001"
+
+Increment counter ff000001
+ctr_increment:"ff000001"
+
+Increment counter ff00000001
+ctr_increment:"ff00000001"
+
+Increment counter ff0000000001
+ctr_increment:"ff0000000001"
+
+Increment counter ff000000000001
+ctr_increment:"ff000000000001"
diff --git a/tests/suites/test_suite_ctr_drbg.function b/tests/suites/test_suite_ctr_drbg.function
index 1f0a072..425c43e 100644
--- a/tests/suites/test_suite_ctr_drbg.function
+++ b/tests/suites/test_suite_ctr_drbg.function
@@ -2,6 +2,7 @@
 #include "mbedtls/entropy.h"
 #include "mbedtls/ctr_drbg.h"
 #include "string.h"
+#include "ctr.h"
 
 #if defined(MBEDTLS_THREADING_PTHREAD)
 #include "mbedtls/threading.h"
@@ -443,3 +444,75 @@
     AES_PSA_DONE();
 }
 /* END_CASE */
+
+/* BEGIN_CASE */
+void ctr_increment_rollover()
+{
+    uint8_t c[16];
+    uint8_t r[16];
+
+    // test all increments from 2^n - 1 to 2^n (i.e. where we roll over into the next bit)
+    for (int n = 0; n <= 128; n++) {
+        memset(c, 0, 16);
+        memset(r, 0, 16);
+
+        // set least significant (highest address) n bits to 1, i.e. generate (2^n - 1)
+        for (int i = 0; i < n; i++) {
+            int bit = i % 8;
+            int byte = (i / 8);
+            c[15 - byte] |= 1 << bit;
+        }
+        // increment to get 2^n
+        mbedtls_ctr_increment_counter(c);
+
+        // now generate a reference result equal to 2^n - i.e. set only bit n (counting from 0)
+        // if n == 128, this will not set any bits (i.e. the 128-bit value wraps to 0).
+        int bit = n % 8;
+        int byte = n / 8;
+        if (byte < 16) {
+            r[15 - byte] = 1 << bit;
+        }
+
+        TEST_MEMORY_COMPARE(c, 16, r, 16);
+    }
+
+    uint64_t lsb = 10, msb = 20; // also check a no-carry value against a byte-wise reference
+    MBEDTLS_PUT_UINT64_BE(msb, c, 0);
+    MBEDTLS_PUT_UINT64_BE(lsb, c, 8);
+    memcpy(r, c, 16);
+    mbedtls_ctr_increment_counter(c);
+    for (int i = 15; i >= 0; i--) {
+        r[i] += 1;
+        if (r[i] != 0) {
+            break;
+        }
+    }
+    TEST_MEMORY_COMPARE(c, 16, r, 16);
+}
+/* END_CASE */
+
+/* BEGIN_CASE */
+void ctr_increment(data_t *x)
+{
+    uint8_t c[16];
+    uint8_t r[16];
+
+    // c = x, left-aligned and zero-padded (assumes x->len <= 16, unchecked); r = copy of c
+    memset(c, 0, 16);
+    memcpy(c, x->x, x->len);
+    memcpy(r, c, 16);
+
+    // increment c
+    mbedtls_ctr_increment_counter(c);
+    // increment reference one byte at a time from the last byte, carrying while a byte wraps to 0
+    for (int i = 15; i >= 0; i--) {
+        r[i] += 1;
+        if (r[i] != 0) {
+            break;
+        }
+    }
+
+    // test that mbedtls_ctr_increment_counter behaviour matches reference
+    TEST_MEMORY_COMPARE(c, 16, r, 16);
+}
+/* END_CASE */