Use a single fast path in mbedtls_xor (the 32-bit loop is now built only when the 64-bit loop is not); this gains around 1% in benchmarks

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
diff --git a/library/common.h b/library/common.h
index 89f3b1f..b48a1fc 100644
--- a/library/common.h
+++ b/library/common.h
@@ -142,12 +142,13 @@
         uint64_t x = mbedtls_get_unaligned_uint64(a + i) ^ mbedtls_get_unaligned_uint64(b + i);
         mbedtls_put_unaligned_uint64(r + i, x);
     }
-#endif
+#else
     for (; (i + 4) <= n; i += 4) {
         uint32_t x = mbedtls_get_unaligned_uint32(a + i) ^ mbedtls_get_unaligned_uint32(b + i);
         mbedtls_put_unaligned_uint32(r + i, x);
     }
 #endif
+#endif
     for (; i < n; i++) {
         r[i] = a[i] ^ b[i];
     }