Return plaintext data sooner when decrypting without padding
diff --git a/library/cipher.c b/library/cipher.c
index b0e0d87..cf82a82 100644
--- a/library/cipher.c
+++ b/library/cipher.c
@@ -299,8 +299,10 @@
         /*
          * If there is not enough data for a full block, cache it.
          */
-        if( ( ctx->operation == MBEDTLS_DECRYPT &&
+        if( ( ctx->operation == MBEDTLS_DECRYPT && NULL != ctx->add_padding &&
                 ilen <= mbedtls_cipher_get_block_size( ctx ) - ctx->unprocessed_len ) ||
+            ( ctx->operation == MBEDTLS_DECRYPT && NULL == ctx->add_padding &&
+                ilen < mbedtls_cipher_get_block_size( ctx ) - ctx->unprocessed_len ) ||
              ( ctx->operation == MBEDTLS_ENCRYPT &&
                 ilen < mbedtls_cipher_get_block_size( ctx ) - ctx->unprocessed_len ) )
         {
@@ -341,9 +343,17 @@
          */
         if( 0 != ilen )
         {
+            /* Encryption: cache only a trailing partial block.
+             * Decryption with padding: always hold back at least one whole block.
+             * Decryption without padding: cache only a trailing partial block.
+             */
             copy_len = ilen % mbedtls_cipher_get_block_size( ctx );
-            if( copy_len == 0 && ctx->operation == MBEDTLS_DECRYPT )
+            if( copy_len == 0 &&
+                ctx->operation == MBEDTLS_DECRYPT &&
+                NULL != ctx->add_padding)
+            {
                 copy_len = mbedtls_cipher_get_block_size( ctx );
+            }
 
             memcpy( ctx->unprocessed_data, &( input[ilen - copy_len] ),
                     copy_len );