/*
 *  AES-NI support functions
 *
 *  Copyright (C) 2013, Brainspark B.V.
 *
 *  This file is part of PolarSSL (http://www.polarssl.org)
 *  Lead Maintainer: Paul Bakker <polarssl_maintainer at polarssl.org>
 *
 *  All rights reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
| 25 | |
/*
 * [AES-WP] http://software.intel.com/en-us/articles/intel-advanced-encryption-standard-aes-instructions-set
 * [CLMUL-WP] http://software.intel.com/en-us/articles/intel-carry-less-multiplication-instruction-and-its-usage-for-computing-the-gcm-mode/
 */
| 30 | |
| 31 | #include "polarssl/config.h" |
| 32 | |
| 33 | #if defined(POLARSSL_AESNI_C) |
| 34 | |
| 35 | #include "polarssl/aesni.h" |
Manuel Pégourié-Gonnard | 5b68565 | 2013-12-18 11:45:21 +0100 | [diff] [blame] | 36 | #include <stdio.h> |
Manuel Pégourié-Gonnard | 92ac76f | 2013-12-16 17:12:53 +0100 | [diff] [blame] | 37 | |
| 38 | #if defined(POLARSSL_HAVE_X86_64) |
| 39 | |
| 40 | /* |
Manuel Pégourié-Gonnard | 8eaf20b | 2013-12-18 19:14:53 +0100 | [diff] [blame] | 41 | * AES-NI support detection routine |
Manuel Pégourié-Gonnard | 92ac76f | 2013-12-16 17:12:53 +0100 | [diff] [blame] | 42 | */ |
Manuel Pégourié-Gonnard | 8eaf20b | 2013-12-18 19:14:53 +0100 | [diff] [blame] | 43 | int aesni_supports( unsigned int what ) |
Manuel Pégourié-Gonnard | 92ac76f | 2013-12-16 17:12:53 +0100 | [diff] [blame] | 44 | { |
Manuel Pégourié-Gonnard | 8eaf20b | 2013-12-18 19:14:53 +0100 | [diff] [blame] | 45 | static int done = 0; |
| 46 | static unsigned int c = 0; |
Manuel Pégourié-Gonnard | 92ac76f | 2013-12-16 17:12:53 +0100 | [diff] [blame] | 47 | |
Manuel Pégourié-Gonnard | 8eaf20b | 2013-12-18 19:14:53 +0100 | [diff] [blame] | 48 | if( ! done ) |
Manuel Pégourié-Gonnard | 92ac76f | 2013-12-16 17:12:53 +0100 | [diff] [blame] | 49 | { |
| 50 | asm( "movl $1, %%eax \n" |
| 51 | "cpuid \n" |
| 52 | : "=c" (c) |
| 53 | : |
| 54 | : "eax", "ebx", "edx" ); |
Manuel Pégourié-Gonnard | 8eaf20b | 2013-12-18 19:14:53 +0100 | [diff] [blame] | 55 | done = 1; |
Manuel Pégourié-Gonnard | 92ac76f | 2013-12-16 17:12:53 +0100 | [diff] [blame] | 56 | } |
| 57 | |
Manuel Pégourié-Gonnard | 8eaf20b | 2013-12-18 19:14:53 +0100 | [diff] [blame] | 58 | return( ( c & what ) != 0 ); |
Manuel Pégourié-Gonnard | 92ac76f | 2013-12-16 17:12:53 +0100 | [diff] [blame] | 59 | } |
| 60 | |
Manuel Pégourié-Gonnard | 5b68565 | 2013-12-18 11:45:21 +0100 | [diff] [blame] | 61 | /* |
| 62 | * AES-NI AES-ECB block en(de)cryption |
| 63 | */ |
| 64 | int aesni_crypt_ecb( aes_context *ctx, |
| 65 | int mode, |
| 66 | const unsigned char input[16], |
| 67 | unsigned char output[16] ) |
| 68 | { |
| 69 | asm( "movdqu (%3), %%xmm0 \n" // load input |
| 70 | "movdqu (%1), %%xmm1 \n" // load round key 0 |
| 71 | "pxor %%xmm1, %%xmm0 \n" // round 0 |
| 72 | "addq $16, %1 \n" // point to next round key |
| 73 | "subl $1, %0 \n" // normal rounds = nr - 1 |
| 74 | "test %2, %2 \n" // mode? |
| 75 | "jz 2f \n" // 0 = decrypt |
| 76 | |
| 77 | "1: \n" // encryption loop |
| 78 | "movdqu (%1), %%xmm1 \n" // load round key |
| 79 | "aesenc %%xmm1, %%xmm0 \n" // do round |
| 80 | "addq $16, %1 \n" // point to next round key |
| 81 | "subl $1, %0 \n" // loop |
| 82 | "jnz 1b \n" |
| 83 | "movdqu (%1), %%xmm1 \n" // load round key |
| 84 | "aesenclast %%xmm1, %%xmm0 \n" // last round |
| 85 | "jmp 3f \n" |
| 86 | |
| 87 | "2: \n" // decryption loop |
| 88 | "movdqu (%1), %%xmm1 \n" |
| 89 | "aesdec %%xmm1, %%xmm0 \n" |
| 90 | "addq $16, %1 \n" |
| 91 | "subl $1, %0 \n" |
| 92 | "jnz 2b \n" |
| 93 | "movdqu (%1), %%xmm1 \n" // load round key |
| 94 | "aesdeclast %%xmm1, %%xmm0 \n" // last round |
| 95 | |
| 96 | "3: \n" |
| 97 | "movdqu %%xmm0, (%4) \n" // export output |
| 98 | : |
| 99 | : "r" (ctx->nr), "r" (ctx->rk), "r" (mode), "r" (input), "r" (output) |
| 100 | : "memory", "cc", "xmm0", "xmm1" ); |
| 101 | |
| 102 | |
| 103 | return( 0 ); |
| 104 | } |
Manuel Pégourié-Gonnard | d333f67 | 2013-12-26 11:44:46 +0100 | [diff] [blame^] | 105 | |
| 106 | /* |
| 107 | * GCM multiplication: c = a times b in GF(2^128) |
| 108 | * Based on [CLMUL-WP] algorithms 1 (with equation 27) and 5. |
| 109 | */ |
| 110 | int aesni_gcm_mult( unsigned char c[16], |
| 111 | const unsigned char a[16], |
| 112 | const unsigned char b[16] ) |
| 113 | { |
| 114 | unsigned char aa[16], bb[16], cc[16]; |
| 115 | size_t i; |
| 116 | |
| 117 | /* The inputs are in big-endian order, so byte-reverse them */ |
| 118 | for( i = 0; i < 16; i++ ) |
| 119 | { |
| 120 | aa[i] = a[15 - i]; |
| 121 | bb[i] = b[15 - i]; |
| 122 | } |
| 123 | |
| 124 | asm( "movdqu (%0), %%xmm0 \n" // a1:a0 |
| 125 | "movdqu (%1), %%xmm1 \n" // b1:b0 |
| 126 | |
| 127 | /* |
| 128 | * Caryless multiplication xmm2:xmm1 = xmm0 * xmm1 |
| 129 | * using [CLMUL-WP] algorithm 1 (p. 13). |
| 130 | */ |
| 131 | "movdqa %%xmm1, %%xmm2 \n" // copy of b1:b0 |
| 132 | "movdqa %%xmm1, %%xmm3 \n" // same |
| 133 | "movdqa %%xmm1, %%xmm4 \n" // same |
| 134 | "pclmulqdq $0x00, %%xmm0, %%xmm1 \n" // a0*b0 = c1:c0 |
| 135 | "pclmulqdq $0x11, %%xmm0, %%xmm2 \n" // a1*b1 = d1:d0 |
| 136 | "pclmulqdq $0x10, %%xmm0, %%xmm3 \n" // a0*b1 = e1:e0 |
| 137 | "pclmulqdq $0x01, %%xmm0, %%xmm4 \n" // a1*b0 = f1:f0 |
| 138 | "pxor %%xmm3, %%xmm4 \n" // e1+f1:e0+f0 |
| 139 | "movdqa %%xmm4, %%xmm3 \n" // same |
| 140 | "psrldq $8, %%xmm4 \n" // 0:e1+f1 |
| 141 | "pslldq $8, %%xmm3 \n" // e0+f0:0 |
| 142 | "pxor %%xmm4, %%xmm2 \n" // d1:d0+e1+f1 |
| 143 | "pxor %%xmm3, %%xmm1 \n" // c1+e0+f1:c0 |
| 144 | |
| 145 | /* |
| 146 | * Now shift the result one bit to the left, |
| 147 | * taking advantage of [CLMUL-WP] eq 27 (p. 20) |
| 148 | */ |
| 149 | "movdqa %%xmm1, %%xmm3 \n" // r1:r0 |
| 150 | "movdqa %%xmm2, %%xmm4 \n" // r3:r2 |
| 151 | "psllq $1, %%xmm1 \n" // r1<<1:r0<<1 |
| 152 | "psllq $1, %%xmm2 \n" // r3<<1:r2<<1 |
| 153 | "psrlq $63, %%xmm3 \n" // r1>>63:r0>>63 |
| 154 | "psrlq $63, %%xmm4 \n" // r3>>63:r2>>63 |
| 155 | "movdqa %%xmm3, %%xmm5 \n" // r1>>63:r0>>63 |
| 156 | "pslldq $8, %%xmm3 \n" // r0>>63:0 |
| 157 | "pslldq $8, %%xmm4 \n" // r2>>63:0 |
| 158 | "psrldq $8, %%xmm5 \n" // 0:r1>>63 |
| 159 | "por %%xmm3, %%xmm1 \n" // r1<<1|r0>>63:r0<<1 |
| 160 | "por %%xmm4, %%xmm2 \n" // r3<<1|r2>>62:r2<<1 |
| 161 | "por %%xmm5, %%xmm2 \n" // r3<<1|r2>>62:r2<<1|r1>>63 |
| 162 | |
| 163 | /* |
| 164 | * Now reduce modulo the GCM polynomial x^128 + x^7 + x^2 + x + 1 |
| 165 | * using [CLMUL-WP] algorithm 5 (p. 20). |
| 166 | * Currently xmm2:xmm1 holds x3:x2:x1:x0 (already shifted). |
| 167 | */ |
| 168 | /* Step 2 (1) */ |
| 169 | "movdqa %%xmm1, %%xmm3 \n" // x1:x0 |
| 170 | "movdqa %%xmm1, %%xmm4 \n" // same |
| 171 | "movdqa %%xmm1, %%xmm5 \n" // same |
| 172 | "psllq $63, %%xmm3 \n" // x1<<63:x0<<63 = stuff:a |
| 173 | "psllq $62, %%xmm4 \n" // x1<<62:x0<<62 = stuff:b |
| 174 | "psllq $57, %%xmm5 \n" // x1<<57:x0<<57 = stuff:c |
| 175 | |
| 176 | /* Step 2 (2) */ |
| 177 | "pxor %%xmm4, %%xmm3 \n" // stuff:a+b |
| 178 | "pxor %%xmm5, %%xmm3 \n" // stuff:a+b+c |
| 179 | "pslldq $8, %%xmm3 \n" // a+b+c:0 |
| 180 | "pxor %%xmm3, %%xmm1 \n" // x1+a+b+c:x0 = d:x0 |
| 181 | |
| 182 | /* Steps 3 and 4 */ |
| 183 | "movdqa %%xmm1,%%xmm0 \n" // d:x0 |
| 184 | "movdqa %%xmm1,%%xmm4 \n" // same |
| 185 | "movdqa %%xmm1,%%xmm5 \n" // same |
| 186 | "psrlq $1, %%xmm0 \n" // e1:x0>>1 = e1:e0' |
| 187 | "psrlq $2, %%xmm4 \n" // f1:x0>>2 = f1:f0' |
| 188 | "psrlq $7, %%xmm5 \n" // g1:x0>>7 = g1:g0' |
| 189 | "pxor %%xmm4, %%xmm0 \n" // e1+f1:e0'+f0' |
| 190 | "pxor %%xmm5, %%xmm0 \n" // e1+f1+g1:e0'+f0'+g0' |
| 191 | // e0'+f0'+g0' is almost e0+f0+g0, except for some missing |
| 192 | // bits carried from d. Now get those bits back in. |
| 193 | "movdqa %%xmm1,%%xmm3 \n" // d:x0 |
| 194 | "movdqa %%xmm1,%%xmm4 \n" // same |
| 195 | "movdqa %%xmm1,%%xmm5 \n" // same |
| 196 | "psllq $63, %%xmm3 \n" // d<<63:stuff |
| 197 | "psllq $62, %%xmm4 \n" // d<<62:stuff |
| 198 | "psllq $57, %%xmm5 \n" // d<<57:stuff |
| 199 | "pxor %%xmm4, %%xmm3 \n" // d<<63+d<<62:stuff |
| 200 | "pxor %%xmm5, %%xmm3 \n" // missing bits of d:stuff |
| 201 | "psrldq $8, %%xmm3 \n" // 0:missing bits of d |
| 202 | "pxor %%xmm3, %%xmm0 \n" // e1+f1+g1:e0+f0+g0 |
| 203 | "pxor %%xmm1, %%xmm0 \n" // h1:h0 |
| 204 | "pxor %%xmm2, %%xmm0 \n" // x3+h1:x2+h0 |
| 205 | |
| 206 | "movdqu %%xmm0, (%2) \n" // done |
| 207 | : |
| 208 | : "r" (aa), "r" (bb), "r" (cc) |
| 209 | : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); |
| 210 | |
| 211 | /* Now byte-reverse the outputs */ |
| 212 | for( i = 0; i < 16; i++ ) |
| 213 | c[i] = cc[15 - i]; |
| 214 | |
| 215 | return( 0 ); |
| 216 | } |
| 217 | |
Manuel Pégourié-Gonnard | 92ac76f | 2013-12-16 17:12:53 +0100 | [diff] [blame] | 218 | #endif /* POLARSSL_HAVE_X86_64 */ |
| 219 | |
| 220 | #endif /* POLARSSL_AESNI_C */ |