Adapt AES-NI code to "old" binutils versions
diff --git a/library/aesni.c b/library/aesni.c
index aa054cb..e396d43 100644
--- a/library/aesni.c
+++ b/library/aesni.c
@@ -59,6 +59,32 @@
 }
 
 /*
+ * Binutils needs to be at least 2.19 to support AES-NI instructions.
+ * Unfortunately, a lot of users still have an older version (as of 2014-04).
+ * Emit bytecode directly in order to support "old" versions of gas.
+ *
+ * Opcodes from the Intel architecture reference manual, vol. 3.
+ * We always use registers, so we don't need prefixes for memory operands.
+ * Operand macros are in gas order (src, dst) as opposed to Intel order
+ * (dst, src) in order to blend better into the surrounding assembly code.
+ */
+#define AESDEC      ".byte 0x66,0x0F,0x38,0xDE," /* aesdec: operand byte follows */
+#define AESDECLAST  ".byte 0x66,0x0F,0x38,0xDF," /* aesdeclast: operand byte follows */
+#define AESENC      ".byte 0x66,0x0F,0x38,0xDC," /* aesenc: operand byte follows */
+#define AESENCLAST  ".byte 0x66,0x0F,0x38,0xDD," /* aesenclast: operand byte follows */
+#define AESIMC      ".byte 0x66,0x0F,0x38,0xDB," /* aesimc: operand byte follows */
+#define AESKEYGENA  ".byte 0x66,0x0F,0x3A,0xDF," /* aeskeygenassist: operand byte, then ",imm8" */
+#define PCLMULQDQ   ".byte 0x66,0x0F,0x3A,0x44," /* pclmulqdq: operand byte, then ",imm8" */
+
+#define xmm0_xmm0   "0xC0" /* ModRM byte: src = xmm0, dst = xmm0 */
+#define xmm0_xmm1   "0xC8" /* ModRM byte: src = xmm0, dst = xmm1 */
+#define xmm0_xmm2   "0xD0" /* ModRM byte: src = xmm0, dst = xmm2 */
+#define xmm0_xmm3   "0xD8" /* ModRM byte: src = xmm0, dst = xmm3 */
+#define xmm0_xmm4   "0xE0" /* ModRM byte: src = xmm0, dst = xmm4 */
+#define xmm1_xmm0   "0xC1" /* ModRM byte: src = xmm1, dst = xmm0 */
+#define xmm1_xmm2   "0xD1" /* ModRM byte: src = xmm1, dst = xmm2 */
+
+/*
  * AES-NI AES-ECB block en(de)cryption
  */
 int aesni_crypt_ecb( aes_context *ctx,
@@ -76,22 +102,22 @@
 
          "1:                        \n" // encryption loop
          "movdqu    (%1), %%xmm1    \n" // load round key
-         "aesenc    %%xmm1, %%xmm0  \n" // do round
+         AESENC     xmm1_xmm0      "\n" // do round
          "addq      $16, %1         \n" // point to next round key
          "subl      $1, %0          \n" // loop
          "jnz       1b              \n"
          "movdqu    (%1), %%xmm1    \n" // load round key
-         "aesenclast %%xmm1, %%xmm0 \n" // last round
+         AESENCLAST xmm1_xmm0      "\n" // last round
          "jmp       3f              \n"
 
          "2:                        \n" // decryption loop
          "movdqu    (%1), %%xmm1    \n"
-         "aesdec    %%xmm1, %%xmm0  \n"
+         AESDEC     xmm1_xmm0      "\n" // do round
          "addq      $16, %1         \n"
          "subl      $1, %0          \n"
          "jnz       2b              \n"
          "movdqu    (%1), %%xmm1    \n" // load round key
-         "aesdeclast %%xmm1, %%xmm0 \n" // last round
+         AESDECLAST xmm1_xmm0      "\n" // last round
 
          "3:                        \n"
          "movdqu    %%xmm0, (%4)    \n" // export output
@@ -131,10 +157,10 @@
          "movdqa %%xmm1, %%xmm2             \n" // copy of b1:b0
          "movdqa %%xmm1, %%xmm3             \n" // same
          "movdqa %%xmm1, %%xmm4             \n" // same
-         "pclmulqdq $0x00, %%xmm0, %%xmm1   \n" // a0*b0 = c1:c0
-         "pclmulqdq $0x11, %%xmm0, %%xmm2   \n" // a1*b1 = d1:d0
-         "pclmulqdq $0x10, %%xmm0, %%xmm3   \n" // a0*b1 = e1:e0
-         "pclmulqdq $0x01, %%xmm0, %%xmm4   \n" // a1*b0 = f1:f0
+         PCLMULQDQ xmm0_xmm1 ",0x00         \n" // a0*b0 = c1:c0
+         PCLMULQDQ xmm0_xmm2 ",0x11         \n" // a1*b1 = d1:d0
+         PCLMULQDQ xmm0_xmm3 ",0x10         \n" // a0*b1 = e1:e0
+         PCLMULQDQ xmm0_xmm4 ",0x01         \n" // a1*b0 = f1:f0
          "pxor %%xmm3, %%xmm4               \n" // e1+f1:e0+f0
          "movdqa %%xmm4, %%xmm3             \n" // same
          "psrldq $8, %%xmm4                 \n" // 0:e1+f1
@@ -228,7 +254,7 @@
 
     for( fk -= 16, ik += 16; fk > fwdkey; fk -= 16, ik += 16 )
         asm( "movdqu (%0), %%xmm0       \n"
-             "aesimc %%xmm0, %%xmm0     \n"
+             AESIMC  xmm0_xmm0         "\n"
              "movdqu %%xmm0, (%1)       \n"
              :
              : "r" (fk), "r" (ik)
@@ -271,17 +297,17 @@
          "ret                               \n"
 
          /* Main "loop" */
-         "2:                                    \n"
-         "aeskeygenassist $0x01, %%xmm0, %%xmm1 \ncall 1b   \n"
-         "aeskeygenassist $0x02, %%xmm0, %%xmm1 \ncall 1b   \n"
-         "aeskeygenassist $0x04, %%xmm0, %%xmm1 \ncall 1b   \n"
-         "aeskeygenassist $0x08, %%xmm0, %%xmm1 \ncall 1b   \n"
-         "aeskeygenassist $0x10, %%xmm0, %%xmm1 \ncall 1b   \n"
-         "aeskeygenassist $0x20, %%xmm0, %%xmm1 \ncall 1b   \n"
-         "aeskeygenassist $0x40, %%xmm0, %%xmm1 \ncall 1b   \n"
-         "aeskeygenassist $0x80, %%xmm0, %%xmm1 \ncall 1b   \n"
-         "aeskeygenassist $0x1B, %%xmm0, %%xmm1 \ncall 1b   \n"
-         "aeskeygenassist $0x36, %%xmm0, %%xmm1 \ncall 1b   \n"
+         "2:                                \n"
+         AESKEYGENA xmm0_xmm1 ",0x01        \ncall 1b   \n"
+         AESKEYGENA xmm0_xmm1 ",0x02        \ncall 1b   \n"
+         AESKEYGENA xmm0_xmm1 ",0x04        \ncall 1b   \n"
+         AESKEYGENA xmm0_xmm1 ",0x08        \ncall 1b   \n"
+         AESKEYGENA xmm0_xmm1 ",0x10        \ncall 1b   \n"
+         AESKEYGENA xmm0_xmm1 ",0x20        \ncall 1b   \n"
+         AESKEYGENA xmm0_xmm1 ",0x40        \ncall 1b   \n"
+         AESKEYGENA xmm0_xmm1 ",0x80        \ncall 1b   \n"
+         AESKEYGENA xmm0_xmm1 ",0x1B        \ncall 1b   \n"
+         AESKEYGENA xmm0_xmm1 ",0x36        \ncall 1b   \n"
          :
          : "r" (rk), "r" (key)
          : "memory", "cc", "0" );
@@ -329,15 +355,15 @@
          "add $8, %0                    \n"
          "ret                           \n"
 
-         "2:                                    \n"
-         "aeskeygenassist $0x01, %%xmm1, %%xmm2 \ncall 1b   \n"
-         "aeskeygenassist $0x02, %%xmm1, %%xmm2 \ncall 1b   \n"
-         "aeskeygenassist $0x04, %%xmm1, %%xmm2 \ncall 1b   \n"
-         "aeskeygenassist $0x08, %%xmm1, %%xmm2 \ncall 1b   \n"
-         "aeskeygenassist $0x10, %%xmm1, %%xmm2 \ncall 1b   \n"
-         "aeskeygenassist $0x20, %%xmm1, %%xmm2 \ncall 1b   \n"
-         "aeskeygenassist $0x40, %%xmm1, %%xmm2 \ncall 1b   \n"
-         "aeskeygenassist $0x80, %%xmm1, %%xmm2 \ncall 1b   \n"
+         "2:                            \n"
+         AESKEYGENA xmm1_xmm2 ",0x01    \ncall 1b   \n"
+         AESKEYGENA xmm1_xmm2 ",0x02    \ncall 1b   \n"
+         AESKEYGENA xmm1_xmm2 ",0x04    \ncall 1b   \n"
+         AESKEYGENA xmm1_xmm2 ",0x08    \ncall 1b   \n"
+         AESKEYGENA xmm1_xmm2 ",0x10    \ncall 1b   \n"
+         AESKEYGENA xmm1_xmm2 ",0x20    \ncall 1b   \n"
+         AESKEYGENA xmm1_xmm2 ",0x40    \ncall 1b   \n"
+         AESKEYGENA xmm1_xmm2 ",0x80    \ncall 1b   \n"
 
          :
          : "r" (rk), "r" (key)
@@ -380,7 +406,7 @@
 
          /* Set xmm2 to stuff:Y:stuff:stuff with Y = subword( r11 )
           * and proceed to generate next round key from there */
-         "aeskeygenassist $0, %%xmm0, %%xmm2\n"
+         AESKEYGENA xmm0_xmm2 ",0x00        \n"
          "pshufd $0xaa, %%xmm2, %%xmm2      \n"
          "pxor %%xmm1, %%xmm2               \n"
          "pslldq $4, %%xmm1                 \n"
@@ -397,14 +423,14 @@
           * Main "loop" - Generating one more key than necessary,
           * see definition of aes_context.buf
           */
-         "2:                                    \n"
-         "aeskeygenassist $0x01, %%xmm1, %%xmm2 \ncall 1b   \n"
-         "aeskeygenassist $0x02, %%xmm1, %%xmm2 \ncall 1b   \n"
-         "aeskeygenassist $0x04, %%xmm1, %%xmm2 \ncall 1b   \n"
-         "aeskeygenassist $0x08, %%xmm1, %%xmm2 \ncall 1b   \n"
-         "aeskeygenassist $0x10, %%xmm1, %%xmm2 \ncall 1b   \n"
-         "aeskeygenassist $0x20, %%xmm1, %%xmm2 \ncall 1b   \n"
-         "aeskeygenassist $0x40, %%xmm1, %%xmm2 \ncall 1b   \n"
+         "2:                                \n"
+         AESKEYGENA xmm1_xmm2 ",0x01        \ncall 1b   \n"
+         AESKEYGENA xmm1_xmm2 ",0x02        \ncall 1b   \n"
+         AESKEYGENA xmm1_xmm2 ",0x04        \ncall 1b   \n"
+         AESKEYGENA xmm1_xmm2 ",0x08        \ncall 1b   \n"
+         AESKEYGENA xmm1_xmm2 ",0x10        \ncall 1b   \n"
+         AESKEYGENA xmm1_xmm2 ",0x20        \ncall 1b   \n"
+         AESKEYGENA xmm1_xmm2 ",0x40        \ncall 1b   \n"
          :
          : "r" (rk), "r" (key)
          : "memory", "cc", "0" );