Added integer divide by as separate function

Added 64bit integer divided by 32bit integer, with remainder
diff --git a/library/bignum.c b/library/bignum.c
index 14a3dc7..d05b0d3 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -19,13 +19,22 @@
  *  with this program; if not, write to the Free Software Foundation, Inc.,
  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
+
 /*
- *  This MPI implementation is based on:
+ *  The following sources were referenced in the design of this Multi-precision
+ *  Integer library:
  *
- *  http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf
- *  http://www.stillhq.com/extracted/gnupg-api/mpi/
- *  http://math.libtomcrypt.com/files/tommath.pdf
- */
+ *  [1] Handbook of Applied Cryptography - 1997
+ *      Menezes, van Oorschot and Vanstone
+ *
+ *  [2] Multi-Precision Math
+ *      Tom St Denis
+ *      https://github.com/libtom/libtommath/blob/develop/tommath.pdf
+ *
+ *  [3] GNU Multi-Precision Arithmetic Library
+ *      https://gmplib.org/manual/index.html
+ *
+*/
 
 #include "polarssl/config.h"
 
@@ -229,6 +238,24 @@
 }
 
 /*
+ * Count leading zero bits in a given integer
+ */
+static size_t int_clz( const t_uint x )
+{
+    size_t j;
+    t_uint mask = (t_uint) 1 << (biL - 1);
+
+    for( j = 0; j < biL; j++ )
+    {
+        if( x & mask ) break;
+
+        mask >>= 1;
+    }
+
+    return j;
+}
+
+/*
  * Return the number of most significant bits
  */
 size_t mpi_msb( const mpi *X )
@@ -239,9 +266,7 @@
         if( X->p[i] != 0 )
             break;
 
-    for( j = biL; j > 0; j-- )
-        if( ( ( X->p[i] >> ( j - 1 ) ) & 1 ) != 0 )
-            break;
+    j = biL - int_clz( X->p[i] );
 
     return( ( i * biL ) + j );
 }
@@ -1066,6 +1091,98 @@
 }
 
 /*
+ * Unsigned integer divide - 64bit dividend and 32bit divisor
+ */
+static t_uint int_div_int(t_uint u1, t_uint u0, t_uint d, t_uint *r)
+{
+#if defined(POLARSSL_HAVE_UDBL)
+    t_udbl dividend, quotient;
+#endif
+
+    /*
+     * Check for overflow
+     */
+    if(( 0 == d ) || ( u1 >= d ))
+    {
+        if (r != NULL) *r = (~0);
+
+        return (~0);
+    }
+
+#if defined(POLARSSL_HAVE_UDBL)
+    dividend  = (t_udbl) u1 << biL;
+    dividend |= (t_udbl) u0;
+    quotient = dividend / d;
+    if( quotient > ( (t_udbl) 1 << biL ) - 1 )
+        quotient = ( (t_udbl) 1 << biL ) - 1;
+
+    if( r != NULL )
+        *r = dividend - (quotient * d);
+
+    return (t_uint) quotient;
+#else
+    const t_uint radix = 1 << biH;
+    t_uint d0, d1, q0, q1, rAX, r0, quotient;
+    t_uint u0_msw, u0_lsw;
+    int s;
+
+    /*
+     * Algorithm D, Section 4.3.1 - The Art of Computer Programming
+     *   Vol. 2 - Seminumerical Algorithms, Knuth
+     */
+
+    /*
+     * Normalize the divisor, d, and dividend, u0, u1
+     */
+    s = int_clz( d );
+    d = d << s;
+
+    u1 = u1 << s;
+    u1 |= (u0 >> (32 - s)) & ( (-s) >> 31);
+    u0 =  u0 << s;
+
+    d1 = d >> biH;
+    d0 = d & 0xffff;
+
+    u0_msw = u0 >> biH;
+    u0_lsw = u0 & 0xffff;
+
+    /*
+     * Find the first quotient and remainder
+     */
+    q1 = u1 / d1;
+    r0 = u1 - d1 * q1;
+
+    while( q1 >= radix || ( q1 * d0 > radix * r0 + u0_msw ) )
+    {
+        q1 -= 1;
+        r0 += d1;
+
+        if ( r0 >= radix ) break;
+    }
+
+    rAX = (u1 * radix) + (u0_msw - q1 * d);
+    q0 = rAX / d1;
+    r0 = rAX - q0 * d1;
+
+    while( q0 >= radix || ( q0 * d0 > radix * r0 + u0_lsw ) )
+    {
+        q0 -= 1;
+        r0 += d1;
+
+        if ( r0 >= radix ) break;
+    }
+
+    if (r != NULL)
+        *r = (rAX * radix + u0_lsw - q0 * d) >> s;
+
+    quotient = q1 * radix + q0;
+
+    return quotient;
+#endif
+}
+
+/*
  * Division by mpi: A = Q * B + R  (HAC 14.20)
  */
 int mpi_div_mpi( mpi *Q, mpi *R, const mpi *A, const mpi *B )
@@ -1122,67 +1239,7 @@
             Z.p[i - t - 1] = ~0;
         else
         {
-            /*
-             * The version of Clang shipped by Apple with Mavericks around
-             * 2014-03 can't handle 128-bit division properly. Disable
-             * 128-bits division for this version. Let's be optimistic and
-             * assume it'll be fixed in the next minor version (next
-             * patchlevel is probably a bit too optimistic).
-             */
-#if defined(POLARSSL_HAVE_UDBL) &&                          \
-    ! ( defined(__x86_64__) && defined(__APPLE__) &&        \
-        defined(__clang_major__) && __clang_major__ == 5 && \
-        defined(__clang_minor__) && __clang_minor__ == 0 )
-            t_udbl r;
-
-            r  = (t_udbl) X.p[i] << biL;
-            r |= (t_udbl) X.p[i - 1];
-            r /= Y.p[t];
-            if( r > ((t_udbl) 1 << biL) - 1)
-                r = ((t_udbl) 1 << biL) - 1;
-
-            Z.p[i - t - 1] = (t_uint) r;
-#else
-            /*
-             * __udiv_qrnnd_c, from gmp/longlong.h
-             */
-            t_uint q0, q1, r0, r1;
-            t_uint d0, d1, d, m;
-
-            d  = Y.p[t];
-            d0 = ( d << biH ) >> biH;
-            d1 = ( d >> biH );
-
-            q1 = X.p[i] / d1;
-            r1 = X.p[i] - d1 * q1;
-            r1 <<= biH;
-            r1 |= ( X.p[i - 1] >> biH );
-
-            m = q1 * d0;
-            if( r1 < m )
-            {
-                q1--, r1 += d;
-                while( r1 >= d && r1 < m )
-                    q1--, r1 += d;
-            }
-            r1 -= m;
-
-            q0 = r1 / d1;
-            r0 = r1 - d1 * q0;
-            r0 <<= biH;
-            r0 |= ( X.p[i - 1] << biH ) >> biH;
-
-            m = q0 * d0;
-            if( r0 < m )
-            {
-                q0--, r0 += d;
-                while( r0 >= d && r0 < m )
-                    q0--, r0 += d;
-            }
-            r0 -= m;
-
-            Z.p[i - t - 1] = ( q1 << biH ) | q0;
-#endif
+            Z.p[i - t - 1] = int_div_int( X.p[i], X.p[i - 1], Y.p[t], NULL);
         }
 
         Z.p[i - t - 1]++;