MemPool -- better tests, more robust pointer handling, update documentation
diff --git a/QCBOR.xcodeproj/project.pbxproj b/QCBOR.xcodeproj/project.pbxproj
index 08c4e91..f599141 100644
--- a/QCBOR.xcodeproj/project.pbxproj
+++ b/QCBOR.xcodeproj/project.pbxproj
@@ -324,8 +324,12 @@
 		E776E084214ADF7F00E67947 /* Debug */ = {
 			isa = XCBuildConfiguration;
 			buildSettings = {
+				CLANG_UNDEFINED_BEHAVIOR_SANITIZER_INTEGER = YES;
+				CLANG_UNDEFINED_BEHAVIOR_SANITIZER_NULLABILITY = YES;
 				CODE_SIGN_STYLE = Automatic;
 				GCC_OPTIMIZATION_LEVEL = 0;
+				GCC_TREAT_WARNINGS_AS_ERRORS = YES;
+				GCC_WARN_PEDANTIC = YES;
 				PRODUCT_NAME = "$(TARGET_NAME)";
 			};
 			name = Debug;
@@ -333,8 +337,12 @@
 		E776E085214ADF7F00E67947 /* Release */ = {
 			isa = XCBuildConfiguration;
 			buildSettings = {
+				CLANG_UNDEFINED_BEHAVIOR_SANITIZER_INTEGER = YES;
+				CLANG_UNDEFINED_BEHAVIOR_SANITIZER_NULLABILITY = YES;
 				CODE_SIGN_STYLE = Automatic;
 				GCC_OPTIMIZATION_LEVEL = 0;
+				GCC_TREAT_WARNINGS_AS_ERRORS = YES;
+				GCC_WARN_PEDANTIC = YES;
 				PRODUCT_NAME = "$(TARGET_NAME)";
 			};
 			name = Release;
diff --git a/inc/qcbor.h b/inc/qcbor.h
index 101bc4f..54ff6dd 100644
--- a/inc/qcbor.h
+++ b/inc/qcbor.h
@@ -1626,19 +1626,21 @@
  
  Indefinite length strings (text and byte) cannot be decoded unless there is
  a string allocator configured. MemPool is a simple built-in string allocator
- that allocates bytes from a block of memory handed to it by calling
- this function.
+ that allocates bytes from a memory pool handed to it by calling
+ this function.  The memory pool is just a pointer and length for some
+ block of memory that is to be used for string allocation. It can
+ come from the stack, heap or other.
  
- The buffer must be large enough to hold some fixed overhead plus the
+ The memory pool must be large enough to hold some fixed overhead plus the
  space for all the strings allocated. The fixed overhead does vary
- per implementation, but can roughly be computed as the space for
- nine pointers, 72 bytes of a 64-bit CPU.  There is no overhead
+ by CPU and compiler, but can roughly be computed as the space for
+ seven pointers, 56 bytes for a 64-bit CPU.  There is no overhead
  per string allocated
  
  This memory pool is used for all indefinite length strings that are text
  strings or byte strings, including strings used as labels.
  
- The pointers to strings in QCBORItem will point into the buffer passed set
+ The pointers to strings in QCBORItem will point into the memory pool set
  here. They do not need to be individually freed. Just discard the buffer
  when they are no longer needed.
  
@@ -1646,6 +1648,10 @@
  hold **all** strings, definite and indefinite length, value or label. The
  advantage of this is that after the decode is complete, the original memory
  holding the encoded CBOR does not need to remain valid.
+ 
+ If this function is not called because there is no need to support indefinite
+ length strings, the MemPool implementation should be dead-stripped by the linker
+ and not add to code size.
  */
 QCBORError QCBORDecode_SetMemPool(QCBORDecodeContext *pCtx, UsefulBuf MemPool, bool bAllStrings);
 
@@ -1663,6 +1669,9 @@
  A malloc based string allocator can be obtained by calling
  QCBORDecode_MakeMallocStringAllocator(). Pass its result to
  this function.
+ 
+ You can also write your own allocator. Create the allocate, free,
+ and destroy functions and put pointers to them in a QCBORStringAllocator.
  */
 void QCBORDecode_SetUpAllocator(QCBORDecodeContext *pCtx, const QCBORStringAllocator *pAllocator, bool bAllStrings);
 
diff --git a/src/qcbor_decode.c b/src/qcbor_decode.c
index e9f50d4..5614fb0 100644
--- a/src/qcbor_decode.c
+++ b/src/qcbor_decode.c
@@ -1164,16 +1164,6 @@
 
 
 
-/*
- 
- Use the 64-bit map. 48 8-bit tags built in, 1 16 bit tag, 15 64-bit tags can be assigned as of interest
- 
- There is a tag map.
-
- 
- */
-
-
 /* 
  
 Decoder errors handled in this file
@@ -1200,33 +1190,46 @@
 
 
 
+
+/*
+ This is a very primitive memory allocator. It does not track individual
+ allocations, only a high-water mark. A free or reallocation must be of
+ the last chunk allocated.
+ 
+ All of this following code will get dead-stripped if QCBORDecode_SetMemPool()
+ is not called.
+ */
+
 typedef struct {
    QCBORStringAllocator  StringAllocator;
-   uint8_t              *pStart;
-   uint8_t              *pEnd;
-   uint8_t              *pFree;
+   uint8_t              *pStart;  // First byte that can be allocated
+   uint8_t              *pEnd;    // One past the last byte that can be allocated
+   uint8_t              *pFree;   // Where the next free chunk is
 } MemPool;
 
 
 /*
+ Internal function for an allocation
+ 
  Code Reviewers: THIS FUNCTION DOES POINTER MATH
  */
 static UsefulBuf MemPool_Alloc(void *ctx, void *pMem, size_t uNewSize)
 {
-   MemPool *me = (MemPool *)ctx;
-   void *pReturn = NULL;
+   MemPool *me      = (MemPool *)ctx;
+   void    *pReturn = NULL;
    
    if(pMem) {
       // Realloc case
-      // TODO: review this pointer math
-      if((uint8_t *)pMem + uNewSize <= me->pEnd) {//} && (uint8_t *)pMem > me->pStart) {
+      // This check will work even if uNewSize is a super-large value like UINT64_MAX
+      if((uNewSize <= (size_t)(me->pEnd - (uint8_t *)pMem)) && ((uint8_t *)pMem >= me->pStart)) {
          me->pFree = (uint8_t *)pMem + uNewSize;
-         pReturn = pMem;
+         pReturn   = pMem;
       }
    } else {
       // New chunk case
-      if(me->pFree + uNewSize <= me->pEnd) {
-         pReturn = me->pFree;
+      // This check will work even if uNewSize is a super large value like UINT64_MAX
+      if(uNewSize <= (size_t)(me->pEnd - me->pFree)) {
+         pReturn    = me->pFree;
          me->pFree += uNewSize;
       }
    }
@@ -1234,34 +1237,61 @@
    return (UsefulBuf){pReturn, uNewSize};
 }
 
-
+/*
+ Internal function to free memory
+ */
 static void MemPool_Free(void *ctx, void *pOldMem)
 {
    MemPool *me = (MemPool *)ctx;
-   me->pFree = pOldMem;
+   me->pFree   = pOldMem;
 }
 
-
+/*
+ Public function, see header qcbor.h file
+ */
 QCBORError QCBORDecode_SetMemPool(QCBORDecodeContext *me, UsefulBuf Pool, bool bAllStrings)
 {
+   // The first bytes of the Pool passed in are used
+   // as the context (vtable of sorts) for the memory pool
+   // allocator.
    if(Pool.len < sizeof(MemPool)+1) {
       return QCBOR_ERR_BUFFER_TOO_SMALL;
    }
    
    MemPool *pMP = (MemPool *)Pool.ptr;
    
+   // Fill in the "vtable"
    pMP->StringAllocator.fAllocate   = MemPool_Alloc;
    pMP->StringAllocator.fFree       = MemPool_Free;
    pMP->StringAllocator.fDestructor = NULL;
    
+   // Set up the pointers to the memory to be allocated
    pMP->pStart = (uint8_t *)Pool.ptr + sizeof(MemPool);
    pMP->pFree  = pMP->pStart;
    pMP->pEnd   = (uint8_t *)Pool.ptr + Pool.len;
-   pMP->StringAllocator.pAllocaterContext = pMP;
    
-   me->pStringAllocator = pMP;
+   // More bookkeeping of context
+   pMP->StringAllocator.pAllocaterContext = pMP;
+   me->pStringAllocator   = pMP;
+   
+   // The flag indicating when to use the allocator
    me->bStringAllocateAll = bAllStrings;
    
    return QCBOR_SUCCESS;
 }
 
+
+/*
+ Extra little hook to make MemPool testing work right
+ without adding any code size or overhead to non-test
+ uses. This will get dead-stripped for non-test use.
+ 
+ This is not a public function. 
+ */
+size_t MemPoolTestHook_GetPoolSize(void *ctx)
+{
+   MemPool *me = (MemPool *)ctx;
+   
+   return me->pEnd - me->pStart;
+}
+
diff --git a/test/qcbor_decode_tests.c b/test/qcbor_decode_tests.c
index 12be915..f1b5dbf 100644
--- a/test/qcbor_decode_tests.c
+++ b/test/qcbor_decode_tests.c
@@ -2502,75 +2502,85 @@
    return 0;
 }
 
+// Cheating declaration to get to the special test hook
+size_t MemPoolTestHook_GetPoolSize(void *ctx);
+
 
 int MemPoolTest(void)
 {
-    QCBORDecodeContext DC;
-    
-    const uint8_t pMinimalCBOR[] = {0xa0}; // One empty map
-    
-    QCBORDecode_Init(&DC, UsefulBuf_FROM_BYTE_ARRAY_LITERAL(pMinimalCBOR),0);
-    
-    UsefulBuf_MAKE_STACK_UB(Pool, 100);
-    
-    QCBORDecode_SetMemPool(&DC, Pool, 0);
-    
-    // Cheat a little to get to the string allocator object
-    // so we can call it directly to test it
-    QCBORStringAllocator *pAlloc = (QCBORStringAllocator *)DC.pStringAllocator;
-    
-    // Ask for too much in one go
-    // 90 < 100, but there is some overhead taken out of the 100
-    UsefulBuf Allocated = (*pAlloc->fAllocate)(pAlloc->pAllocaterContext, NULL, 90);
-    if(!UsefulBuf_IsNULL(Allocated)) {
-        return -1;
-    }
-    
-    
-    
-    QCBORDecode_SetMemPool(&DC, Pool, 0);
-    
-    // Cheat a little to get to the string allocator object
-    // so we can call it directly to test it
-    pAlloc = (QCBORStringAllocator *)DC.pStringAllocator;
-    
-    Allocated = (*pAlloc->fAllocate)(pAlloc->pAllocaterContext, NULL, 30);
-    if(UsefulBuf_IsNULL(Allocated)) { // expected to succeed
-        return -1;
-    }
-    UsefulBuf Allocated2 = (*pAlloc->fAllocate)(pAlloc->pAllocaterContext, NULL, 30);
-    if(!UsefulBuf_IsNULL(Allocated2)) { // expected to fail
-        return -1;
-    }
-    (*pAlloc->fFree)(pAlloc->pAllocaterContext, Allocated.ptr);
-    Allocated = (*pAlloc->fAllocate)(pAlloc->pAllocaterContext, NULL, 30);
-    if(UsefulBuf_IsNULL(Allocated)) { // succeed because of the free
-        return -1;
-    }
-    
-    
-    QCBORDecode_SetMemPool(&DC, Pool, 0);
-    
-    // Cheat a little to get to the string allocator object
-    // so we can call it directly to test it
-    pAlloc = (QCBORStringAllocator *)DC.pStringAllocator;
-    Allocated = (*pAlloc->fAllocate)(pAlloc->pAllocaterContext, NULL, 20);
-    if(UsefulBuf_IsNULL(Allocated)) { // expected to succeed
-        return -1;
-    }
-    Allocated2 = (*pAlloc->fAllocate)(pAlloc->pAllocaterContext, Allocated.ptr, 25);
-    if(UsefulBuf_IsNULL(Allocated2)) { // expected to fail
-        return -1;
-    }
-    if(Allocated2.ptr != Allocated.ptr || Allocated2.len != 25) {
-        return -1;
-    }
-    
-    return 0;
-}
-
-
-
-
+   // Set up the decoder with a tiny bit of CBOR to parse
+   QCBORDecodeContext DC;
+   const uint8_t pMinimalCBOR[] = {0xa0}; // One empty map
+   QCBORDecode_Init(&DC, UsefulBuf_FROM_BYTE_ARRAY_LITERAL(pMinimalCBOR),0);
    
+   // Set up a memory pool of 100 bytes
+   UsefulBuf_MAKE_STACK_UB(Pool, 100);
+   QCBORDecode_SetMemPool(&DC, Pool, 0);
+    
+   // Cheat a little to get to the string allocator object
+   // so we can call it directly to test it
+   QCBORStringAllocator *pAlloc = (QCBORStringAllocator *)DC.pStringAllocator;
+   // Cheat some more to know exactly the usable size of the pool
+   size_t uAvailPool = MemPoolTestHook_GetPoolSize(pAlloc);
+
+   // First test -- ask for too much in one go
+   UsefulBuf Allocated = (*pAlloc->fAllocate)(pAlloc->pAllocaterContext, NULL, uAvailPool+1);
+   if(!UsefulBuf_IsNULL(Allocated)) {
+      return -1;
+   }
+   
+   
+   // Re do the set up for the next test that will do a successful alloc,
+   // a fail, a free and then success
+   // This test should work on 32 and 64-bit machines if the compiler
+   // does the expected thing with pointer sizes for the internal
+   // MemPool implementation leaving 72 or 44 bytes of pool memory, respectively.
+   QCBORDecode_SetMemPool(&DC, Pool, 0);
+    
+   // Cheat a little to get to the string allocator object
+   // so we can call it directly to test it
+   pAlloc = (QCBORStringAllocator *)DC.pStringAllocator;
+   // Cheat some more to know exactly the usable size of the pool
+   uAvailPool = MemPoolTestHook_GetPoolSize(pAlloc);
+    
+   Allocated = (*pAlloc->fAllocate)(pAlloc->pAllocaterContext, NULL, uAvailPool-1);
+   if(UsefulBuf_IsNULL(Allocated)) { // expected to succeed
+      return -2;
+   }
+   UsefulBuf Allocated2 = (*pAlloc->fAllocate)(pAlloc->pAllocaterContext, NULL, uAvailPool/2);
+   if(!UsefulBuf_IsNULL(Allocated2)) { // expected to fail
+      return -3;
+   }
+   (*pAlloc->fFree)(pAlloc->pAllocaterContext, Allocated.ptr);
+   Allocated = (*pAlloc->fAllocate)(pAlloc->pAllocaterContext, NULL, uAvailPool/2);
+   if(UsefulBuf_IsNULL(Allocated)) { // succeed because of the free
+      return -4;
+   }
+    
+   
+   // Re do set up for next test that involves a successful alloc,
+   // and a successful realloc and a failed realloc
+   QCBORDecode_SetMemPool(&DC, Pool, 0);
+    
+   // Cheat a little to get to the string allocator object
+   // so we can call it directly to test it
+   pAlloc = (QCBORStringAllocator *)DC.pStringAllocator;
+   Allocated = (*pAlloc->fAllocate)(pAlloc->pAllocaterContext, NULL, uAvailPool/2);
+   if(UsefulBuf_IsNULL(Allocated)) { // expected to succeed
+      return -5;
+   }
+   Allocated2 = (*pAlloc->fAllocate)(pAlloc->pAllocaterContext, Allocated.ptr, uAvailPool);
+   if(UsefulBuf_IsNULL(Allocated2)) {
+      return -6;
+   }
+   if(Allocated2.ptr != Allocated.ptr || Allocated2.len != uAvailPool) {
+      return -7;
+   }
+   UsefulBuf Allocated3 = (*pAlloc->fAllocate)(pAlloc->pAllocaterContext, Allocated.ptr, uAvailPool+1);
+   if(!UsefulBuf_IsNULL(Allocated3)) { // expected to fail
+      return -8;
+   }
+    
+   return 0;
+}