Add Base64 encoding for printable hashes (SHA1 hash is 30 bytes, City…

… is 14). (JetBrains#30)
Deiufy · Nov 2, 2016 · 5966f69 · 5966f69
1 parent b3fb82d
commit 5966f69
Show file tree

Hide file tree

Showing 6 changed files with 212 additions and 14 deletions.
diff --git a/common/src/hash/cpp/Base64.cpp b/common/src/hash/cpp/Base64.cpp
@@ -0,0 +1,164 @@
+#include <string.h>
+#include <stdint.h>
+
+#include "Base64.h"
+
+namespace {
+
+// From https://en.wikibooks.org/wiki/Algorithm_Implementation/Miscellaneous/Base64
+// with minor tweaks. File-local lookup tables used by Base64Encode and Base64Decode.
+
+#define WHITESPACE 64  // kDecode marker: the decoder skips this byte (only '\n', index 10, uses it)
+#define EQUALS     65  // kDecode marker: '=' padding (index 61); the decoder stops reading input
+#define INVALID    66  // kDecode marker: the decoder rejects the input and returns an error
+
+const char kAlphabet[] =  // maps a 6-bit value (0..63) to its Base64 character
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+static const unsigned char kDecode[] = {  // 256 entries: byte value -> 6-bit value (0..63) or one of the markers above
+    66,66,66,66,66,66,66,66,66,66,64,66,66,66,66,66,66,66,66,66,66,66,66,66,66,
+    66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,62,66,66,66,63,52,53,
+    54,55,56,57,58,59,60,61,66,66,66,65,66,66,66, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+    10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,66,66,66,66,66,66,26,27,28,
+    29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,66,66,
+    66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,
+    66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,
+    66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,
+    66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,
+    66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,
+    66,66,66,66,66,66
+};
+
+}  // namespace
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Encodes dataLength bytes from dataBuf as NUL-terminated Base64 text in result.
+// resultSize is the capacity of result in chars, terminator included.
+// Returns 0 on success. Returns 1 as soon as the next char would not fit; the
+// chars emitted up to that point stay in result and no terminator is written.
+int Base64Encode(
+    const void* dataBuf, uint32_t dataLength, char* result, uint32_t resultSize) {
+  const uint8_t* bytes = reinterpret_cast<const uint8_t*>(dataBuf);
+  size_t written = 0;
+
+  for (size_t pos = 0; pos < dataLength; pos += 3) {
+    const bool hasSecond = (pos + 1) < dataLength;
+    const bool hasThird = (pos + 2) < dataLength;
+
+    // Pack up to three input bytes into one 24-bit group; absent bytes stay zero.
+    uint32_t group = static_cast<uint32_t>(bytes[pos]) << 16;
+    if (hasSecond)
+      group |= static_cast<uint32_t>(bytes[pos + 1]) << 8;
+    if (hasThird)
+      group |= bytes[pos + 2];
+
+    // One input byte yields 2 chars, two bytes yield 3, three bytes yield 4.
+    const int charCount = 2 + (hasSecond ? 1 : 0) + (hasThird ? 1 : 0);
+    for (int k = 0; k < charCount; ++k) {
+      if (written >= resultSize)
+        return 1;  // buffer too small
+      // Sextets are taken from the most significant end of the group first.
+      result[written++] = kAlphabet[(group >> (18 - 6 * k)) & 63];
+    }
+  }
+
+  // A trailing group of one or two bytes is padded with '=' up to four chars.
+  const uint32_t leftover = dataLength % 3;
+  if (leftover != 0) {
+    for (uint32_t pads = 3 - leftover; pads != 0; --pads) {
+      if (written >= resultSize)
+        return 1;  // buffer too small
+      result[written++] = '=';
+    }
+  }
+
+  if (written >= resultSize)
+    return 1;  // no room left for the terminator
+
+  result[written] = 0;
+  return 0;
+}
+
+// Decodes inLen chars of Base64 text starting at `in` into outBuf.
+// On entry *outLen is the capacity of outBuf in bytes; on success it is
+// overwritten with the number of bytes produced. Bytes that kDecode marks as
+// WHITESPACE are skipped, and the first '=' ends decoding (the rest of the
+// input is ignored).
+// Returns 0 on success, 1 on an invalid character or when the decoded data
+// would exceed the capacity. On failure *outLen is left unchanged, but bytes
+// decoded before the failure may already have been stored in outBuf.
+int Base64Decode(
+    const char *in, uint32_t inLen, void* outBuf, uint32_t* outLen) {
+  uint8_t* out = reinterpret_cast<uint8_t*>(outBuf);
+  const char* end = in + inLen;
+  char iter = 0;            /* sextets collected in buf since the last flush */
+  size_t buf = 0, len = 0;  /* bit accumulator; number of bytes produced */
+
+  while (in < end) {
+    /*
+     * Index the table through unsigned char: plain char may be signed, and a
+     * byte >= 0x80 would then become a negative index reading outside kDecode.
+     */
+    unsigned char c = kDecode[static_cast<unsigned char>(*in++)];
+
+    switch (c) {
+      case WHITESPACE: continue;   /* skip whitespace */
+      case INVALID:    return 1;   /* invalid input, return error */
+      case EQUALS:                 /* pad character, end of data */
+        in = end;
+        continue;
+      default:
+        buf = buf << 6 | c;
+        iter++; // increment the number of iteration
+        /* If the buffer is full, split it into bytes */
+        if (iter == 4) {
+          if ((len += 3) > *outLen)
+            return 1; /* buffer overflow */
+          *(out++) = (buf >> 16) & 255;
+          *(out++) = (buf >> 8) & 255;
+          *(out++) = buf & 255;
+          buf = 0; iter = 0;
+        }
+    }
+  }
+
+  /*
+   * Flush an incomplete final group: three sextets carry two bytes, two
+   * sextets carry one byte. A single leftover sextet cannot form a byte and
+   * is dropped without reporting an error.
+   */
+  if (iter == 3) {
+    if ((len += 2) > *outLen)
+      return 1; /* buffer overflow */
+    *(out++) = (buf >> 10) & 255;
+    *(out++) = (buf >> 2) & 255;
+  }
+  else if (iter == 2) {
+    if (++len > *outLen)
+      return 1; /* buffer overflow */
+    *(out++) = (buf >> 4) & 255;
+  }
+
+  *outLen = len; /* modify to reflect the actual output size */
+  return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/common/src/hash/cpp/Names.cpp b/common/src/hash/cpp/Names.cpp
@@ -2,12 +2,17 @@
 
 #include "Names.h"
 
+#include "Base64.h"
 #include "City.h"
 #include "Sha1.h"
 
 namespace {
 
-void Printable(const uint8_t* data, uint32_t data_length, char* hex) {
+// Size of the hex form of input_length bytes: two chars per input byte.
+// The result does not include room for a terminating NUL.
+constexpr uint32_t PrintableHexSize(uint32_t input_length) {
+  return 2 * input_length;
+}
+
+void PrintableHex(const uint8_t* data, uint32_t data_length, char* hex) {
   static const char* hex_digits = "0123456789ABCDEF";
   int i = 0;
   for(int i = 0; i < data_length; ++i) {
@@ -16,6 +21,15 @@ void Printable(const uint8_t* data, uint32_t data_length, char* hex) {
   }
 }
 
+// Buffer size needed for the Base64 form of input_length bytes: four chars
+// for every started group of three input bytes, plus one for the NUL.
+constexpr uint32_t PrintableBase64Size(uint32_t input_length) {
+  return 1 + 4 * ((input_length + 2) / 3);
+}
+
+// Writes the NUL-terminated Base64 form of data_length bytes of data into
+// base64. The caller must provide at least PrintableBase64Size(data_length)
+// chars; that same size is handed to Base64Encode as the capacity, so a
+// non-zero return here indicates a broken invariant rather than bad input.
+void PrintableBase64(const uint8_t* data, uint32_t data_length, char* base64) {
+  const int rv = Base64Encode(data, data_length, base64, PrintableBase64Size(data_length));
+  assert(rv == 0);
+  (void)rv;  // assert() expands to nothing under NDEBUG; keeps rv from being reported as unused
+}
+
 } // namespace
 
 extern "C" {
@@ -35,20 +49,20 @@ void MakeGlobalHash(const void* data, uint32_t size, GlobalHash* hash) {
 
 // Make printable C string out of local hash.
+// Writes the NUL-terminated Base64 form of *hash into buffer; size is the
+// capacity of buffer in chars. If size is too small, asserts and writes nothing.
 void PrintableLocalHash(const LocalHash* hash, char* buffer, uint32_t size) {
-  if (size < sizeof(*hash) * 2) {
+  if (size < PrintableBase64Size(sizeof(*hash))) {
     assert(false);
     return;
   }
-  Printable(reinterpret_cast<const uint8_t*>(&hash), sizeof(*hash), buffer);
+  // Encode the pointed-to hash. `&hash` would be the address of the pointer
+  // parameter itself, i.e. the pointer's own bytes instead of the hash value.
+  PrintableBase64(reinterpret_cast<const uint8_t*>(hash), sizeof(*hash), buffer);
 }
 
 // Make printable C string out of global hash.
 void PrintableGlobalHash(const GlobalHash* hash, char* buffer, uint32_t size) {
-  if (size < sizeof(*hash) * 2) {
+  if (size < PrintableBase64Size(sizeof(*hash))) {  // buffer must hold the Base64 text plus the terminating NUL
     assert(false);
     return;
   }
-  Printable(hash->bits, sizeof(*hash), buffer);
+  PrintableBase64(hash->bits, sizeof(*hash), buffer);  // encodes sizeof(*hash) bytes starting at hash->bits
 }
 
 } // extern "C"
diff --git a/common/src/hash/headers/Base64.h b/common/src/hash/headers/Base64.h
@@ -0,0 +1,20 @@
+#ifndef COMMON_BASE64_H
+#define COMMON_BASE64_H
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Encodes inputLen bytes from input as NUL-terminated Base64 text in output.
+// outputLen is the capacity of output in chars, terminator included.
+// Returns 0 on success, 1 if the encoded text does not fit.
+int Base64Encode(
+    const void* input, uint32_t inputLen, char* output, uint32_t outputLen);
+
+// Decodes inputLen chars of Base64 text from input into output.
+// On entry *outputLen is the capacity of output in bytes; on success it is
+// set to the number of bytes written.
+// Returns 0 on success, 1 on invalid input or insufficient capacity.
+int Base64Decode(
+    const char* input, uint32_t inputLen, void* output, uint32_t* outputLen);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // COMMON_BASE64_H
diff --git a/common/src/hash/headers/City.h b/common/src/hash/headers/City.h
@@ -1,5 +1,5 @@
-#ifndef RUNTIME_CITY_H
-#define RUNTIME_CITY_H
+#ifndef COMMON_CITY_H
+#define COMMON_CITY_H
 
 // CityHash, by Geoff Pike and Jyrki Alakuijala.
 
@@ -17,4 +17,4 @@ uint64_t CityHash64(const void* buf, size_t len);
 }
 #endif
 
-#endif // RUNTIME_CITY_H
+#endif // COMMON_CITY_H
diff --git a/common/src/hash/headers/Names.h b/common/src/hash/headers/Names.h
@@ -1,5 +1,5 @@
-#ifndef RUNTIME_NAMES_H
-#define RUNTIME_NAMES_H
+#ifndef COMMON_NAMES_H
+#define COMMON_NAMES_H
 
 #include <stdint.h>
 
@@ -40,4 +40,4 @@ void PrintableGlobalHash(const GlobalHash* hash, char* buffer, uint32_t size);
 } // extern "C"
 #endif
 
-#endif // RUNTIME_NAMES_H
+#endif // COMMON_NAMES_H
diff --git a/common/src/hash/headers/Sha1.h b/common/src/hash/headers/Sha1.h
@@ -1,5 +1,5 @@
-#ifndef RUNTIME_SHA1_H
-#define RUNTIME_SHA1_H
+#ifndef COMMON_SHA1_H
+#define COMMON_SHA1_H
 
 #include <stdint.h>
 
@@ -27,4 +27,4 @@ void SHA1Final(unsigned char digest[20], SHA1_CTX* context);
 }
 #endif
 
-#endif // RUNTIME_UTIL_SHA1_H
+#endif // COMMON_SHA1_H