Skip to content

a ~9x faster .toString('hex') #219

Closed
@jimmywarting

Description

@jimmywarting

I was reading upon MDN and saw there example of how they take a buffer and make it to a hex string.

There solution was: They used DataView got 4 bytes as Uint32 to reduce the iterations by 4.
I thought, "how much faster is it as compared to a simple for loop like your hexSlice or a buf.reduce(fn, '')?

I tested a few implementations with a 60k random buffer and 100 iteration and the test where roughly the same.

dataview4step: 970.505ms
manual4step: 933.899ms
buffer toString: 1097.956ms
reduce: 1218.181ms
forLoop: 1107.912ms

i figure, can i beat that DataView somehow? then it hit me! a lookup table!

var table = []
var i=256; while(i--) table[i]=('0' + i.toString(16)).slice(-2)

/**
 * Decodes Uint8Array to a hex string.
 * Start and end may be passed to decode only a subset of the array.
 *
 * @param  {Buffer|Uint8Array|Array} arr  Can be any Uint8Array like object
 * @param  {Number} [start=0]             The byte offset to start decoding at.
 * @param  {Number} [end=arr.length]      The byte offset to stop decoding at.
 * @param  {String} [delimiter='']        The delimiter to use
 * @return {String}                       hex string
 */
function bytesToHex(arr, start, end, delimiter) {
  var len = arr.length

  if (!start || start < 0) start = 0
  if (!end || end < 0 || end > len) end = len

  delimiter = delimiter || ''

  for (var str = table[arr[start++]]; start < end; start++) {
    str += delimiter + table[arr[start]]
  }

  return str || ''
}
bytesToHex: 120.786ms
dataview4step: 990.888ms
manual4step: 947.693ms
buffer toString: 1103.244ms
reduce: 1203.335ms
forLoop: 1108.984ms
====================
buffer inspect: 13.124ms
bytesToHex inspect: 3.825ms

the table lookup where ~8.25 times faster then using DataView and ~9.2 times faster then buffer.toString('hex')

Here is the complete test if you are interested or would like to test it yourself
var uint = new Uint8Array(60000)
require('crypto').randomFillSync(uint)

var buffer = require('./node_modules/buffer').Buffer.from(uint.buffer)

var table = []
var i=256; while(i--) table[i]=('0' + i.toString(16)).slice(-2)

/**
 * Decodes Uint8Array to a hex string.
 * Start and end may be passed to decode only a subset of the array.
 *
 * @param  {Buffer|Uint8Array|Array} arr  Can be any Uint8Array like object
 * @param  {Number} [start=0]             The byte offset to start decoding at.
 * @param  {Number} [end=arr.length]      The byte offset to stop decoding at.
 * @param  {String} [delimiter='']        The delimiter to use
 * @return {String}                       hex string
 */
function bytesToHex(arr, start, end, delimiter) {
  var len = arr.length

  if (!start || start < 0) start = 0
  if (!end || end < 0 || end > len) end = len

  delimiter = delimiter || ''

  for (var str = table[arr[start++]]; start < end; start++) {
    str += delimiter + table[arr[start]]
  }

  return str || ''
}


function dataview4step(buffer) {
  var hexCodes = []
  var view = new DataView(buffer.buffer)
  for (var i = 0; i < view.byteLength; i += 4) {
    // Using getUint32 reduces the number of iterations needed (we process 4 bytes each time)
    var value = view.getUint32(i)

    // toString(16) will give the hex representation of the number without padding
    var stringValue = value.toString(16)
    // We use concatenation and slice for padding
    var padding = '00000000'
    var paddedValue = (padding + stringValue).slice(-padding.length)
    hexCodes.push(paddedValue)
  }

  // Join all the hex strings into one
  return hexCodes.join('')
}

function manual4step(buffer) {
  var hexCodes = [];
  for (var i = 0; i < buffer.byteLength; i += 4) {
    // Using getUint32 reduces the number of iterations needed (we process 4 bytes each time)
    var value = (buffer[i] * 0x1000000) +
                ((buffer[i + 1] << 16) |
                (buffer[i + 2] << 8) |
                buffer[i + 3])
    // toString(16) will give the hex representation of the number without padding
    var stringValue = value.toString(16)
    // We use concatenation and slice for padding
    var padding = '00000000'
    var paddedValue = (padding + stringValue).slice(-8)
    hexCodes.push(paddedValue);
  }

  // Join all the hex strings into one
  return hexCodes.join("");
}

function reducer (memo, n) {
  return memo + (n < 16 ? '0' : '') + n.toString(16)
}

function forLoop (buf, start, end) {
  var len = buf.length

  if (!start || start < 0) start = 0
  if (!end || end < 0 || end > len) end = len

  var out = ''
  for (var i = start; i < end; ++i) {
    var n = buf[i]
    out += (n < 16 ? '0' : '') + n.toString(16)
  }
  return out
}

i = 100
console.time('bytesToHex')
while(i--) bytesToHex(uint)
console.timeEnd('bytesToHex')

i = 100
console.time('dataview4step')
while(i--) dataview4step(uint)
console.timeEnd('dataview4step')

i = 100
console.time('manual4step')
while(i--) manual4step(uint)
console.timeEnd('manual4step')

i = 100
console.time('buffer toString')
while(i--) buffer.toString('hex')
console.timeEnd('buffer toString')

i = 100
console.time('reduce')
while(i--) uint.reduce(reducer, '')
console.timeEnd('reduce')

i = 100
console.time('forLoop')
while(i--) forLoop(uint)
console.timeEnd('forLoop')

console.log('====================')

function inspect (uint) {
  var max = 50
  var str = bytesToHex(uint, 0, max, ' ')
  if (uint.length > max) str += ' ... '
  return '<Buffer ' + str + '>'
}

i = 1000
console.time('buffer inspect')
while(i--) buffer.inspect()
console.timeEnd('buffer inspect')

i = 1000
console.time('bytesToHex inspect')
while(i--) inspect(uint)
console.timeEnd('bytesToHex inspect')

I'm thinking whether or not i should publish it on NPM. couldn't find something like it.
If you like you can embed this bytesToHex function into your lib if you feel like it.
If you want me to publish it on npm and you would just like to require it i can do that too

Metadata

Metadata

Assignees

No one assigned

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions