Skip to content

querystring: improve parse() performance #10874

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 25, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 12 additions & 21 deletions benchmark/querystring/querystring-parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,35 +3,26 @@ var common = require('../common.js');
var querystring = require('querystring');
var v8 = require('v8');

var types = [
'noencode',
'multicharsep',
'encodemany',
'encodelast',
'multivalue',
'multivaluemany',
'manypairs'
];
// Query strings fed to querystring.parse(), keyed by scenario name. The keys
// double as the values of the benchmark's `type` option.
var inputs = {
// Plain pairs with nothing to decode.
noencode: 'foo=bar&baz=quux&xyzzy=thud',
// Pairs separated by long runs of consecutive '&' characters.
multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud',
// '%' characters that are not followed by two valid hex digits.
encodefake: 'foo=%©ar&baz=%A©uux&xyzzy=%©ud',
// Several percent-encoded characters spread across keys and values.
encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d',
// A single percent-encoded character at the very end of the input.
encodelast: 'foo=bar&baz=quux&xyzzy=thu%64',
// The key `foo` repeated a few times.
multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz',
// The key `foo` repeated many times.
multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' +
'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz',
// Many single-letter keys with no '=' and no value.
manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z'
};

var bench = common.createBenchmark(main, {
type: types,
type: Object.keys(inputs),
n: [1e6],
});

function main(conf) {
var type = conf.type;
var n = conf.n | 0;

var inputs = {
noencode: 'foo=bar&baz=quux&xyzzy=thud',
multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud',
encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d',
encodelast: 'foo=bar&baz=quux&xyzzy=thu%64',
multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz',
multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' +
'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz',
manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z'
};
var input = inputs[type];

// Force-optimize querystring.parse() so that the benchmark doesn't get
Expand Down
104 changes: 62 additions & 42 deletions lib/querystring.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,19 @@
// Query String Utilities

'use strict';

const QueryString = exports;
const QueryString = module.exports = {
unescapeBuffer,
// `unescape()` is a JS global, so we need to use a different local name
unescape: qsUnescape,

// `escape()` is a JS global, so we need to use a different local name
escape: qsEscape,

stringify,
encode: stringify,

parse,
decode: parse
};
const Buffer = require('buffer').Buffer;

// This constructor is used to store parsed query string values. Instantiating
Expand All @@ -13,7 +24,7 @@ ParsedQueryString.prototype = Object.create(null);


// a safe fast alternative to decodeURIComponent
QueryString.unescapeBuffer = function(s, decodeSpaces) {
function unescapeBuffer(s, decodeSpaces) {
var out = Buffer.allocUnsafe(s.length);
var state = 0;
var n, m, hexchar;
Expand Down Expand Up @@ -77,7 +88,7 @@ QueryString.unescapeBuffer = function(s, decodeSpaces) {
// TODO support returning arbitrary buffers.

return out.slice(0, outIndex - 1);
};
}


function qsUnescape(s, decodeSpaces) {
Expand All @@ -87,13 +98,12 @@ function qsUnescape(s, decodeSpaces) {
return QueryString.unescapeBuffer(s, decodeSpaces).toString();
}
}
QueryString.unescape = qsUnescape;


var hexTable = new Array(256);
for (var i = 0; i < 256; ++i)
hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase();
QueryString.escape = function(str) {
function qsEscape(str) {
// replaces encodeURIComponent
// http://www.ecma-international.org/ecma-262/5.1/#sec-15.1.3.4
if (typeof str !== 'string') {
Expand Down Expand Up @@ -164,20 +174,20 @@ QueryString.escape = function(str) {
if (lastPos < str.length)
return out + str.slice(lastPos);
return out;
};
}

var stringifyPrimitive = function(v) {
function stringifyPrimitive(v) {
if (typeof v === 'string')
return v;
if (typeof v === 'number' && isFinite(v))
return '' + v;
if (typeof v === 'boolean')
return v ? 'true' : 'false';
return '';
};
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any reasonable perf impact from moving to a switch?
e.g.

switch (typeof v) {
  case 'string':
    return v;
  case 'boolean':
    return String(v);
  case 'number':
    if (isFinite(v)) return String(v);
  default:
    return '';
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I dunno, I didn't do anything with this particular function. However, I remember we switched away from that type of usage (no pun intended) because V8 optimizes typeof xx === yyy statements. Before we switched to separate if statements, the code was storing the result of typeof and that was causing a performance degradation. I suspect switch(typeof v) might have the same effect.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fair enough :-)



QueryString.stringify = QueryString.encode = function(obj, sep, eq, options) {
function stringify(obj, sep, eq, options) {
sep = sep || '&';
eq = eq || '=';

Expand Down Expand Up @@ -215,34 +225,43 @@ QueryString.stringify = QueryString.encode = function(obj, sep, eq, options) {
return fields;
}
return '';
};
}

// Parse a key/val string.
QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
sep = sep || '&';
eq = eq || '=';
// Returns an array holding the UTF-16 code unit of every position in `str`,
// in order. Empty and single-character inputs are answered up front without
// entering the loop.
function charCodes(str) {
  const len = str.length;
  if (len === 0)
    return [];
  if (len === 1)
    return [str.charCodeAt(0)];

  const codes = [];
  var pos = 0;
  while (pos < len) {
    codes.push(str.charCodeAt(pos));
    pos += 1;
  }
  return codes;
}
const defSepCodes = [38]; // &
const defEqCodes = [61]; // =

// Parse a key/val string.
function parse(qs, sep, eq, options) {
const obj = new ParsedQueryString();

if (typeof qs !== 'string' || qs.length === 0) {
return obj;
}

if (typeof sep !== 'string')
sep += '';

const eqLen = eq.length;
const sepLen = sep.length;
var sepCodes = (!sep ? defSepCodes : charCodes(sep + ''));
var eqCodes = (!eq ? defEqCodes : charCodes(eq + ''));
const sepLen = sepCodes.length;
const eqLen = eqCodes.length;

var maxKeys = 1000;
var pairs = 1000;
if (options && typeof options.maxKeys === 'number') {
maxKeys = options.maxKeys;
// -1 is used in place of a value like Infinity for meaning
// "unlimited pairs" because of additional checks V8 (at least as of v5.4)
// has to do when using variables that contain values like Infinity. Since
// `pairs` is always decremented and checked explicitly for 0, -1 works
// effectively the same as Infinity, while providing a significant
// performance boost.
pairs = (options.maxKeys > 0 ? options.maxKeys : -1);
}

var pairs = Infinity;
if (maxKeys > 0)
pairs = maxKeys;

var decode = QueryString.unescape;
if (options && typeof options.decodeURIComponent === 'function') {
decode = options.decodeURIComponent;
Expand All @@ -262,7 +281,7 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
const code = qs.charCodeAt(i);

// Try matching key/value pair separator (e.g. '&')
if (code === sep.charCodeAt(sepIdx)) {
if (code === sepCodes[sepIdx]) {
if (++sepIdx === sepLen) {
// Key/value pair separator match!
const end = i - sepIdx + 1;
Expand All @@ -284,10 +303,10 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
keys[keys.length] = key;
} else {
const curValue = obj[key];
// `instanceof Array` is used instead of Array.isArray() because it
// is ~15-20% faster with v8 4.7 and is safe to use because we are
// using it with values being created within this function
if (curValue instanceof Array)
// A simple Array-specific property check is enough here to
// distinguish from a string value and is faster and still safe since
// we are generating all of the values being assigned.
if (curValue.pop)
curValue[curValue.length] = value;
else
obj[key] = [curValue, value];
Expand Down Expand Up @@ -322,7 +341,7 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {

// Try matching key/value separator (e.g. '=') if we haven't already
if (eqIdx < eqLen) {
if (code === eq.charCodeAt(eqIdx)) {
if (code === eqCodes[eqIdx]) {
if (++eqIdx === eqLen) {
// Key/value separator match!
const end = i - eqIdx + 1;
Expand Down Expand Up @@ -354,12 +373,12 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {

if (code === 43/*+*/) {
if (eqIdx < eqLen) {
if (i - lastPos > 0)
if (lastPos < i)
key += qs.slice(lastPos, i);
key += '%20';
keyEncoded = true;
} else {
if (i - lastPos > 0)
if (lastPos < i)
value += qs.slice(lastPos, i);
value += '%20';
valEncoded = true;
Expand All @@ -369,7 +388,7 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
}

// Check if we have leftover key or value data
if (pairs > 0 && (lastPos < qs.length || eqIdx > 0)) {
if (pairs !== 0 && (lastPos < qs.length || eqIdx > 0)) {
if (lastPos < qs.length) {
if (eqIdx < eqLen)
key += qs.slice(lastPos);
Expand All @@ -387,22 +406,23 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
keys[keys.length] = key;
} else {
const curValue = obj[key];
// `instanceof Array` is used instead of Array.isArray() because it
// is ~15-20% faster with v8 4.7 and is safe to use because we are
// using it with values being created within this function
if (curValue instanceof Array)
// A simple Array-specific property check is enough here to
// distinguish from a string value and is faster and still safe since
// we are generating all of the values being assigned.
if (curValue.pop)
curValue[curValue.length] = value;
else
obj[key] = [curValue, value];
}
}

return obj;
};
}


// v8 does not optimize functions with try-catch blocks, so we isolate them here
// to minimize the damage
// to minimize the damage (Note: no longer true as of V8 5.4 -- but still will
// not be inlined).
function decodeStr(s, decoder) {
try {
return decoder(s);
Expand Down