Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

querystring: fix empty pairs handling #11234

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 23 additions & 17 deletions benchmark/querystring/querystring-parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,42 +12,48 @@ var inputs = {
multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz',
multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' +
'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz',
manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z'
manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z',
manyblankpairs: '&&&&&&&&&&&&&&&&&&&&&&&&',
altspaces: 'foo+bar=baz+quux&xyzzy+thud=quuy+quuz&abc=def+ghi'
};

var bench = common.createBenchmark(main, {
type: Object.keys(inputs),
n: [1e6],
});

// A deopt followed by a reopt of main() can happen right when the timed loop
// starts, which seems to have a noticeable effect on the benchmark results.
// So we explicitly disable optimization of main() to avoid this potential
// issue.
v8.setFlagsFromString('--allow_natives_syntax');
eval('%NeverOptimizeFunction(main)');

function main(conf) {
var type = conf.type;
var n = conf.n | 0;
var input = inputs[type];
var i;

// Force-optimize querystring.parse() so that the benchmark doesn't get
// disrupted by the optimizer kicking in halfway through.
v8.setFlagsFromString('--allow_natives_syntax');
if (type !== 'multicharsep') {
querystring.parse(input);
eval('%OptimizeFunctionOnNextCall(querystring.parse)');
querystring.parse(input);
} else {
querystring.parse(input, '&&&&&&&&&&');
eval('%OptimizeFunctionOnNextCall(querystring.parse)');
querystring.parse(input, '&&&&&&&&&&');
}
// Note: we do *not* use OptimizeFunctionOnNextCall() here because currently
// it causes a deopt followed by a reopt, which could make its way into the
// timed loop. Instead, just execute the function a "sufficient" number of
// times before the timed loop to ensure the function is optimized just once.
if (type === 'multicharsep') {
for (i = 0; i < n; i += 1)
Copy link
Member

@joyeecheung joyeecheung Feb 8, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this need to be n? Refs: v8/src/runtime-profiler.cc, this might change but 1e3 should probabaly be safe.

// Number of times a function has to be seen on the stack before it is
// compiled for baseline.
static const int kProfilerTicksBeforeBaseline = 1;
// Number of times a function has to be seen on the stack before it is
// optimized.
static const int kProfilerTicksBeforeOptimization = 2;
// If the function optimization was disabled due to high deoptimization count,
// but the function is hot and has been seen on the stack this number of times,
// then we try to reenable optimization for this function.
static const int kProfilerTicksBeforeReenablingOptimization = 250;
// If a function does not have enough type info (according to
// FLAG_type_info_threshold), but has seen a huge number of ticks,
// optimize it as it is.
static const int kTicksWhenNotEnoughTypeInfo = 100;
// We only have one byte to store the number of ticks.
STATIC_ASSERT(kProfilerTicksBeforeOptimization < 256);
STATIC_ASSERT(kProfilerTicksBeforeReenablingOptimization < 256);
STATIC_ASSERT(kTicksWhenNotEnoughTypeInfo < 256);

Copy link
Contributor Author

@mscdex mscdex Feb 8, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Originally I used n / 2, but I seemed to get better (less variable) results with n, especially considering the default value of n. Either way, n should be enough no matter what, so I kept it.

querystring.parse(input, '&&&&&&&&&&');

var i;
if (type !== 'multicharsep') {
bench.start();
for (i = 0; i < n; i += 1)
querystring.parse(input);
querystring.parse(input, '&&&&&&&&&&');
bench.end(n);
} else {
for (i = 0; i < n; i += 1)
querystring.parse(input);

bench.start();
for (i = 0; i < n; i += 1)
querystring.parse(input, '&&&&&&&&&&');
querystring.parse(input);
bench.end(n);
}
}
246 changes: 134 additions & 112 deletions lib/querystring.js
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,25 @@ function stringify(obj, sep, eq, options) {
return '';
}

const isHexTable = [
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0 - 15
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16 - 31
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32 - 47
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 48 - 63
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 64 - 79
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80 - 95
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 96 - 111
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 112 - 127
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 128 ...
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // ... 256
];

function charCodes(str) {
if (str.length === 0) return [];
if (str.length === 1) return [str.charCodeAt(0)];
Expand Down Expand Up @@ -292,14 +311,14 @@ function parse(qs, sep, eq, options) {
const customDecode = (decode !== qsUnescape);

const keys = [];
var posIdx = 0;
var lastPos = 0;
var sepIdx = 0;
var eqIdx = 0;
var key = '';
var value = '';
var keyEncoded = customDecode;
var valEncoded = customDecode;
const plusChar = (customDecode ? '%20' : ' ');
var encodeCheck = 0;
for (var i = 0; i < qs.length; ++i) {
const code = qs.charCodeAt(i);
Expand All @@ -310,142 +329,145 @@ function parse(qs, sep, eq, options) {
// Key/value pair separator match!
const end = i - sepIdx + 1;
if (eqIdx < eqLen) {
// If we didn't find the key/value separator, treat the substring as
// part of the key instead of the value
if (lastPos < end)
// We didn't find the (entire) key/value separator
if (lastPos < end) {
// Treat the substring as part of the key instead of the value
key += qs.slice(lastPos, end);
} else if (lastPos < end)
value += qs.slice(lastPos, end);
if (keyEncoded)
key = decodeStr(key, decode);
if (valEncoded)
value = decodeStr(value, decode);

if (key || value || lastPos - posIdx > sepLen || i === 0) {
// Use a key array lookup instead of using hasOwnProperty(), which is
// slower
if (keys.indexOf(key) === -1) {
obj[key] = value;
keys[keys.length] = key;
if (keyEncoded)
key = decodeStr(key, decode);
} else {
const curValue = obj[key] || '';
// A simple Array-specific property check is enough here to
// distinguish from a string value and is faster and still safe
// since we are generating all of the values being assigned.
if (curValue.pop)
curValue[curValue.length] = value;
else if (curValue)
obj[key] = [curValue, value];
// We saw an empty substring between separators
if (--pairs === 0)
return obj;
lastPos = i + 1;
sepIdx = eqIdx = 0;
continue;
}
} else {
if (lastPos < end) {
value += qs.slice(lastPos, end);
if (valEncoded)
value = decodeStr(value, decode);
}
} else if (i === 1) {
// A pair with repeated sep could be added into obj in the first loop
// and it should be deleted
delete obj[key];
if (keyEncoded)
key = decodeStr(key, decode);
}

// Use a key array lookup instead of using hasOwnProperty(), which is
// slower
if (keys.indexOf(key) === -1) {
obj[key] = value;
keys[keys.length] = key;
} else {
const curValue = obj[key];
// A simple Array-specific property check is enough here to
// distinguish from a string value and is faster and still safe
// since we are generating all of the values being assigned.
if (curValue.pop)
curValue[curValue.length] = value;
else
obj[key] = [curValue, value];
}
if (--pairs === 0)
break;
return obj;
keyEncoded = valEncoded = customDecode;
encodeCheck = 0;
key = value = '';
posIdx = lastPos;
encodeCheck = 0;
lastPos = i + 1;
sepIdx = eqIdx = 0;
}
continue;
} else {
sepIdx = 0;
if (!valEncoded) {
// Try to match an (valid) encoded byte (once) to minimize unnecessary
// calls to string decoding functions
if (code === 37/*%*/) {
encodeCheck = 1;
} else if (encodeCheck > 0 &&
((code >= 48/*0*/ && code <= 57/*9*/) ||
(code >= 65/*A*/ && code <= 70/*F*/) ||
(code >= 97/*a*/ && code <= 102/*f*/))) {
if (++encodeCheck === 3)
valEncoded = true;
// Try matching key/value separator (e.g. '=') if we haven't already
if (eqIdx < eqLen) {
if (code === eqCodes[eqIdx]) {
if (++eqIdx === eqLen) {
// Key/value separator match!
const end = i - eqIdx + 1;
if (lastPos < end)
key += qs.slice(lastPos, end);
encodeCheck = 0;
lastPos = i + 1;
}
continue;
} else {
encodeCheck = 0;
eqIdx = 0;
if (!keyEncoded) {
// Try to match an (valid) encoded byte once to minimize unnecessary
// calls to string decoding functions
if (code === 37/*%*/) {
encodeCheck = 1;
continue;
} else if (encodeCheck > 0) {
// eslint-disable-next-line no-extra-boolean-cast
Copy link
Contributor Author

@mscdex mscdex Feb 8, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If anyone is curious, this was added because there seems to be a positive difference when explicitly converting to a boolean in the conditional vs. implicitly converting the numeric value (or undefined) to a boolean. eslint doesn't know that isHexTable contains non-booleans, so I explicitly silence these "errors."

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't it better to use isHexTable[code] > 0 as a more readable option? The performance seems to be quite similar

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's debatable. I think the explicit conversion is easier to reason about than remembering how undefined values compare with numeric values.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, in this case I'd propose isHexTable[code] === 1 as an alternative

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If anyone is curious, this was added because there seems to be a positive difference when explicitly converting to a boolean in the conditional vs. implicitly converting the numeric value (or undefined) to a boolean.

Could you clarify what you mean by the "positive difference"?

Copy link
Contributor Author

@mscdex mscdex Feb 10, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you clarify what you mean by the "positive difference"?

An increase in performance?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's interesting. AFAIK if (foo) has exactly the same behavior as if (!!foo), so I'm not sure why the performance would be different.

If there really is a difference, then I propose adding a comment explaining that.

if (!!isHexTable[code]) {
if (++encodeCheck === 3)
keyEncoded = true;
continue;
} else {
encodeCheck = 0;
}
}
}
}
}
}

// Try matching key/value separator (e.g. '=') if we haven't already
if (eqIdx < eqLen) {
if (code === eqCodes[eqIdx]) {
if (++eqIdx === eqLen) {
// Key/value separator match!
const end = i - eqIdx + 1;
if (lastPos < end)
key += qs.slice(lastPos, end);
encodeCheck = 0;
if (code === 43/*+*/) {
if (lastPos < i)
key += qs.slice(lastPos, i);
key += plusChar;
lastPos = i + 1;
continue;
}
continue;
} else {
eqIdx = 0;
if (!keyEncoded) {
// Try to match an (valid) encoded byte once to minimize unnecessary
// calls to string decoding functions
if (code === 37/*%*/) {
encodeCheck = 1;
} else if (encodeCheck > 0 &&
((code >= 48/*0*/ && code <= 57/*9*/) ||
(code >= 65/*A*/ && code <= 70/*F*/) ||
(code >= 97/*a*/ && code <= 102/*f*/))) {
}
if (code === 43/*+*/) {
if (lastPos < i)
value += qs.slice(lastPos, i);
value += plusChar;
lastPos = i + 1;
} else if (!valEncoded) {
// Try to match an (valid) encoded byte (once) to minimize unnecessary
// calls to string decoding functions
if (code === 37/*%*/) {
encodeCheck = 1;
} else if (encodeCheck > 0) {
// eslint-disable-next-line no-extra-boolean-cast
if (!!isHexTable[code]) {
if (++encodeCheck === 3)
keyEncoded = true;
valEncoded = true;
} else {
encodeCheck = 0;
}
}
}
}

if (code === 43/*+*/) {
if (eqIdx < eqLen) {
if (lastPos < i)
key += qs.slice(lastPos, i);
key += '%20';
keyEncoded = true;
} else {
if (lastPos < i)
value += qs.slice(lastPos, i);
value += '%20';
valEncoded = true;
}
lastPos = i + 1;
}
}

// Check if we have leftover key or value data
if (pairs !== 0 && (lastPos < qs.length || eqIdx > 0)) {
if (lastPos < qs.length) {
if (eqIdx < eqLen)
key += qs.slice(lastPos);
else if (sepIdx < sepLen)
value += qs.slice(lastPos);
}
if (keyEncoded)
key = decodeStr(key, decode);
if (valEncoded)
value = decodeStr(value, decode);
// Use a key array lookup instead of using hasOwnProperty(), which is
// slower
if (keys.indexOf(key) === -1) {
obj[key] = value;
keys[keys.length] = key;
} else {
const curValue = obj[key];
// A simple Array-specific property check is enough here to
// distinguish from a string value and is faster and still safe since
// we are generating all of the values being assigned.
if (curValue.pop)
curValue[curValue.length] = value;
else
obj[key] = [curValue, value];
}
// Deal with any leftover key or value data
if (lastPos < qs.length) {
if (eqIdx < eqLen)
key += qs.slice(lastPos);
else if (sepIdx < sepLen)
value += qs.slice(lastPos);
} else if (eqIdx === 0) {
// We ended on an empty substring
return obj;
}
if (keyEncoded)
key = decodeStr(key, decode);
if (valEncoded)
value = decodeStr(value, decode);
// Use a key array lookup instead of using hasOwnProperty(), which is slower
if (keys.indexOf(key) === -1) {
obj[key] = value;
keys[keys.length] = key;
} else {
const curValue = obj[key];
// A simple Array-specific property check is enough here to
// distinguish from a string value and is faster and still safe since
// we are generating all of the values being assigned.
if (curValue.pop)
curValue[curValue.length] = value;
else
obj[key] = [curValue, value];
}

return obj;
Expand Down
Loading