Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add replace invalid http header character #26

Merged
merged 1 commit into from
Feb 14, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,9 @@ utils.split('foo,bar,,,', ','); // ['foo', 'bar']

// replace string work with special chars which `String.prototype.replace` can't handle
utils.replace('<body> hi', '<body>', '$& body'); // '$& body hi'

// replace http header invalid characters
utils.replaceInvalidHttpHeaderChar('abc你好11'); // {invalid: true, val: 'abc  11'} (each invalid character becomes one space)
```

### Try
Expand Down
115 changes: 115 additions & 0 deletions benchmark/string.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
'use strict';

/**
* Module dependencies.
*/

const utility = require('../');

const Benchmark = require('benchmark');
const suite = new Benchmark.Suite();
// Ten consecutive ASCII characters (char codes 32-41) shared by the short fixtures.
const ascii10 = Array.from({ length: 10 }, (_, i) => String.fromCharCode(i + 32));

// Builds a short fixture in which the character at `at` is swapped for '中文'.
const withInvalidAt10 = at => ascii10.map((ch, i) => (i === at ? '中文' : ch)).join('');

// Short fixtures: the non-ASCII run sits at offset 1, 3 and 5 respectively.
const str1_10 = withInvalidAt10(1);
const str2_10 = withInvalidAt10(3);
const str3_10 = withInvalidAt10(5);

// 1000 ASCII characters cycling through char codes 32-111, shared by the long fixtures.
const ascii1000 = Array.from({ length: 1000 }, (_, i) => String.fromCharCode(i % 80 + 32));

// Builds a long fixture in which the character at `at` is swapped for '中文'.
const withInvalidAt1000 = at => ascii1000.map((ch, i) => (i === at ? '中文' : ch)).join('');

// Long fixtures: the non-ASCII run sits at offset 10, 100 and 800 respectively.
const str1_1000 = withInvalidAt1000(10);
const str2_1000 = withInvalidAt1000(100);
const str3_1000 = withInvalidAt1000(800);

// console.info('===>', utility.replaceInvalidHttpHeaderChar(str1_10));
// console.info('===>', utility.replaceInvalidHttpHeaderChar(str1_1000));

// Header values used as the valid fixture set.
const headers = {
  'Host': 'my.foo.com',
  'Connection': 'keep-alive',
  'Accept': 'text/html, */*; q=0.01',
  'X-Requested-With': 'XMLHttpRequest',
  'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
  'Referer': 'https://my.alipay.com/portal/i.htm',
  'Accept-Encoding': 'gzip, deflate, sdch, br',
  'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
  'Cookie': 'cna=Aq7qEKDcuSsCASp4SdAqdtIE; LOCALE=zh_CN; ZAUTH_REST_LOGIN_INFO=1111111111111111111111111122222222222222222222222222223333333333333aaaaaaaaaaaaaaaaaaaaaddddddddddddddddddbbbbbbbbbbbbbbbbb333333333333bbbbbbbbbbbbbbbbbbbbbbbbbddddddddddddddddddddbbbbbbbbbbbbbbbbbbbb333333333333333333333333333bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb3333kkkkkkkkkkkkkzzzzzzzzzzzzzdddddddddddddddddddddddddkkkkkkkkkkkkkkkkkkkk3333333333333333333kkkkkkkkkkkkkkkkkkkkkkk; ABCDEFGAAAUTHJSESSIONID=DDFSFSFSFSFEESFSDFSDFSFyD1zyhINADBCDDDDBB; mobileSendTime=-1; credibleMobileSendTime=-1; ctuMobileSendTime=-1; riskMobileBankSendTime=-1; riskMobileAccoutSendTime=-1; riskMobileCreditSendTime=-1; riskCredibleMobileSendTime=-1; riskOriginalAccountMobileSendTime=-1; aeofen=kA6t-uV3HlfVbpLt; LoginForm=testabc_login_auth; bjklmnd="K1iSL1mlXoWjQ8nVYGjtRnfEXYkS/VRtvnHvn5jXMRU1IkwUm44hgEQa"; CLUB_ALIPAY_COM=11238849458323234950303; iw.userid="K23iML1mlBoDjE8nVEGEtBg=="; tt_apache_tracktmp="uid=11238849458323234950303"; session.cookieNameId=ABCDEFGAAAUTHJSESSIONID; CHAIR_SESS=K12iO619fABDFHGGKHKDIDO_wEsrVBE-ddeftrE3hh_R1mMywjYorRhLQ823aOAvkxKVL_vlB56DDPElcBYwsduFu2sDEFLX9zyHyeIzsBidDcBSLB_Cdj62Yyh5passfedpzI2hUSTRsylVTCc5n2deddf==; ABCDEFGAAAUTHJSESSIONID.sig=3Gr3S-Hwd6-YWX56jsdfswDEGEEGzBHlNHnlsUlaC_CaNU; ABCDEFGAAAUTHJSESSIONID=RZ04Xc6XMGCQHGA5stpwlJougOFFeAauthBD00AD22; bone=VZ11A; spanner=49GKop+ywXeMAfFDEeVDDshSF/xD/mjdv',
};

// Same headers with '中文' appended to every value, so each one contains invalid characters.
const headers_invalid = {};
for (const key of Object.keys(headers)) {
  headers_invalid[key] = headers[key] + '中文';
}

// Log the sanitised result for every invalid header value once, before the suite runs.
for (const key of Object.keys(headers_invalid)) {
  console.info(utility.replaceInvalidHttpHeaderChar(headers_invalid[key]));
}

// Register one benchmark per fixture, then run the suite synchronously.
// The str*_10 / str*_1000 cases each sanitise a single string; the
// real_headers cases sanitise every value of a header map per iteration.
suite
  .add('utility.replaceInvalidHttpHeaderChar(str1_10)', () => {
    utility.replaceInvalidHttpHeaderChar(str1_10);
  })
  .add('utility.replaceInvalidHttpHeaderChar(str2_10)', () => {
    utility.replaceInvalidHttpHeaderChar(str2_10);
  })
  .add('utility.replaceInvalidHttpHeaderChar(str3_10)', () => {
    utility.replaceInvalidHttpHeaderChar(str3_10);
  })
  .add('utility.replaceInvalidHttpHeaderChar(str1_1000)', () => {
    utility.replaceInvalidHttpHeaderChar(str1_1000);
  })
  .add('utility.replaceInvalidHttpHeaderChar(str2_1000)', () => {
    utility.replaceInvalidHttpHeaderChar(str2_1000);
  })
  .add('utility.replaceInvalidHttpHeaderChar(str3_1000)', () => {
    utility.replaceInvalidHttpHeaderChar(str3_1000);
  })
  // Label spelling corrected: it previously read "relaceInvalidHttpHeaderChar".
  .add('utility.replaceInvalidHttpHeaderChar(real_headers)', () => {
    for (const key of Object.keys(headers)) {
      utility.replaceInvalidHttpHeaderChar(headers[key]);
    }
  })
  .add('utility.replaceInvalidHttpHeaderChar(real_headers_invalid)', () => {
    for (const key of Object.keys(headers_invalid)) {
      utility.replaceInvalidHttpHeaderChar(headers_invalid[key]);
    }
  })
  // Print each benchmark's summary line as soon as it finishes.
  .on('cycle', event => {
    console.log(String(event.target));
  })
  .on('complete', () => {
    console.log('done');
  })
  .run({ async: false });
// $ node benchmark/string.js
// utility.replaceInvalidHttpHeaderChar(str1_10) x 1,310,312 ops/sec ±1.03% (87 runs sampled)
// utility.replaceInvalidHttpHeaderChar(str2_10) x 1,309,274 ops/sec ±1.35% (86 runs sampled)
// utility.replaceInvalidHttpHeaderChar(str3_10) x 1,332,818 ops/sec ±1.22% (88 runs sampled)
// utility.replaceInvalidHttpHeaderChar(str1_1000) x 29,391 ops/sec ±1.41% (90 runs sampled)
// utility.replaceInvalidHttpHeaderChar(str2_1000) x 27,842 ops/sec ±1.29% (83 runs sampled)
// utility.replaceInvalidHttpHeaderChar(str3_1000) x 26,905 ops/sec ±1.63% (85 runs sampled)
// utility.relaceInvalidHttpHeaderChar(real_headers) x 85,277 ops/sec ±1.34% (83 runs sampled)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

又是一个性能瓶颈点

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

恩, 目前我们这里执行过了, 到了 http 模块中, 还是会在执行一遍.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

node 里面不知道为啥那么严格, 直接异常了.

// utility.relaceInvalidHttpHeaderChar(real_headers_invalid) x 16,691 ops/sec ±1.14% (88 runs sampled)
69 changes: 68 additions & 1 deletion lib/string.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ exports.split = function split(str, sep) {
}
return needs;
};

// always optimized
exports.splitAlwaysOptimized = function splitAlwaysOptimized() {
var str = '';
Expand Down Expand Up @@ -76,3 +75,71 @@ exports.replace = function replace(str, substr, newSubstr) {
}
return str.replace(substr, replaceFunction);
};

// Table adapted from https://github.com/nodejs/node/blob/v7.5.0/lib/_http_common.js#L300
/**
 * Lookup table indexed by character code (0 - 255): 1 when the character may
 * appear in an HTTP header field value, 0 when it may not.
 *
 *   field-value   = *( field-content / obs-fold )
 *   field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
 *   field-vchar   = VCHAR / obs-text
 *
 * Allowed codes: HTAB (9), 32 - 126 and 128 - 255. Everything below 32 other
 * than HTAB, and DEL (127), is rejected. Codes above 255 fall outside the
 * table, so looking them up yields `undefined`.
 **/
var validHdrChars = (function buildValidHdrChars() {
  var table = [];
  for (var code = 0; code < 256; ++code) {
    var allowed = code === 9 || (code >= 32 && code !== 127);
    table.push(allowed ? 1 : 0);
  }
  return table;
})();

/**
* Replace invalid http header characters with spaces
*
* @param {String} val
* @param {String} replacement
* @return {Object}
*/
exports.replaceInvalidHttpHeaderChar = function replaceInvalidHttpHeaderChar(val, replacement) {
replacement = replacement || ' ';
var invalid = false;

if (!val || typeof val !== 'string') {
return {
val: val,
invalid: invalid,
};
}

var chars;
for (var i = 0; i < val.length; ++i) {
if (!validHdrChars[val.charCodeAt(i)]) {
// 延迟产生这个数组, 只有找到非法字符的时候, 才创建.
chars = chars || val.split('');
chars[i] = replacement;
}
}

if (chars) {
val = chars.join('');
invalid = true;
}

return {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里肯定也不支持

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

那测试用例为啥不影响, 我看测试用例大量用了 const 和 () =>

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

image

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

测试用例用了 ava 吧,ava 用 babel 编译了

val: val,
invalid: invalid,
};
};
33 changes: 33 additions & 0 deletions test/string.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,36 @@ test('replace() should support regex', t => {

t.is(utils.replace('{ <body> }', /<body>/, 'this is body $& $` $\' $$'), '{ this is body $& $` $\' $$ }');
});

test('replaceInvalidHttpHeaderChar() should replace invalid char', t => {
  // Checks both fields of the result for one input.
  const check = (input, replacement, expectedVal, expectedInvalid) => {
    const result = replacement === undefined
      ? utils.replaceInvalidHttpHeaderChar(input)
      : utils.replaceInvalidHttpHeaderChar(input, replacement);
    t.is(result.val, expectedVal);
    t.is(result.invalid, expectedInvalid);
  };

  // Values made only of allowed characters come back unchanged.
  // The backslash is written as `\\`: the previous `'\|'` was a no-op escape
  // that produced just '|', so the backslash was never exercised.
  ['', '123', 'abc', '!@#$%^&*()_+-=\\|', '\tfoo bar'].forEach(clean => {
    check(clean, undefined, clean, false);
  });

  // Each invalid character is replaced by one space, wherever it sits.
  check('你1好0', undefined, ' 1 0', true);
  check('1你1好0', undefined, '1 1 0', true);
  check('11你1好0', undefined, '11 1 0', true);
  check('111你1好0', undefined, '111 1 0', true);
  check('1111你1好0', undefined, '1111 1 0', true);

  // Control characters (CR, LF, DEL) are invalid as well.
  check('a\r\nb', undefined, 'a  b', true);
  check('a\u007fb', undefined, 'a b', true);

  // A custom replacement is used instead of the default space.
  check('1111你1好0', '-', '1111-1-0', true);

  // Non-string values are passed through untouched.
  check(null, undefined, null, false);
  check(undefined, undefined, undefined, false);
  check(123, undefined, 123, false);
});