Skip to content

Commit 7fa2f4c

Browse files
committed
refactor: improve CESU-8 encoding coding style
BTW: the JS CESU-8 encoding is faster than the Buffer UTF-8 encoding:
```
putRawStringSmallLessThan0x80*10000: 672.642ms
putRawStringSmallLessThan0x800*10000: 592.960ms
putRawStringSmallBiggerThan0x800*10000: 861.010ms
putUTF8RawStringSmallLessThan0x80*10000: 841.638ms
putUTF8RawStringSmallLessThan0x800*10000: 958.383ms
putUTF8RawStringSmallBiggerThan0x800*10000: 1793.470ms
```
1 parent 7dd0a0c commit 7fa2f4c

File tree

6 files changed

+245
-47
lines changed

6 files changed

+245
-47
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,4 @@ results
1414
node_modules
1515
npm-debug.log
1616
coverage/
17+
profile-*

.travis.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1+
sudo: false
12
language: node_js
23
node_js:
34
- '0.12'
4-
- '1'
5-
- '2'
6-
- '3'
75
- '4'
8-
script: "npm run test-travis"
6+
- '6'
7+
- '8'
8+
script: "npm run test-travis && npm run benchmark"
99
after_script: "npm install coveralls@2 && cat ./coverage/lcov.info | coveralls"

benchmark/putRawString.js

Lines changed: 145 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,23 +10,165 @@ largeStr += largeStr
1010
var bb = ByteBuffer.allocate(1024);
1111
var max = 10;
1212

13+
// putRawString
14+
1315
bb.putRawString(makeStr('a', 200));
14-
console.log('small bytes %s', bb.array().length);
16+
console.log('putRawString: small < 0x80 bytes %s, one char length: %d',
17+
bb.array().length, 'a'.length);
18+
bb.reset();
19+
20+
bb.putRawString(makeStr('ȅ', 200));
21+
console.log('putRawString: small < 0x800 bytes %s, one char length: %d',
22+
bb.array().length, 'ȅ'.length);
23+
bb.reset();
24+
25+
bb.putRawString(makeStr('𐐀', 200));
26+
console.log('putRawString: small >= 0x800 bytes %s, one char length: %d, maxIncreaseSize: %d, bb.size: %d',
27+
bb.array().length, '𐐀'.length, makeStr('𐐀', 200).length * 3, bb._size);
28+
bb.reset();
29+
30+
bb.putRawString(makeStr(String.fromCharCode(0x801), 200));
31+
console.log('putRawString: small = 0x801 bytes %s, one char length: %d',
32+
bb.array().length, String.fromCharCode(0x801).length);
33+
bb.reset();
34+
35+
bb.putRawString(makeStr('中文', 200));
36+
console.log('putRawString: small 中文 bytes %s, one char length: %d, maxIncreaseSize: %d, bb.size: %d',
37+
bb.array().length, '中文'.length, makeStr('中文', 200).length * 3, bb._size);
38+
bb.reset();
39+
40+
bb.putRawString(makeStr('\ud83c\udf3c', 200));
41+
console.log('putRawString: small \ud83c\udf3c bytes %s, one char length: %d, maxIncreaseSize: %d, bb.size: %d',
42+
bb.array().length, '\ud83c\udf3c'.length, makeStr('\ud83c\udf3c', 200).length * 3, bb._size);
1543
bb.reset();
1644

1745
bb.putRawString(makeStr(largeStr, 10));
18-
console.log('large bytes %s', bb.array().length);
46+
// The format string has two placeholders (%s, %d); pass a value for each so
// the trailing "%d" is not printed literally. As in the other log lines, the
// second value is the length of the unit string handed to makeStr.
console.log('putRawString: large bytes %s, one char length: %d',
  bb.array().length, largeStr.length);
1948
bb.reset();
2049

50+
// putUTF8RawString
51+
52+
bb = ByteBuffer.allocate(2);
53+
bb.putUTF8RawString(makeStr('a', 200));
54+
console.log('putUTF8RawString: small < 0x80 bytes %s, one char length: %d',
55+
bb.array().length, 'a'.length);
56+
bb.reset();
57+
58+
bb.putUTF8RawString(makeStr('ȅ', 200));
59+
console.log('putUTF8RawString: small < 0x800 bytes %s, one char length: %d',
60+
bb.array().length, 'ȅ'.length);
61+
bb.reset();
62+
63+
bb.putUTF8RawString(makeStr('𐐀', 200));
64+
console.log('putUTF8RawString: small >= 0x800 bytes %s, one char length: %d, byteLength: %d, bb.size: %d',
65+
bb.array().length, '𐐀'.length, Buffer.byteLength(makeStr('𐐀', 200)), bb._size);
66+
bb.reset();
67+
68+
bb.putUTF8RawString(makeStr('中文', 200));
69+
console.log('putUTF8RawString: small 中文 bytes %s, one char length: %d, byteLength: %d, bb.size: %d',
70+
bb.array().length, '中文'.length, Buffer.byteLength(makeStr('中文', 200)), bb._size);
71+
bb.reset();
72+
73+
bb.putUTF8RawString(makeStr('\ud83c\udf3c', 200));
74+
console.log('putUTF8RawString: small \ud83c\udf3c bytes %s, one char length: %d, byteLength: %d, bb.size: %d',
75+
bb.array().length, '\ud83c\udf3c'.length, Buffer.byteLength(makeStr('\ud83c\udf3c', 200)), bb._size);
76+
bb.reset();
77+
78+
bb.putUTF8RawString(makeStr(String.fromCharCode(0x801), 200));
79+
console.log('putUTF8RawString: small = 0x801 bytes %s, one char length: %d',
80+
bb.array().length, String.fromCharCode(0x801).length);
81+
bb.reset();
82+
83+
bb.putUTF8RawString(makeStr(largeStr, 10));
84+
// The format string has two placeholders (%s, %d); pass a value for each so
// the trailing "%d" is not printed literally. As in the other log lines, the
// second value is the length of the unit string handed to makeStr.
console.log('putUTF8RawString: large bytes %s, one char length: %d',
  bb.array().length, largeStr.length);
86+
bb.reset();
87+
88+
bb = ByteBuffer.allocate(1024);
89+
2190
var run = bench([
22-
function putRawStringSmall(cb) {
91+
function putRawStringSmallLessThan0x80(cb) {
2392
for (var i = 0; i < max; i++) {
2493
bb.putRawString(makeStr('a', 200));
2594
}
2695
bb.array();
2796
bb.reset();
2897
setImmediate(cb);
2998
},
99+
function putRawStringSmallLessThan0x800(cb) {
100+
for (var i = 0; i < max; i++) {
101+
bb.putRawString(makeStr('ȅ', 200));
102+
}
103+
bb.array();
104+
bb.reset();
105+
setImmediate(cb);
106+
},
107+
function putRawStringSmallBiggerThan0x800(cb) {
108+
for (var i = 0; i < max; i++) {
109+
bb.putRawString(makeStr('𐐀', 200));
110+
}
111+
bb.array();
112+
bb.reset();
113+
setImmediate(cb);
114+
},
115+
function putRawStringSmallChinese(cb) {
116+
for (var i = 0; i < max; i++) {
117+
bb.putRawString(makeStr('中文', 200));
118+
}
119+
bb.array();
120+
bb.reset();
121+
setImmediate(cb);
122+
},
123+
function putRawStringSmallEmoji(cb) {
124+
for (var i = 0; i < max; i++) {
125+
bb.putRawString(makeStr('\ud83c\udf3c', 200));
126+
}
127+
bb.array();
128+
bb.reset();
129+
setImmediate(cb);
130+
},
131+
132+
function putUTF8RawStringSmallLessThan0x80(cb) {
133+
for (var i = 0; i < max; i++) {
134+
bb.putUTF8RawString(makeStr('a', 200));
135+
}
136+
bb.array();
137+
bb.reset();
138+
setImmediate(cb);
139+
},
140+
function putUTF8RawStringSmallLessThan0x800(cb) {
141+
for (var i = 0; i < max; i++) {
142+
bb.putUTF8RawString(makeStr('ȅ', 200));
143+
}
144+
bb.array();
145+
bb.reset();
146+
setImmediate(cb);
147+
},
148+
function putUTF8RawStringSmallBiggerThan0x800(cb) {
149+
for (var i = 0; i < max; i++) {
150+
bb.putUTF8RawString(makeStr('𐐀', 200));
151+
}
152+
bb.array();
153+
bb.reset();
154+
setImmediate(cb);
155+
},
156+
function putUTF8RawStringSmallChinese(cb) {
157+
for (var i = 0; i < max; i++) {
158+
bb.putUTF8RawString(makeStr('中文', 200));
159+
}
160+
bb.array();
161+
bb.reset();
162+
setImmediate(cb);
163+
},
164+
function putUTF8RawStringSmallEmoji(cb) {
165+
for (var i = 0; i < max; i++) {
166+
bb.putUTF8RawString(makeStr('\ud83c\udf3c', 200));
167+
}
168+
bb.array();
169+
bb.reset();
170+
setImmediate(cb);
171+
},
30172
// function putRawStringLarge(cb) {
31173
// for (var i = 0; i < max; i++) {
32174
// bb.putRawString(makeStr(largeStr, 10));

lib/byte.js

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ ByteBuffer.prototype._putString = function (index, value, format) {
344344

345345
// Prints a string to the Buffer, encoded as CESU-8
346346
ByteBuffer.prototype.putRawString = function (index, str) {
347-
if (typeof index === 'string') {
347+
if (arguments.length === 1) {
348348
// putRawString(str)
349349
str = index;
350350
index = this._offset;
@@ -381,19 +381,42 @@ ByteBuffer.prototype.putRawString = function (index, str) {
381381
this._bytes[index++] = ch;
382382
} else if (ch < 0x800) {
383383
// 0x800: 2048
384-
this._bytes[index++] = (0xc0 + ((ch >> 6) & 0x1f)) >>> 32;
385-
this._bytes[index++] = (0x80 + (ch & 0x3f)) >>> 32;
384+
// this._bytes[index++] = (0xc0 + ((ch >> 6) & 0x1f)) >>> 32;
385+
// this._bytes[index++] = (0x80 + (ch & 0x3f)) >>> 32;
386+
this._bytes[index++] = (ch >>> 6) | 0xc0;
387+
this._bytes[index++] = (ch & 0x3f) | 0x80; // 0x3f => 0b00111111
386388
} else {
387-
this._bytes[index++] = (0xe0 + ((ch >> 12) & 0xf)) >>> 32;
388-
this._bytes[index++] = (0x80 + ((ch >> 6) & 0x3f)) >>> 32;
389-
this._bytes[index++] = (0x80 + (ch & 0x3f)) >>> 32;
389+
// this._bytes[index++] = (0xe0 + ((ch >> 12) & 0xf)) >>> 32;
390+
// this._bytes[index++] = (0x80 + ((ch >> 6) & 0x3f)) >>> 32;
391+
// this._bytes[index++] = (0x80 + (ch & 0x3f)) >>> 32;
392+
this._bytes[index++] = (ch >>> 12) | 0xe0;
393+
this._bytes[index++] = ((ch >>> 6) & 0x3f) | 0x80;
394+
this._bytes[index++] = (ch & 0x3f) | 0x80;
390395
}
391396
}
392397
// index is now probably less than @_offset and reflects the real length
393398
this._offset = index;
394399
return this;
395400
};
396401

402+
/**
 * Writes `str` into the buffer using Node's default Buffer string encoding
 * (UTF-8). Only the encoded bytes are written.
 *
 * Call forms:
 *   putUTF8RawString(str)        - write at the current offset
 *   putUTF8RawString(index, str) - write at the given position
 *
 * In both forms `_offset` ends up just past the last byte written.
 *
 * @param {Number|String} index - write position, or the string itself when
 *   it is the only argument
 * @param {String} [str] - string to encode
 * @return {ByteBuffer} this, for chaining
 */
ByteBuffer.prototype.putUTF8RawString = function (index, str) {
  if (arguments.length === 1) {
    // putUTF8RawString(str)
    str = index;
    index = this._offset;
  }

  // Prefer Buffer.from when present; otherwise use the legacy constructor.
  var buf = Buffer.from ? Buffer.from(str) : new Buffer(str);

  // Request capacity for both call forms. Buffer#copy silently truncates when
  // the target is too small, which would leave `_offset` pointing past the
  // bytes that were really written.
  // NOTE(review): assumes _checkSize(size) makes `_bytes` at least `size`
  // bytes long, as its existing use in this method implies - confirm.
  this._checkSize(index + buf.length);
  buf.copy(this._bytes, index);

  this._offset = index + buf.length;
  return this;
};
419+
397420
ByteBuffer.prototype._copy = function (start, end) {
398421
// magic number here..
399422
// @see benchmark/buffer_slice_and_copy.js

package.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
"autod": "autod -w --prefix '^' -e benchmark && npm run cnpm",
1515
"cnpm": "npm install --registry=https://registry.npm.taobao.org",
1616
"contributors": "contributors -f plain -o AUTHORS",
17-
"optimized": "node --allow-natives-syntax --trace_opt --trace_deopt test/optimized.js"
17+
"optimized": "node --allow-natives-syntax --trace_opt --trace_deopt test/optimized.js",
18+
"benchmark": "node benchmark/putRawString.js"
1819
},
1920
"dependencies": {
2021
"debug": "^2.6.6",
@@ -56,4 +57,4 @@
5657
},
5758
"author": "fengmk2 <fengmk2@gmail.com> (http://fengmk2.com)",
5859
"license": "MIT"
59-
}
60+
}

0 commit comments

Comments
 (0)