Description
- Version: v10.15.3
- Platform:
4.15.0-50-generic #54~16.04.1-Ubuntu x86_64 GNU/Linux
- Subsystem: stream/http
Server
const http = require('http');
/**
 * Request handler that reproduces the late-setEncoding problem: it waits
 * before switching the request stream to utf8, collects every 'data' chunk,
 * logs the joined body and answers with 204.
 */
async function handleRequest(req, res) {
  // Stand-in for asynchronous preprocessing (e.g. authentication middleware).
  // If setEncoding is called before this await, everything works correctly.
  await sleep(2000);
  req.setEncoding('utf8');

  const data = [];

  const onChunk = (d) => {
    // Chunks that were buffered before setEncoding still arrive as Buffers,
    // although the encoding is now utf8.
    data.push(d);
    console.log(d);
  };

  const onFinished = () => {
    const body = data.join('');
    // expected {"řeřicha":"hello čača"} but got {"��eřicha":"hello čača"}
    console.log(body);
    console.log(body.toString());
    res.statusCode = 204;
    res.end();
  };

  req.on('data', onChunk);
  req.on('end', onFinished);
}

const server = http.createServer(handleRequest);

server.listen(3000, 'localhost', () => {
  console.log('LISTENING');
});
/**
 * Pauses the calling async function.
 *
 * @param {number} [ms=1000] - Delay in milliseconds passed to setTimeout.
 * @returns {Promise<void>} Settles (with no value) once the timer fires.
 */
async function sleep(ms = 1000) {
  await new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
Client
const http = require('http');
// Request payload: a JSON document whose key and value contain characters
// that take more than one byte in UTF-8, so splitting the buffer at an
// arbitrary byte offset can cut a character in half.
const payloadJson = JSON.stringify({ 'řeřicha': 'hello čača' });
const data = Buffer.from(payloadJson, 'utf8');
/**
 * Sends `data` to the local test server as a POST request over a simulated
 * slow connection: the first three bytes go out at once, then one byte is
 * written after each sleep() until the payload is exhausted.
 *
 * The response status code and headers are logged when the server replies.
 *
 * @returns {Promise<void>} Settles after the request has been ended.
 */
async function main() {
  const requestOptions = {
    hostname: 'localhost',
    port: 3000,
    method: 'POST',
    path: '/',
    headers: {
      'content-type': 'application/json',
      'content-length': data.length
    }
  };

  const onResponse = (res) => {
    console.log(res.statusCode);
    console.log(res.headers);
  };

  const req = http.request(requestOptions, onResponse);

  // Write some bytes up front; with this payload the cut at byte 3 lands in
  // the middle of the first two-byte character.
  const offset = 3;
  req.write(data.slice(0, offset));

  // Simulate a very slow connection: one byte per sleep() interval.
  let position = offset;
  while (position < data.length) {
    await sleep();
    req.write(data.slice(position, position + 1));
    position += 1;
  }

  req.end();
}
// Start the client. main() returns a promise; attach a rejection handler so a
// failure inside it is reported and reflected in the exit code instead of
// being left as an unhandled rejection.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
/**
 * Timer-based delay used to throttle the client's writes.
 *
 * @param {number} [ms=1000] - How long to wait, in milliseconds.
 * @returns {Promise<void>} Settles (with no value) after the delay.
 */
async function sleep(ms = 1000) {
  const timerElapsed = new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
  await timerElapsed;
}
When setEncoding('utf8') is called on a Readable stream (e.g. IncomingMessage) after some delay, chunks that were already buffered before the call are still emitted as Buffers instead of decoded strings, even though no data has been read yet. This breaks multibyte characters.
If the encoding is set immediately, e.g. in the http 'request' event handler, then it works properly. However, HTTP frameworks (Express, Fastify, Koa) allow asynchronous middleware to be included in the processing pipeline before body parsing occurs, which delays the setEncoding call and thus breaks multibyte characters.
The included code simulates this delay: the first two emitted 'data' chunks are Buffers, which in standard body processing breaks characters whenever a chunk contains only some of the bytes of a Unicode character. In HTTP body-parsing middleware this also breaks the Content-Length check.
Example body processing middleware from Fastify v1.14.6
/**
 * Reads the whole request body from `request.raw`, enforces the size limit
 * and the declared Content-Length, then hands the body to `parser.fn`.
 *
 * @param {object} request - Provides `headers` and the underlying stream as `raw`.
 * @param {object} reply - Errors are reported through `reply.code(status).send(err)`.
 * @param {object} options - `options.limit` is the size limit; `null` falls back to `parser.bodyLimit`.
 * @param {object} parser - `asString` selects string (true) or chunk-array/Buffer
 *   accumulation; `fn(req, body, done)` receives the collected body.
 * @param {Function} done - Passed to `parser.fn`; also called with `(null, value)`
 *   or `(err)` when `parser.fn` returns a thenable.
 */
function rawBody (request, reply, options, parser, done) {
  const { asString } = parser
  const limit = options.limit === null ? parser.bodyLimit : options.limit

  // A missing header yields NaN, which makes both length comparisons below false.
  const declaredLength = request.headers['content-length']
  let contentLength = NaN
  if (declaredLength !== undefined) {
    contentLength = Number.parseInt(declaredLength, 10)
  }

  // Tags the error with the status code and sends it as the response.
  const sendError = (err, statusCode) => {
    err.statusCode = statusCode
    reply.code(err.statusCode).send(err)
  }

  if (contentLength > limit) {
    sendError(new Error('Request body is too large'), 413)
    return
  }

  const req = request.raw
  let receivedLength = 0
  let body = asString === true ? '' : []

  // Unhooks every listener this function registered on the stream.
  const detach = () => {
    req.removeListener('data', onData)
    req.removeListener('end', onEnd)
    req.removeListener('error', onEnd)
  }

  if (asString === true) {
    req.setEncoding('utf8')
  }
  req.on('data', onData)
  req.on('end', onEnd)
  req.on('error', onEnd)

  function onData (chunk) {
    receivedLength += chunk.length
    if (receivedLength > limit) {
      detach()
      sendError(new Error('Request body is too large'), 413)
      return
    }

    if (asString === true) {
      // first chunks might be buffers automatically coerced to strings
      // with broken multibytes characters
      body += chunk
      return
    }
    body.push(chunk)
  }

  // Handles both 'end' (no argument) and 'error' (error argument).
  function onEnd (err) {
    detach()

    if (err !== undefined) {
      sendError(err, 400)
      return
    }

    // In string mode the running total was counted from chunk.length, so it is
    // recomputed as a byte length before comparing with Content-Length.
    if (asString === true) {
      receivedLength = Buffer.byteLength(body)
    }

    const lengthMismatch = !Number.isNaN(contentLength) && receivedLength !== contentLength
    if (lengthMismatch) {
      sendError(new Error('Request body size did not match Content-Length'), 400)
      return
    }

    if (asString === false) {
      body = Buffer.concat(body)
    }

    const outcome = parser.fn(req, body, done)
    if (outcome && typeof outcome.then === 'function') {
      outcome.then((parsed) => done(null, parsed), done)
    }
  }
}