Skip to content

Commit 5b6c8c3

Browse files
authored
Switch preview to chunked transfer (elastic#921)
Switches the PR preview infrastructure from buffering the entire file to chunked transfer. This allows us to do two things: 1. Shrink the size of the node process significantly. 2. Handle large files.
1 parent bd6a348 commit 5b6c8c3

File tree

5 files changed

+69
-12
lines changed

5 files changed

+69
-12
lines changed

build_docs.pl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -598,7 +598,7 @@ sub preview {
598598
} else {
599599
close STDIN;
600600
open( STDIN, "</dev/null" );
601-
exec( qw(node /docs_build/preview/preview.js) );
601+
exec( qw(node --max-old-space-size=128 /docs_build/preview/preview.js) );
602602
}
603603
} else {
604604
close STDIN;

integtest/spec/preview_spec.rb

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,18 @@
1010
# and the preview is designed to update itself as its target repo changes so
1111
# we start it once and play with the target repo during the tests.
1212
RSpec.describe 'previewing built docs', order: :defined do
13+
very_large_text = 'muchtext' * 1024 * 1024 * 5 # 40mb
1314
repo_root = File.expand_path '../../', __dir__
15+
1416
convert_before do |src, dest|
1517
repo = src.repo_with_index 'repo', <<~ASCIIDOC
1618
Some text.
1719
1820
image::resources/cat.jpg[A cat]
21+
image::resources/very_large.jpg[Not a jpg but very big]
1922
ASCIIDOC
2023
repo.cp "#{repo_root}/resources/cat.jpg", 'resources/cat.jpg'
24+
repo.write 'resources/very_large.jpg', very_large_text
2125
repo.commit 'add cat image'
2226
book = src.book 'Test'
2327
book.source repo, 'index.asciidoc'
@@ -64,11 +68,14 @@ def get(watermark, branch, path)
6468
shared_context 'docs for branch' do
6569
watermark = SecureRandom.uuid
6670
let(:watermark) { watermark }
67-
let(:root) { get watermark, branch, 'guide/index.html' }
6871
let(:current_url) { 'guide/test/current' }
72+
let(:root) { get watermark, branch, 'guide/index.html' }
6973
let(:cat_image) do
7074
get watermark, branch, "#{current_url}/resources/cat.jpg"
7175
end
76+
let(:very_large) do
77+
get watermark, branch, "#{current_url}/resources/very_large.jpg"
78+
end
7279
end
7380

7481
it 'logs that the built docs are ready' do
@@ -106,7 +113,10 @@ def get(watermark, branch, path)
106113
include_examples 'serves the docs root'
107114
it 'serves an image' do
108115
bytes = File.open("#{repo_root}/resources/cat.jpg", 'rb', &:read)
109-
expect(cat_image).to serve(doc_body(eq(bytes)))
116+
expect(cat_image).to serve(eq(bytes))
117+
end
118+
it 'serves a very large file' do
119+
expect(very_large).to serve(eq(very_large_text))
110120
end
111121
end
112122
describe 'for the test branch' do

lib/ES/Util.pm

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -607,6 +607,8 @@ http {
607607
proxy_http_version 1.1;
608608
proxy_set_header Host \$host;
609609
proxy_cache_bypass \$http_upgrade;
610+
proxy_buffering off;
611+
gzip on;
610612
add_header 'Access-Control-Allow-Origin' '*';
611613
if (\$request_method = 'OPTIONS') {
612614
add_header 'Access-Control-Allow-Methods' 'GET, POST, OPTIONS';

preview/preview.js

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
/*
2+
* Little server that listens for requests in the form of
3+
* `${branch}.host/guide/${doc}`, looks up the doc from git and streams
4+
* it back over the response.
5+
*
6+
 * It uses subprocesses to access git. It is tempting to use NodeGit to avoid
7+
 * the fork overhead but for the most part the subprocesses are fast enough and
8+
 * I'm worried that NodeGit's `Blob.getContent` methods are synchronous.
9+
*/
10+
111
const child_process = require('child_process');
212
const http = require('http');
313
const url = require('url');
@@ -9,8 +19,6 @@ const checkTypeOpts = {
919
};
1020
const catOpts = {
1121
'cwd': '/docs_build/.repos/target_repo.git',
12-
'encoding': 'buffer',
13-
'max_buffer': 1024 * 1024 * 20,
1422
};
1523

1624
const requestHandler = (request, response) => {
@@ -23,6 +31,7 @@ const requestHandler = (request, response) => {
2331
const path = 'html' + parsedUrl.pathname.substring('/guide'.length);
2432
const branch = gitBranch(request.headers['host']);
2533
const requestedObject = `${branch}:${path}`;
34+
// TODO keepalive?
2635
// TODO include the commit info for the branch in a header
2736
child_process.execFile('git', ['cat-file', '-t', requestedObject], checkTypeOpts, (err, stdout, stderr) => {
2837
if (err) {
@@ -36,15 +45,51 @@ const requestHandler = (request, response) => {
3645
}
3746
return;
3847
}
48+
49+
response.setHeader('Transfer-Encoding', 'chunked');
3950
const showGitObject = gitShowObject(requestedObject, stdout.trim());
40-
child_process.execFile('git', ['cat-file', 'blob', showGitObject], catOpts, (err, stdout, stderr) => {
41-
if (err) {
42-
console.warn('unhandled error', err);
43-
response.statusCode = 500;
44-
response.end(err.message);
51+
const child = child_process.spawn(
52+
'git', ['cat-file', 'blob', showGitObject], catOpts
53+
);
54+
55+
// We spool stderr into a string because it is never super big.
56+
child.stderr.setEncoding('utf8');
57+
let childstderr = '';
58+
child.stderr.addListener('data', chunk => {
59+
childstderr += chunk;
60+
});
61+
62+
/* Let node handle all the piping because it handles backpressure properly.
63+
* We don't let the pipe emit the `end` event though because we'd like to
64+
 * do that manually when the process exits. The chunk size looks like
65+
 * it is 4k and seems to come from the child_process. */
66+
child.stdout.pipe(response, {end: false});
67+
68+
let exited = false;
69+
child.addListener('close', (code, signal) => {
70+
/* This can get invoked multiple times but we can only do anything
71+
* the first time so we just drop the second time on the floor. */
72+
if (exited) return;
73+
exited = true;
74+
75+
if (code === 0 && signal === null) {
76+
response.end();
4577
return;
4678
}
47-
response.end(stdout);
79+
/* Note that we're playing a little fast and loose here with the status.
80+
* If we've already sent a chunk we can't change the status code. We're
81+
* out of luck. We just terminate the response anyway. The server log
82+
* is the best we can do for tracking these. */
83+
console.warn('unhandled error', code, signal, childstderr);
84+
response.statusCode = 500;
85+
response.end(childstderr);
86+
});
87+
child.addListener('error', err => {
88+
child.stdout.destroy();
89+
child.stderr.destroy();
90+
console.warn('unhandled error', err);
91+
response.statusCode = 500;
92+
response.end(err.message);
4893
});
4994
});
5095
}

publish_docker.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
set -e
44

55
export BUILD=docker.elastic.co/docs/build:1
6-
export PREVIEW=docker.elastic.co/docs/preview:1
6+
export PREVIEW=docker.elastic.co/docs/preview:2
77

88
cd $(git rev-parse --show-toplevel)
99
./build_docs --just-build-image

0 commit comments

Comments
 (0)