Skip to content

Commit 483e58b

Browse files
committed
add remote running support for chrome distributed
1 parent 24e887e commit 483e58b

File tree

3 files changed

+51
-16
lines changed

3 files changed

+51
-16
lines changed

system/node/chrome-distributed.js

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ program
6161
.option("--enableOPT", "Enables the entire system optimization pipeline")
6262
.option("--testing", "debug mode")
6363
.option("--azaddr <azaddr>", "addr for the az server")
64+
.option("--id <id>", "id of the crawler")
65+
.option("--remote", "remote mode")
6466
.parse(process.argv);
6567

6668
var bashSanitize = (str) => {
@@ -101,17 +103,32 @@ var genBrowserArgs = (proxies) => {
101103
"--disable-setuid-sandbox",
102104
// "--blink-settings=scriptEnabled=false",
103105
],
104-
};
106+
},
107+
startHttpPort = 6080,
108+
base = program.id * program.concurrency;
105109
program.testing && template.args.push("--auto-open-devtools-for-tabs");
106110
for (var i = 0; i < proxies.length; i++) {
107111
var proxy = proxies[i];
112+
var httpAddr, httpsAddr;
113+
if (program.remote) {
114+
!program.id &&
115+
console.log("FATAL: id is required in remote mode") &&
116+
process.exit(1);
117+
118+
httpAddr = `lions.eecs.umich.edu:${startHttpPort + base + i}`;
119+
httpsAddr = `lions.eecs.umich.edu:${startHttpPort + base + i + 1000}`;
120+
} else {
121+
httpAddr = `127.0.0.1:${proxy.http_port}`;
122+
httpsAddr = `127.0.0.1:${proxy.https_port}`;
123+
}
108124
var proxyFlags = [
109-
`--host-resolver-rules=MAP *:80 127.0.0.1:${proxy.http_port},MAP *:443 127.0.0.1:${proxy.https_port},EXCLUDE localhost`,
125+
`--host-resolver-rules=MAP *:80 ${httpAddr},MAP *:443 ${httpsAddr},EXCLUDE localhost`,
110126
// `--proxy-server=http=https://127.0.0.1:${proxy.https_port}`,
111127
];
112128
var browserArgs = Object.assign({}, template);
113129
browserArgs.args = browserArgs.args.concat(proxyFlags);
114130
args.push(browserArgs);
131+
console.log(proxyFlags);
115132
}
116133
// console.log(args)
117134
return args;
@@ -153,7 +170,8 @@ var genBrowserArgs = (proxies) => {
153170
`${program.output}/logs`,
154171
program.mode,
155172
program.enableOPT,
156-
program.azaddr
173+
program.azaddr,
174+
program.remote
157175
);
158176
await proxyManager.createProxies();
159177
proxies = proxyManager.getAll();
@@ -196,13 +214,20 @@ var genBrowserArgs = (proxies) => {
196214
console.log("updating proxy path for ", pa, " to ", sanurl);
197215
process.env["NODE_TLS_REJECT_UNAUTHORIZED"] = 0;
198216

217+
var host;
218+
if (program.remote) {
219+
host = "lions.eecs.umich.edu";
220+
} else {
221+
host = "127.0.0.1";
222+
}
223+
199224
var hr = await httpPromise(
200-
`http://127.0.0.1:${pa - 1000}/update-archive-path?${
225+
`http://${host}:${pa - 1000}/update-archive-path?${
201226
program.proxy
202227
}/${sanurl}.wprgo`
203228
);
204229
var hsr = await httpsPromise(
205-
`https://127.0.0.1:${pa}/update-shared-object`
230+
`https://${host}:${pa}/update-shared-object`
206231
);
207232

208233
console.log(

system/node/chrome-server.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ async function main() {
161161
var url = req.url.slice(1);
162162
console.log(`Received request for ${url}`);
163163
console.log(`Pages loaded: ${++pagesload}`);
164-
pagesload % 3 == 0 && cluster.queue(url);
164+
cluster.queue(url);
165165
res.end("ok");
166166
});
167167
process.on("SIGINT", async () => {

system/node/lib/wpr-proxy.js

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ class Proxy {
2121
this.mode = options.mode;
2222
this.caching = options.caching;
2323
this.az_addr = options.az_addr;
24+
this.remote = options.remote;
2425
}
2526

2627
start() {
@@ -31,15 +32,21 @@ class Proxy {
3132
--az_addr ${this.az_addr}\
3233
${this.caching ? "--caching" : ""}`;
3334
(this.stdout = ""), (this.stderr = "");
34-
console.log(cmd);
35-
//write dummy data to dataOutput before spawning command
36-
this.process = child_process.spawn(cmd, { shell: true, cwd: WPRDIR });
37-
38-
var outStream = fs.createWriteStream(this.logOutput);
39-
var errStream = fs.createWriteStream(this.logOutput);
40-
41-
this.process.stdout.pipe(outStream);
42-
this.process.stderr.pipe(errStream);
35+
if (!this.remote) {
36+
console.log(cmd);
37+
//write dummy data to dataOutput before spawning command
38+
this.process = child_process.spawn(cmd, { shell: true, cwd: WPRDIR });
39+
40+
var outStream = fs.createWriteStream(this.logOutput);
41+
var errStream = fs.createWriteStream(this.logOutput);
42+
43+
this.process.stdout.pipe(outStream);
44+
this.process.stderr.pipe(errStream);
45+
} else {
46+
console.log(
47+
`Running in remote mode, not starting proxy on ${this.http_port} and ${this.https_port}`
48+
);
49+
}
4350
}
4451

4552
dump() {
@@ -59,7 +66,7 @@ class Proxy {
5966
}
6067

6168
class ProxyManager {
62-
constructor(nProxies, logDir, mode, caching, az_addr) {
69+
constructor(nProxies, logDir, mode, caching, az_addr, remote) {
6370
this.nProxies = nProxies;
6471
this.proxies = [];
6572
this.startHttpsPort = 7080 + Math.floor(Math.random() * 1000);
@@ -68,6 +75,7 @@ class ProxyManager {
6875
this.mode = mode;
6976
this.caching = caching;
7077
this.az_addr = az_addr;
78+
this.remote = remote;
7179
}
7280

7381
async createProxies() {
@@ -78,13 +86,15 @@ class ProxyManager {
7886
var mode = this.mode;
7987
var caching = this.caching;
8088
var az_addr = this.az_addr;
89+
var remote = this.remote;
8190
var p = new Proxy({
8291
http_port,
8392
https_port,
8493
logOutput,
8594
mode,
8695
caching,
8796
az_addr,
97+
remote,
8898
});
8999
this.proxies.push(p);
90100
}

0 commit comments

Comments
 (0)