Skip to content

Commit f1b6f6c

Browse files
committed
Low-hanging fruit for performance improvements in usage reporting
- Drop references to unencoded and uncompressed versions of reports as soon as they are no longer needed, to hopefully allow garbage collection to be more effective during the HTTP POSTs. - Simplify compression code to no longer require a workaround for Node v6 (we require Node v14!), and use `util.promisify`. - Add a timeout to the report POST. The default is 30 seconds, which is currently the timeout enforced by Apollo's load balancers; a smaller number may be advisable for high traffic users facing memory constraints. This uses the AbortController API (which should be supported by most Fetcher implementations). We use the `node-abort-controller` polyfill (same polyfill we chose for `@apollo/gateway`) because Node's built-in global AbortController requires an experimental flag in Node v14. Fixes #7100.
1 parent 4881198 commit f1b6f6c

File tree

7 files changed

+87
-43
lines changed

7 files changed

+87
-43
lines changed

.changeset/purple-kiwis-lay.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@apollo/server': patch
3+
---
4+
5+
Manage memory more efficiently in the usage reporting plugin by allowing large objects to be garbage collected more quickly.

.changeset/quick-weeks-shake.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@apollo/server': minor
3+
---
4+
5+
The usage reporting plugin now defaults to a 30 second timeout for each attempt to send reports to Apollo Server instead of no timeout; the timeout can be adjusted with the new `requestTimeoutMs` option to `ApolloServerPluginUsageReporting`. (Apollo's servers already enforced a 30 second timeout, so this is unlikely to break any existing use cases.)

package-lock.json

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/integration-testsuite/src/apolloServerTests.ts

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2225,21 +2225,20 @@ export function defineIntegrationTestSuiteApolloServerTests(
22252225

22262226
describe('graphql server functions even when Apollo servers are down', () => {
22272227
async function testWithStatus(
2228-
status: number,
2228+
status: number | 'cannot-connect' | 'timeout',
22292229
expectedRequestCount: number,
22302230
) {
2231-
const networkError = status === 0;
2232-
22332231
const { closeServer, fakeUsageReportingUrl, writeResponseResolve } =
22342232
await makeFakeUsageReportingServer({
2235-
status,
2233+
// the 444 case shouldn't ever get to actually sending 444
2234+
status: typeof status === 'number' ? status : 444,
22362235
waitWriteResponse: true,
22372236
});
22382237

22392238
try {
22402239
// To simulate a network error, we create and close the server.
22412240
// This lets us still generate a port that is hopefully unused.
2242-
if (networkError) {
2241+
if (status == 'cannot-connect') {
22432242
await closeServer();
22442243
}
22452244

@@ -2277,6 +2276,8 @@ export function defineIntegrationTestSuiteApolloServerTests(
22772276
reportErrorFunction(error: Error) {
22782277
reportErrorPromiseResolve(error);
22792278
},
2279+
// Make sure the timeout test actually finishes in time
2280+
requestTimeoutMs: status === 'timeout' ? 10 : undefined,
22802281
}),
22812282
],
22822283
});
@@ -2292,27 +2293,32 @@ export function defineIntegrationTestSuiteApolloServerTests(
22922293
});
22932294
expect(result.data.something).toBe('hello');
22942295

2295-
if (!networkError) {
2296+
if (typeof status === 'number') {
22962297
// Allow reporting to return its response (for every retry).
2298+
// (But not if we're intentionally timing out!)
22972299
writeResponseResolve();
22982300
}
22992301

23002302
// Make sure we can get the error from reporting.
23012303
const sendingError = await reportErrorPromise;
23022304
expect(sendingError).toBeTruthy();
2303-
if (networkError) {
2305+
if (status === 'cannot-connect') {
23042306
expect(sendingError.message).toContain(
23052307
'Error sending report to Apollo servers',
23062308
);
23072309
expect(sendingError.message).toContain('ECONNREFUSED');
2310+
} else if (status === 'timeout') {
2311+
expect(sendingError.message).toBe(
2312+
'Error sending report to Apollo servers: The user aborted a request.',
2313+
);
23082314
} else {
23092315
expect(sendingError.message).toBe(
23102316
`Error sending report to Apollo servers: HTTP status ${status}, Important text in the body`,
23112317
);
23122318
}
23132319
expect(requestCount).toBe(expectedRequestCount);
23142320
} finally {
2315-
if (!networkError) {
2321+
if (status !== 'cannot-connect') {
23162322
await closeServer();
23172323
}
23182324
}
@@ -2322,7 +2328,10 @@ export function defineIntegrationTestSuiteApolloServerTests(
23222328
await testWithStatus(500, 3);
23232329
});
23242330
it('with network error', async () => {
2325-
await testWithStatus(0, 3);
2331+
await testWithStatus('cannot-connect', 3);
2332+
});
2333+
it('with timeout', async () => {
2334+
await testWithStatus('timeout', 3);
23262335
});
23272336
it('with non-retryable error', async () => {
23282337
await testWithStatus(400, 1);

packages/server/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@
106106
"loglevel": "^1.6.8",
107107
"lru-cache": "^7.10.1",
108108
"negotiator": "^0.6.3",
109+
"node-abort-controller": "^3.0.1",
109110
"node-fetch": "^2.6.7",
110111
"uuid": "^9.0.0",
111112
"whatwg-mimetype": "^3.0.0"

packages/server/src/plugin/usageReporting/options.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,12 @@ export interface ApolloServerPluginUsageReportingOptions<
325325
* Minimum back-off for retries. Defaults to 100ms.
326326
*/
327327
minimumRetryDelayMs?: number;
328+
/**
329+
* Default timeout for each individual attempt to send a report to Apollo.
330+
* (This is for each HTTP POST, not for all potential retries.) Defaults to 30
331+
* seconds (30000ms).
332+
*/
333+
requestTimeoutMs?: number;
328334
/**
329335
* A logger interface to be used for output and errors. When not provided
330336
* it will default to the server's own `logger` implementation and use

packages/server/src/plugin/usageReporting/plugin.ts

Lines changed: 40 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@ import {
88
import retry from 'async-retry';
99
import { GraphQLSchema, printSchema } from 'graphql';
1010
import type LRUCache from 'lru-cache';
11+
import { AbortController } from 'node-abort-controller';
1112
import fetch from 'node-fetch';
1213
import os from 'os';
14+
import { promisify } from 'util';
1315
import { gzip } from 'zlib';
1416
import type {
1517
ApolloServerPlugin,
@@ -38,6 +40,8 @@ import { packageVersion } from '../../generated/packageVersion.js';
3840
import { computeCoreSchemaHash } from '../../utils/computeCoreSchemaHash.js';
3941
import type { HeaderMap } from '../../utils/HeaderMap.js';
4042

43+
const gzipPromise = promisify(gzip);
44+
4145
const reportHeaderDefaults = {
4246
hostname: os.hostname(),
4347
agentVersion: `@apollo/server@${packageVersion}`,
@@ -237,7 +241,7 @@ export function ApolloServerPluginUsageReporting<TContext extends BaseContext>(
237241

238242
// Needs to be an arrow function to be confident that key is defined.
239243
const sendReport = async (executableSchemaId: string): Promise<void> => {
240-
const report = getAndDeleteReport(executableSchemaId);
244+
let report = getAndDeleteReport(executableSchemaId);
241245
if (
242246
!report ||
243247
(Object.keys(report.tracesPerQuery).length === 0 &&
@@ -254,9 +258,12 @@ export function ApolloServerPluginUsageReporting<TContext extends BaseContext>(
254258

255259
const protobufError = Report.verify(report);
256260
if (protobufError) {
257-
throw new Error(`Error encoding report: ${protobufError}`);
261+
throw new Error(`Error verifying report: ${protobufError}`);
258262
}
259-
const message = Report.encode(report).finish();
263+
let message: Uint8Array | null = Report.encode(report).finish();
264+
// Let the original protobuf object be garbage collected (helpful if the
265+
// HTTP request hangs).
266+
report = null;
260267

261268
// Potential follow-up: we can compare message.length to
262269
// report.sizeEstimator.bytes and use it to "learn" if our estimation is
@@ -271,45 +278,44 @@ export function ApolloServerPluginUsageReporting<TContext extends BaseContext>(
271278
);
272279
}
273280

274-
const compressed = await new Promise<Buffer>((resolve, reject) => {
275-
// The protobuf library gives us a Uint8Array. Node 8's zlib lets us
276-
// pass it directly; convert for the sake of Node 6. (No support right
277-
// now for Node 4, which lacks Buffer.from.)
278-
const messageBuffer = Buffer.from(
279-
message.buffer as ArrayBuffer,
280-
message.byteOffset,
281-
message.byteLength,
282-
);
283-
gzip(messageBuffer, (err, gzipResult) => {
284-
if (err) {
285-
reject(err);
286-
} else {
287-
resolve(gzipResult);
288-
}
289-
});
290-
});
281+
const compressed = await gzipPromise(message);
282+
// Let the uncompressed message be garbage collected (helpful if the
283+
// HTTP request is slow).
284+
message = null;
291285

292286
// Wrap fetcher with async-retry for automatic retrying
293287
const fetcher: Fetcher = options.fetcher ?? fetch;
294288
const response: FetcherResponse = await retry(
295289
// Retry on network errors and 5xx HTTP
296290
// responses.
297291
async () => {
298-
const curResponse = await fetcher(
299-
(options.endpointUrl ||
300-
'https://usage-reporting.api.apollographql.com') +
301-
'/api/ingress/traces',
302-
{
303-
method: 'POST',
304-
headers: {
305-
'user-agent': 'ApolloServerPluginUsageReporting',
306-
'x-api-key': key,
307-
'content-encoding': 'gzip',
308-
accept: 'application/json',
292+
// Note that once we require Node v16 we can use its global
293+
// AbortController instead of the one from `node-abort-controller`.
294+
const controller = new AbortController();
295+
const abortTimeout = setTimeout(() => {
296+
controller.abort();
297+
}, options.requestTimeoutMs ?? 30_000);
298+
let curResponse;
299+
try {
300+
curResponse = await fetcher(
301+
(options.endpointUrl ||
302+
'https://usage-reporting.api.apollographql.com') +
303+
'/api/ingress/traces',
304+
{
305+
method: 'POST',
306+
headers: {
307+
'user-agent': 'ApolloServerPluginUsageReporting',
308+
'x-api-key': key,
309+
'content-encoding': 'gzip',
310+
accept: 'application/json',
311+
},
312+
body: compressed,
313+
signal: controller.signal,
309314
},
310-
body: compressed,
311-
},
312-
);
315+
);
316+
} finally {
317+
clearTimeout(abortTimeout);
318+
}
313319

314320
if (curResponse.status >= 500 && curResponse.status < 600) {
315321
throw new Error(

0 commit comments

Comments
 (0)