Skip to content

Commit

Permalink
fix indexing of cookie header (#714)
Browse files Browse the repository at this point in the history
- add the `fields` option to the CDX indexer so that `req.http:cookie` and `referrer` entries are included in the CDXJ
- update warcio to 2.4.0 to support this functionality
  • Loading branch information
ikreymer authored Nov 14, 2024
1 parent 60c84b3 commit f56d650
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 3 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
"tsc": "^2.0.4",
"undici": "^6.18.2",
"uuid": "8.3.2",
"warcio": "^2.3.1",
"warcio": "^2.4.0",
"ws": "^7.4.4",
"yargs": "^17.7.2"
},
Expand Down
7 changes: 5 additions & 2 deletions src/util/warcwriter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import fs from "fs";
import { Writable } from "stream";
import path from "path";

import { CDXIndexer, WARCRecord } from "warcio";
import { CDXIndexer, WARCRecord, DEFAULT_CDX_FIELDS } from "warcio";
import { WARCSerializer } from "warcio/node";
import { logger, formatErr, LogDetails, LogContext } from "./logger.js";
import type { IndexerOffsetLength } from "warcio";
Expand Down Expand Up @@ -76,7 +76,10 @@ export class WARCWriter implements IndexerOffsetLength {
this.recordLength = 0;

if (this.warcCdxDir) {
this.indexer = new CDXIndexer({ format: "cdxj" });
this.indexer = new CDXIndexer({
format: "cdxj",
fields: [...DEFAULT_CDX_FIELDS, "req.http:cookie", "referrer"],
});
}

return filename;
Expand Down
14 changes: 14 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -5295,6 +5295,20 @@ warcio@^2.3.1:
uuid-random "^1.3.2"
yargs "^17.6.2"

warcio@^2.4.0:
version "2.4.0"
resolved "https://registry.yarnpkg.com/warcio/-/warcio-2.4.0.tgz#13bae2837f1bbf5cf7585f75857e6311d30557bd"
integrity sha512-EfxXCgsnZ35CGf2j99QBMyB6EI98KEQ6YmeER+8Lnv/4KFJ3thT76PiX37HfZVbPJS21JihA0Eddjk9QBQRlPg==
dependencies:
"@types/pako" "^1.0.7"
"@types/stream-buffers" "^3.0.7"
base32-encode "^2.0.0"
hash-wasm "^4.9.0"
pako "^1.0.11"
tempy "^3.1.0"
uuid-random "^1.3.2"
yargs "^17.7.2"

web-encoding@^1.1.5:
version "1.1.5"
resolved "https://registry.yarnpkg.com/web-encoding/-/web-encoding-1.1.5.tgz#fc810cf7667364a6335c939913f5051d3e0c4864"
Expand Down

0 comments on commit f56d650

Please sign in to comment.