Skip to content

Commit

Permalink
Handle breaking change in ChunkMatch newlines
Browse files Browse the repository at this point in the history
Previously, zoekt made the curious choice to not include trailing
newline characters in each chunk it served in its API, in spite of the
posix definition of a line including the trailing newline character,
which is respected in many line-oriented unix tools like grep/ripgrep,
etc.

This mistake has finally been corrected! But it is a breakage that we
have to handle here. I've updated the tests to reflect the kind of data
that zoekt is actually serving now.

See sourcegraph/zoekt#747.
  • Loading branch information
isker committed Apr 19, 2024
1 parent 4a24586 commit 4ef4961
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 26 deletions.
5 changes: 5 additions & 0 deletions .changeset/dirty-windows-marry.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"neogrok": patch
---

Handle breaking change in ChunkMatch newlines
42 changes: 21 additions & 21 deletions src/lib/server/content-parser.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,23 +37,23 @@ describe("parseFileNameMatch", () => {
describe("parseChunkMatch", () => {
it("parses chunk matches", () => {
// Single line.
expect(parseChunkMatch(Buffer.from("foo"), [])).toEqual([
expect(parseChunkMatch(Buffer.from("foo\n"), [])).toEqual([
{ text: "foo", matchRanges: [] },
]);
expect(parseChunkMatch(Buffer.from("foo"), [{ start: 0, end: 3 }])).toEqual(
[{ text: "foo", matchRanges: [{ start: 0, end: 3 }] }],
);
expect(parseChunkMatch(Buffer.from("foo"), [{ start: 0, end: 2 }])).toEqual(
[{ text: "foo", matchRanges: [{ start: 0, end: 2 }] }],
);
expect(parseChunkMatch(Buffer.from("foo"), [{ start: 1, end: 3 }])).toEqual(
[{ text: "foo", matchRanges: [{ start: 1, end: 3 }] }],
);
expect(parseChunkMatch(Buffer.from("foo"), [{ start: 1, end: 2 }])).toEqual(
[{ text: "foo", matchRanges: [{ start: 1, end: 2 }] }],
);
expect(
parseChunkMatch(Buffer.from("foo"), [
parseChunkMatch(Buffer.from("foo\n"), [{ start: 0, end: 3 }]),
).toEqual([{ text: "foo", matchRanges: [{ start: 0, end: 3 }] }]);
expect(
parseChunkMatch(Buffer.from("foo\n"), [{ start: 0, end: 2 }]),
).toEqual([{ text: "foo", matchRanges: [{ start: 0, end: 2 }] }]);
expect(
parseChunkMatch(Buffer.from("foo\n"), [{ start: 1, end: 3 }]),
).toEqual([{ text: "foo", matchRanges: [{ start: 1, end: 3 }] }]);
expect(
parseChunkMatch(Buffer.from("foo\n"), [{ start: 1, end: 2 }]),
).toEqual([{ text: "foo", matchRanges: [{ start: 1, end: 2 }] }]);
expect(
parseChunkMatch(Buffer.from("foo\n"), [
{ start: 1, end: 2 },
{ start: 2, end: 3 },
]),
Expand All @@ -68,42 +68,42 @@ describe("parseChunkMatch", () => {
]);

// Multi-line.
expect(parseChunkMatch(Buffer.from("foo\n"), [])).toEqual([
expect(parseChunkMatch(Buffer.from("foo\n\n"), [])).toEqual([
{ text: "foo", matchRanges: [] },
{ text: "", matchRanges: [] },
]);
expect(
parseChunkMatch(Buffer.from("foo\n"), [{ start: 0, end: 3 }]),
parseChunkMatch(Buffer.from("foo\n\n"), [{ start: 0, end: 3 }]),
).toEqual([
{ text: "foo", matchRanges: [{ start: 0, end: 3 }] },
{ text: "", matchRanges: [] },
]);
expect(
parseChunkMatch(Buffer.from("foo\n"), [{ start: 0, end: 4 }]),
parseChunkMatch(Buffer.from("foo\n\n"), [{ start: 0, end: 4 }]),
).toEqual([
{ text: "foo", matchRanges: [{ start: 0, end: 3 }] },
{ text: "", matchRanges: [] },
]);

expect(parseChunkMatch(Buffer.from("foo\nbar"), [])).toEqual([
expect(parseChunkMatch(Buffer.from("foo\nbar\n"), [])).toEqual([
{ text: "foo", matchRanges: [] },
{ text: "bar", matchRanges: [] },
]);
expect(
parseChunkMatch(Buffer.from("foo\nbar"), [{ start: 0, end: 3 }]),
parseChunkMatch(Buffer.from("foo\nbar\n"), [{ start: 0, end: 3 }]),
).toEqual([
{ text: "foo", matchRanges: [{ start: 0, end: 3 }] },
{ text: "bar", matchRanges: [] },
]);
expect(
parseChunkMatch(Buffer.from("foo\nbar"), [{ start: 0, end: 4 }]),
parseChunkMatch(Buffer.from("foo\nbar\n"), [{ start: 0, end: 4 }]),
).toEqual([
{ text: "foo", matchRanges: [{ start: 0, end: 3 }] },
{ text: "bar", matchRanges: [] },
]);

expect(
parseChunkMatch(Buffer.from("foo\nbar"), [
parseChunkMatch(Buffer.from("foo\nbar\n"), [
{ start: 0, end: 1 },
{ start: 2, end: 5 },
]),
Expand Down
9 changes: 4 additions & 5 deletions src/lib/server/content-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -132,11 +132,10 @@ Array<ContentLine> => {
}
}

// Conclude the current line. Note that if `currentLineText` is length 0,
// that is still semantically a line, namely an empty line. `Content` never
// naturally has a trailing newline; if there's a newline at the last byte,
// this indicates that there is a final line that is empty.
lines.push({ text: currentLineText, matchRanges: currentLineMatchRanges });
if (currentLineText.length > 0) {
// Conclude the current line.
lines.push({ text: currentLineText, matchRanges: currentLineMatchRanges });
}

return lines;
};
Expand Down

0 comments on commit 4ef4961

Please sign in to comment.