Handle breaking change in ChunkMatch newlines

Previously, zoekt made the curious choice to not include trailing newline characters in each chunk it served in its API, in spite of the posix definition of a line including the trailing newline character, which is respected in many line-oriented unix tools like grep/ripgrep, etc. This mistake has finally been corrected! But it is a breakage that we have to handle here. I've updated the tests to reflect the kind of data that zoekt is actually serving now. See sourcegraph/zoekt#747.
isker · Apr 19, 2024 · 4ef4961 · 4ef4961
1 parent 4a24586
commit 4ef4961
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 26 deletions.
diff --git a/.changeset/dirty-windows-marry.md b/.changeset/dirty-windows-marry.md
@@ -0,0 +1,5 @@
+---
+"neogrok": patch
+---
+
+Handle breaking change in ChunkMatch newlines
diff --git a/src/lib/server/content-parser.test.ts b/src/lib/server/content-parser.test.ts
@@ -37,23 +37,23 @@ describe("parseFileNameMatch", () => {
 describe("parseChunkMatch", () => {
   it("parses chunk matches", () => {
     // Single line.
-    expect(parseChunkMatch(Buffer.from("foo"), [])).toEqual([
+    expect(parseChunkMatch(Buffer.from("foo\n"), [])).toEqual([
       { text: "foo", matchRanges: [] },
     ]);
-    expect(parseChunkMatch(Buffer.from("foo"), [{ start: 0, end: 3 }])).toEqual(
-      [{ text: "foo", matchRanges: [{ start: 0, end: 3 }] }],
-    );
-    expect(parseChunkMatch(Buffer.from("foo"), [{ start: 0, end: 2 }])).toEqual(
-      [{ text: "foo", matchRanges: [{ start: 0, end: 2 }] }],
-    );
-    expect(parseChunkMatch(Buffer.from("foo"), [{ start: 1, end: 3 }])).toEqual(
-      [{ text: "foo", matchRanges: [{ start: 1, end: 3 }] }],
-    );
-    expect(parseChunkMatch(Buffer.from("foo"), [{ start: 1, end: 2 }])).toEqual(
-      [{ text: "foo", matchRanges: [{ start: 1, end: 2 }] }],
-    );
     expect(
-      parseChunkMatch(Buffer.from("foo"), [
+      parseChunkMatch(Buffer.from("foo\n"), [{ start: 0, end: 3 }]),
+    ).toEqual([{ text: "foo", matchRanges: [{ start: 0, end: 3 }] }]);
+    expect(
+      parseChunkMatch(Buffer.from("foo\n"), [{ start: 0, end: 2 }]),
+    ).toEqual([{ text: "foo", matchRanges: [{ start: 0, end: 2 }] }]);
+    expect(
+      parseChunkMatch(Buffer.from("foo\n"), [{ start: 1, end: 3 }]),
+    ).toEqual([{ text: "foo", matchRanges: [{ start: 1, end: 3 }] }]);
+    expect(
+      parseChunkMatch(Buffer.from("foo\n"), [{ start: 1, end: 2 }]),
+    ).toEqual([{ text: "foo", matchRanges: [{ start: 1, end: 2 }] }]);
+    expect(
+      parseChunkMatch(Buffer.from("foo\n"), [
         { start: 1, end: 2 },
         { start: 2, end: 3 },
       ]),
@@ -68,42 +68,42 @@ describe("parseChunkMatch", () => {
     ]);
 
     // Multi-line.
-    expect(parseChunkMatch(Buffer.from("foo\n"), [])).toEqual([
+    expect(parseChunkMatch(Buffer.from("foo\n\n"), [])).toEqual([
       { text: "foo", matchRanges: [] },
       { text: "", matchRanges: [] },
     ]);
     expect(
-      parseChunkMatch(Buffer.from("foo\n"), [{ start: 0, end: 3 }]),
+      parseChunkMatch(Buffer.from("foo\n\n"), [{ start: 0, end: 3 }]),
     ).toEqual([
       { text: "foo", matchRanges: [{ start: 0, end: 3 }] },
       { text: "", matchRanges: [] },
     ]);
     expect(
-      parseChunkMatch(Buffer.from("foo\n"), [{ start: 0, end: 4 }]),
+      parseChunkMatch(Buffer.from("foo\n\n"), [{ start: 0, end: 4 }]),
     ).toEqual([
       { text: "foo", matchRanges: [{ start: 0, end: 3 }] },
       { text: "", matchRanges: [] },
     ]);
 
-    expect(parseChunkMatch(Buffer.from("foo\nbar"), [])).toEqual([
+    expect(parseChunkMatch(Buffer.from("foo\nbar\n"), [])).toEqual([
       { text: "foo", matchRanges: [] },
       { text: "bar", matchRanges: [] },
     ]);
     expect(
-      parseChunkMatch(Buffer.from("foo\nbar"), [{ start: 0, end: 3 }]),
+      parseChunkMatch(Buffer.from("foo\nbar\n"), [{ start: 0, end: 3 }]),
     ).toEqual([
       { text: "foo", matchRanges: [{ start: 0, end: 3 }] },
       { text: "bar", matchRanges: [] },
     ]);
     expect(
-      parseChunkMatch(Buffer.from("foo\nbar"), [{ start: 0, end: 4 }]),
+      parseChunkMatch(Buffer.from("foo\nbar\n"), [{ start: 0, end: 4 }]),
     ).toEqual([
       { text: "foo", matchRanges: [{ start: 0, end: 3 }] },
       { text: "bar", matchRanges: [] },
     ]);
 
     expect(
-      parseChunkMatch(Buffer.from("foo\nbar"), [
+      parseChunkMatch(Buffer.from("foo\nbar\n"), [
         { start: 0, end: 1 },
         { start: 2, end: 5 },
       ]),

diff --git a/src/lib/server/content-parser.ts b/src/lib/server/content-parser.ts
@@ -132,11 +132,10 @@ Array<ContentLine> => {
     }
   }
 
-  // Conclude the current line. Note that if `currentLineText` is length 0,
-  // that is still semantically a line, namely an empty line. `Content` never
-  // naturally has a trailing newline; if there's a newline at the last byte,
-  // this indicates that there is a final line that is empty.
-  lines.push({ text: currentLineText, matchRanges: currentLineMatchRanges });
+  if (currentLineText.length > 0) {
+    // Conclude the current line.
+    lines.push({ text: currentLineText, matchRanges: currentLineMatchRanges });
+  }
 
   return lines;
 };