Skip to content

Commit b51a233

Browse files
authored
Add ChunkMatch.BestLineMatch to return the best-scoring line (sourcegraph#884)
This PR adds a new field `ChunkMatch.BestLineMatch` with the line number of the top-scoring line in the chunk. This will let us address a long-standing issue with our new flexible keyword search, where chunk matches can become very large. Since our search results UX only shows the start of a chunk, the most relevant line may not even be visible. With this information on the best line match, we can adjust the search results UX to center the chunk on the most relevant line. Relates to [SPLF-188](https://linear.app/sourcegraph/issue/SPLF-188/ensure-the-best-scoring-line-match-is-shown-in-search-results).
1 parent d0990e0 commit b51a233

File tree

8 files changed

+181
-104
lines changed

8 files changed

+181
-104
lines changed

api.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,13 @@ type ChunkMatch struct {
161161
// beginning of a line (Column will always be 1).
162162
ContentStart Location
163163

164+
// Score is the overall relevance score of this chunk.
164165
Score float64
166+
167+
// BestLineMatch is the line number of the highest-scoring line match in this chunk.
168+
// The line number represents the index in the full file, and is 1-based. If FileName is true,
169+
// this number will be 0.
170+
BestLineMatch uint32
165171
}
166172

167173
func (cm *ChunkMatch) sizeBytes() (sz uint64) {

api_proto.go

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -96,13 +96,14 @@ func ChunkMatchFromProto(p *proto.ChunkMatch) ChunkMatch {
9696
}
9797

9898
return ChunkMatch{
99-
Content: p.GetContent(),
100-
ContentStart: LocationFromProto(p.GetContentStart()),
101-
FileName: p.GetFileName(),
102-
Ranges: ranges,
103-
SymbolInfo: symbols,
104-
Score: p.GetScore(),
105-
DebugScore: p.GetDebugScore(),
99+
Content: p.GetContent(),
100+
ContentStart: LocationFromProto(p.GetContentStart()),
101+
FileName: p.GetFileName(),
102+
Ranges: ranges,
103+
SymbolInfo: symbols,
104+
Score: p.GetScore(),
105+
BestLineMatch: p.GetBestLineMatch(),
106+
DebugScore: p.GetDebugScore(),
106107
}
107108
}
108109

@@ -118,13 +119,14 @@ func (cm *ChunkMatch) ToProto() *proto.ChunkMatch {
118119
}
119120

120121
return &proto.ChunkMatch{
121-
Content: cm.Content,
122-
ContentStart: cm.ContentStart.ToProto(),
123-
FileName: cm.FileName,
124-
Ranges: ranges,
125-
SymbolInfo: symbolInfo,
126-
Score: cm.Score,
127-
DebugScore: cm.DebugScore,
122+
Content: cm.Content,
123+
ContentStart: cm.ContentStart.ToProto(),
124+
FileName: cm.FileName,
125+
Ranges: ranges,
126+
SymbolInfo: symbolInfo,
127+
Score: cm.Score,
128+
BestLineMatch: cm.BestLineMatch,
129+
DebugScore: cm.DebugScore,
128130
}
129131
}
130132

api_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ func TestMatchSize(t *testing.T) {
149149
size: 256,
150150
}, {
151151
v: ChunkMatch{},
152-
size: 112,
152+
size: 120,
153153
}, {
154154
v: candidateMatch{},
155155
size: 80,

build/scoring_test.go

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,12 @@ import (
2727
)
2828

2929
type scoreCase struct {
30-
fileName string
31-
content []byte
32-
query query.Q
33-
language string
34-
wantScore float64
30+
fileName string
31+
content []byte
32+
query query.Q
33+
language string
34+
wantScore float64
35+
wantBestLineMatch uint32
3536
}
3637

3738
func TestFileNameMatch(t *testing.T) {
@@ -79,6 +80,8 @@ func TestBM25(t *testing.T) {
7980
language: "Java",
8081
// bm25-score: 0.58 <- sum-termFrequencyScore: 14.00, length-ratio: 1.00
8182
wantScore: 0.58,
83+
// line 5: private final int exampleField;
84+
wantBestLineMatch: 5,
8285
}, {
8386
// Matches only on content
8487
fileName: "example.java",
@@ -91,6 +94,8 @@ func TestBM25(t *testing.T) {
9194
language: "Java",
9295
// bm25-score: 1.81 <- sum-termFrequencyScore: 116.00, length-ratio: 1.00
9396
wantScore: 1.81,
97+
// line 3: public class InnerClasses {
98+
wantBestLineMatch: 3,
9499
},
95100
{
96101
// Matches only on filename
@@ -130,6 +135,8 @@ func TestJava(t *testing.T) {
130135
language: "Java",
131136
// 5500 (partial symbol at boundary) + 1000 (Java class) + 50 (partial word)
132137
wantScore: 6550,
138+
// line 37: public class InnerClass implements InnerInterface<Integer, Integer> {
139+
wantBestLineMatch: 37,
133140
},
134141
{
135142
fileName: "example.java",
@@ -138,6 +145,8 @@ func TestJava(t *testing.T) {
138145
language: "Java",
139146
// 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word)
140147
wantScore: 7000,
148+
// line 32: public static class InnerStaticClass {
149+
wantBestLineMatch: 32,
141150
},
142151
{
143152
fileName: "example.java",
@@ -146,6 +155,8 @@ func TestJava(t *testing.T) {
146155
language: "Java",
147156
// 7000 (symbol) + 900 (Java enum) + 500 (word)
148157
wantScore: 8400,
158+
// line 16: public enum InnerEnum {
159+
wantBestLineMatch: 16,
149160
},
150161
{
151162
fileName: "example.java",
@@ -154,6 +165,8 @@ func TestJava(t *testing.T) {
154165
language: "Java",
155166
// 7000 (symbol) + 800 (Java interface) + 500 (word)
156167
wantScore: 8300,
168+
// line 22: public interface InnerInterface<A, B> {
169+
wantBestLineMatch: 22,
157170
},
158171
{
159172
fileName: "example.java",
@@ -162,6 +175,8 @@ func TestJava(t *testing.T) {
162175
language: "Java",
163176
// 7000 (symbol) + 700 (Java method) + 500 (word)
164177
wantScore: 8200,
178+
// line 44: public void innerMethod() {
179+
wantBestLineMatch: 44,
165180
},
166181
{
167182
fileName: "example.java",
@@ -170,6 +185,8 @@ func TestJava(t *testing.T) {
170185
language: "Java",
171186
// 7000 (symbol) + 600 (Java field) + 500 (word)
172187
wantScore: 8100,
188+
// line 38: private final int field;
189+
wantBestLineMatch: 38,
173190
},
174191
{
175192
fileName: "example.java",
@@ -178,6 +195,8 @@ func TestJava(t *testing.T) {
178195
language: "Java",
179196
// 7000 (symbol) + 500 (Java enum constant) + 500 (word)
180197
wantScore: 8000,
198+
// line 18: B,
199+
wantBestLineMatch: 18,
181200
},
182201
// 2 Atoms (1x content and 1x filename)
183202
{
@@ -187,6 +206,8 @@ func TestJava(t *testing.T) {
187206
language: "Java",
188207
// 5500 (edge symbol) + 600 (Java field) + 500 (word) + 200 (atom)
189208
wantScore: 6800,
209+
// line 5: private final int exampleField;
210+
wantBestLineMatch: 5,
190211
},
191212
// 3 Atoms (2x content, 1x filename)
192213
{
@@ -199,6 +220,8 @@ func TestJava(t *testing.T) {
199220
language: "Java",
200221
// 7000 (symbol) + 700 (Java method) + 500 (word) + 266.67 (atom)
201222
wantScore: 8466,
223+
// line 54: private static <A, B> B runInnerInterface(InnerInterface<A, B> fn, A a) {
224+
wantBestLineMatch: 54,
202225
},
203226
// 4 Atoms (4x content)
204227
{
@@ -213,6 +236,8 @@ func TestJava(t *testing.T) {
213236
language: "Java",
214237
// 7000 (symbol) + 900 (Java enum) + 500 (word) + 300 (atom)
215238
wantScore: 8700,
239+
// line 16: public enum InnerEnum {
240+
wantBestLineMatch: 16,
216241
},
217242
{
218243
fileName: "example.java",
@@ -221,6 +246,8 @@ func TestJava(t *testing.T) {
221246
language: "Java",
222247
// 4000 (overlap Symbol) + 700 (Java method) + 50 (partial word)
223248
wantScore: 4750,
249+
// line 54: private static <A, B> B runInnerInterface(InnerInterface<A, B> fn, A a) {
250+
wantBestLineMatch: 54,
224251
},
225252
{
226253
fileName: "example.java",
@@ -229,6 +256,8 @@ func TestJava(t *testing.T) {
229256
language: "Java",
230257
// 7000 (Symbol) + 900 (Java enum) + 500 (word)
231258
wantScore: 8400,
259+
// line 16: public enum InnerEnum {
260+
wantBestLineMatch: 16,
232261
},
233262
{
234263
fileName: "example.java",
@@ -237,6 +266,8 @@ func TestJava(t *testing.T) {
237266
language: "Java",
238267
// 5500 (edge Symbol) + 900 (Java enum) + 500 (word)
239268
wantScore: 6900,
269+
// line 16: public enum InnerEnum {
270+
wantBestLineMatch: 16,
240271
},
241272
{
242273
fileName: "example.java",
@@ -245,6 +276,8 @@ func TestJava(t *testing.T) {
245276
language: "Java",
246277
// 4000 (overlap Symbol) + 900 (Java enum) + 500 (word)
247278
wantScore: 5400,
279+
// line 16: public enum InnerEnum {
280+
wantBestLineMatch: 16,
248281
},
249282
}
250283

@@ -640,6 +673,16 @@ func checkScoring(t *testing.T, c scoreCase, useBM25 bool, parserType ctags.CTag
640673
t.Fatalf("score: want %f, got %f\ndebug: %s\ndebugscore: %s", c.wantScore, got, srs.Files[0].Debug, srs.Files[0].ChunkMatches[0].DebugScore)
641674
}
642675

676+
if c.wantBestLineMatch != 0 {
677+
if len(srs.Files[0].ChunkMatches) == 0 {
678+
t.Fatalf("want BestLineMatch %d, but no chunk matches were returned", c.wantBestLineMatch)
679+
}
680+
chunkMatch := srs.Files[0].ChunkMatches[0]
681+
if chunkMatch.BestLineMatch != c.wantBestLineMatch {
682+
t.Fatalf("want BestLineMatch %d, got %d", c.wantBestLineMatch, chunkMatch.BestLineMatch)
683+
}
684+
}
685+
643686
if got := srs.Files[0].Language; got != c.language {
644687
t.Fatalf("want %s, got %s", c.language, got)
645688
}

0 commit comments

Comments
 (0)