Skip to content
This repository was archived by the owner on Jan 2, 2025. It is now read-only.

Commit c256f6f

Browse files
authored
Add line filters for file endpoint (#412)
* Add line filters for file endpoint
* Fix clippy
1 parent fb9bd0c commit c256f6f

File tree

2 files changed

+134
-37
lines changed

2 files changed

+134
-37
lines changed

server/bleep/src/indexes/file.rs

Lines changed: 13 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use std::{
88
},
99
};
1010

11-
use anyhow::{Context, Result};
11+
use anyhow::Result;
1212
use async_trait::async_trait;
1313
use scc::hash_map::Entry;
1414
use tantivy::{
@@ -266,7 +266,7 @@ impl Indexable for File {
266266
}
267267

268268
impl Indexer<File> {
269-
pub async fn file_body(&self, file_disk_path: &str) -> Result<String> {
269+
pub async fn file_body(&self, file_disk_path: &str) -> Result<ContentDocument> {
270270
// Mostly taken from `by_path`, below.
271271
//
272272
// TODO: This can be unified with `by_path` below, but we first need to decide on a unified
@@ -280,26 +280,7 @@ impl Indexer<File> {
280280
IndexRecordOption::Basic,
281281
);
282282

283-
let collector = TopDocs::with_limit(1);
284-
let search_results = searcher
285-
.search(&query, &collector)
286-
.context("failed to search index")?;
287-
288-
match search_results.as_slice() {
289-
[] => Err(anyhow::Error::msg("no path found")),
290-
[(_, doc_addr)] => Ok(searcher
291-
.doc(*doc_addr)
292-
.context("failed to get document by address")?
293-
.get_first(self.source.content)
294-
.context("content field was missing")?
295-
.as_text()
296-
.context("content field did not contain text")?
297-
.to_owned()),
298-
_ => {
299-
warn!("TopDocs is not limited to 1 and index contains duplicates");
300-
Err(anyhow::Error::msg("multiple paths returned"))
301-
}
302-
}
283+
self.top_hit(Box::new(query), searcher).await
303284
}
304285

305286
pub async fn by_path(
@@ -311,7 +292,6 @@ impl Indexer<File> {
311292
let searcher = reader.searcher();
312293

313294
let file_index = searcher.index();
314-
let file_source = &self.source;
315295

316296
// query the `relative_path` field of the `File` index, using tantivy's query language
317297
//
@@ -326,6 +306,16 @@ impl Indexer<File> {
326306
))
327307
.expect("failed to parse tantivy query");
328308

309+
self.top_hit(query, searcher).await
310+
}
311+
312+
async fn top_hit(
313+
&self,
314+
query: Box<dyn tantivy::query::Query>,
315+
searcher: tantivy::Searcher,
316+
) -> Result<ContentDocument> {
317+
let file_source = &self.source;
318+
329319
let collector = TopDocs::with_limit(1);
330320
let search_results = searcher
331321
.search(&query, &collector)

server/bleep/src/webserver/file.rs

Lines changed: 121 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,41 +2,148 @@ use std::sync::Arc;
22

33
use axum::{
44
extract::{Path, Query},
5-
response::IntoResponse,
6-
Extension,
5+
Extension, Json,
76
};
87

98
use super::prelude::*;
109

11-
#[derive(Debug, serde::Deserialize)]
12-
pub struct Params {
13-
pub rev: Option<String>,
10+
#[derive(Debug, serde::Deserialize, Default)]
11+
pub(super) struct Params {
12+
/// 1-indexed line number at which to start the snippet
13+
pub line_start: Option<isize>,
14+
15+
/// 1-indexed line number at which to end the snippet
16+
pub line_end: Option<usize>,
1417
}
1518

1619
#[derive(serde::Serialize)]
17-
pub struct FileResponse {
20+
pub(super) struct FileResponse {
1821
contents: String,
1922
}
2023

2124
impl super::ApiResponse for FileResponse {}
2225

23-
pub async fn handle(
26+
pub(super) async fn handle<'a>(
2427
Path(path): Path<String>,
2528
Query(params): Query<Params>,
2629
Extension(indexes): Extension<Arc<Indexes>>,
27-
) -> impl IntoResponse {
30+
) -> Result<Json<super::Response<'a>>, Error> {
2831
// Strip leading slash, always present.
2932
let file_disk_path = &path[1..];
3033

31-
if params.rev.is_some() {
32-
return Err(Error::internal("the `rev` parameter is not yet supported"));
33-
}
34-
35-
let contents = indexes
34+
let doc = indexes
3635
.file
3736
.file_body(file_disk_path)
3837
.await
3938
.map_err(Error::internal)?;
4039

41-
Ok(json(FileResponse { contents }))
40+
Ok(json(FileResponse {
41+
contents: split_by_lines(&doc.content, &doc.line_end_indices, &params)?.to_string(),
42+
}))
43+
}
44+
45+
fn split_by_lines<'a>(text: &'a str, indices: &[u32], params: &Params) -> Result<&'a str, Error> {
46+
let char_start = match params.line_start {
47+
Some(line_start) if line_start == 1 => 0,
48+
Some(line_start) if line_start > 1 => {
49+
(indices
50+
.get(line_start as usize - 2)
51+
.ok_or_else(|| Error::user("invalid line number"))?
52+
+ 1) as usize
53+
}
54+
Some(_) => return Err(Error::user("line numbers are 1-indexed!")),
55+
_ => 0,
56+
};
57+
58+
let line_end = params.line_end.unwrap_or(indices.len()) - 1;
59+
let char_end = *indices
60+
.get(line_end)
61+
.ok_or_else(|| Error::user("invalid line number"))? as usize;
62+
63+
Ok(&text[char_start..=char_end])
64+
}
65+
66+
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises `split_by_lines` over a three-line fixture with several
    /// combinations of `line_start` / `line_end`, including none at all.
    #[test]
    fn no_params() {
        // Three lines of six characters each, every one newline-terminated.
        let text = "aaaaaa\nbbbbbb\ncccccc\n";

        // Byte offsets of each newline, mirroring what the handler passes in.
        let indices: Vec<u32> = text
            .match_indices('\n')
            .map(|(offset, _)| offset as u32)
            .collect();

        println!("{indices:?}");

        // (line_start, line_end, expected snippet)
        let cases: [(Option<isize>, Option<usize>, &str); 5] = [
            (None, None, text),
            (Some(1), None, text),
            (Some(2), None, &text[7..]),
            (Some(3), Some(3), &text[14..]),
            (Some(2), Some(3), &text[7..]),
        ];

        for (line_start, line_end, expected) in cases {
            let params = Params {
                line_start,
                line_end,
            };
            let snippet =
                split_by_lines(text, &indices, &params).unwrap_or_else(|_| panic!("bad"));
            assert_eq!(snippet, expected);
        }
    }
}

0 commit comments

Comments
 (0)