Skip to content
This repository was archived by the owner on Jan 2, 2025. It is now read-only.

Commit eec3634

Browse files
rmuller-ml, ggordonhall, oppiliappan
authored
[BLO-1840] Determinism semantic search (first call) (#1121)
* gpt3 temperature 0, and use a modified rake that always gives the same order and does not ignore underscores and slashes
* gpt3 temperature 0, and use a modified rake that always gives the same order and does not ignore underscores and slashes
* rebase and add modified github repo to Cargo.toml
* handle the case where keywords is empty
* move stopwords removal logic into bleep
* attribution
* address review comments
* revert
* Update server/bleep/src/query/stopwords.rs Co-authored-by: akshay <nerdy@peppe.rs>
* address more comments
--------- Co-authored-by: Gabriel Gordon-Hall <ggordonhall@gmail.com> Co-authored-by: akshay <nerdy@peppe.rs>
1 parent b6a1eb3 commit eec3634

File tree

7 files changed

+104
-110
lines changed

7 files changed

+104
-110
lines changed

Cargo.lock

Lines changed: 25 additions & 81 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

server/bleep/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,6 @@ git-version = "0.3.5"
111111
gix = { git = "https://github.com/BloopAI/gitoxide", version="0.55.2", features = ["blocking-http-transport-reqwest-rust-tls-no-trust-dns", "pack-cache-lru-static"] }
112112

113113
# semantic
114-
rake = "0.1"
115114
qdrant-client = { version = "1.5.0", default-features = false }
116115
tiktoken-rs = "0.4.5"
117116
tokenizers = { version = "0.14.0", default-features = false, features = ["progressbar", "cli", "onig", "esaxx_fast"] }

server/bleep/src/agent.rs

Lines changed: 4 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,14 @@ use std::{sync::Arc, time::Duration};
22

33
use anyhow::{anyhow, Context, Result};
44
use futures::{Future, TryStreamExt};
5-
use once_cell::sync::OnceCell;
6-
use rake::*;
75
use tokio::sync::mpsc::Sender;
86
use tracing::{debug, error, info, instrument};
97

108
use crate::{
119
analytics::{EventData, QueryEvent},
1210
indexes::reader::{ContentDocument, FileDocument},
1311
llm_gateway::{self, api::FunctionCall},
14-
query::parser,
12+
query::{parser, stopwords::remove_stopwords},
1513
repo::RepoRef,
1614
semantic,
1715
webserver::{
@@ -43,19 +41,6 @@ mod tools {
4341
pub mod proc;
4442
}
4543

46-
static STOPWORDS: OnceCell<StopWords> = OnceCell::new();
47-
static STOP_WORDS_LIST: &str = include_str!("stopwords.txt");
48-
49-
fn stop_words() -> &'static StopWords {
50-
STOPWORDS.get_or_init(|| {
51-
let mut sw = StopWords::new();
52-
for w in STOP_WORDS_LIST.lines() {
53-
sw.insert(w.to_string());
54-
}
55-
sw
56-
})
57-
}
58-
5944
pub enum Error {
6045
Timeout(Duration),
6146
Processing(anyhow::Error),
@@ -196,23 +181,14 @@ impl Agent {
196181

197182
// Always make a code search for the user query on the first exchange
198183
if self.exchanges.len() == 1 {
199-
// Extract keywords from the query
200184
let keywords = {
201-
let sw = stop_words();
202-
let r = Rake::new(sw.clone());
203-
let keywords = r.run(s);
204-
205-
if keywords.is_empty() {
185+
let keys = remove_stopwords(s);
186+
if keys.is_empty() {
206187
s.clone()
207188
} else {
208-
keywords
209-
.iter()
210-
.map(|k| k.keyword.clone())
211-
.collect::<Vec<_>>()
212-
.join(" ")
189+
keys
213190
}
214191
};
215-
216192
self.code_search(&keywords).await?;
217193
}
218194
s.clone()

server/bleep/src/agent/tools/code.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ impl Agent {
110110
.llm_gateway
111111
.clone()
112112
.model("gpt-3.5-turbo-0613")
113+
.temperature(0.0)
113114
.chat(&prompt, None)
114115
.await?;
115116

server/bleep/src/query.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ pub mod languages;
44
pub mod parser;
55
pub mod planner;
66
pub mod ranking;
7+
pub mod stopwords;

0 commit comments

Comments
 (0)