@@ -180,15 +180,23 @@ var _ = Describe("SourceManager", func() {
180
180
err := sourceManager .AddSource (TestCollection , complexURL , DefaultUpdateInterval )
181
181
Expect (err ).To (BeNil ())
182
182
183
- // Verify the source was added with sanitized filename
184
- sources := kb .GetExternalSources ()
185
- Expect (sources ).To (HaveLen (1 ))
186
- Expect (sources [0 ].URL ).To (Equal (complexURL ))
183
+ sourceManager .Start ()
184
+ defer sourceManager .Stop ()
185
+
186
+ // Wait for initial content to be fetched
187
+ Eventually (func () []string {
188
+ return kb .ListDocuments ()
189
+ }, TestTimeout , TestPollingInterval ).Should (HaveLen (1 ))
190
+
191
+ // Let it run for 2 minutes and check for duplicates
192
+ Consistently (func () []string {
193
+ return kb .ListDocuments ()
194
+ }, 2 * time .Minute , 5 * time .Second ).Should (HaveLen (1 ))
187
195
188
196
// List documents to verify the sanitized filename
189
197
docs := kb .ListDocuments ()
190
198
Expect (docs ).To (HaveLen (1 ))
191
- Expect (docs [0 ]).To (ContainSubstring ("example-com-path-query-value-param-123-section" ))
199
+ Expect (docs [0 ]).To (ContainSubstring ("source-foo-https- example-com-path-query-value-param-123-section.txt " ))
192
200
})
193
201
})
194
202
@@ -228,7 +236,7 @@ var _ = Describe("SourceManager", func() {
228
236
229
237
It ("should prevent duplicate content with frequent updates" , func () {
230
238
// Add a source with a very short update interval
231
- err := sourceManager .AddSource (TestCollection , "https://raw.githubusercontent.com/mudler/LocalRecall/main/README.md " , 1 * time .Second )
239
+ err := sourceManager .AddSource (TestCollection , "https://en.wikipedia.org/wiki/Black-crowned_barwing " , 1 * time .Second )
232
240
Expect (err ).To (BeNil ())
233
241
234
242
// Start the background service
@@ -237,19 +245,23 @@ var _ = Describe("SourceManager", func() {
237
245
// Wait for initial content to be fetched
238
246
Eventually (func () []string {
239
247
return kb .ListDocuments ()
240
- }, TestTimeout , TestPollingInterval ).Should (HaveLen (1 ))
248
+ }, 2 * time . Minute , 5 * time . Second ).Should (HaveLen (1 ))
241
249
242
250
// Let it run for 3 minutes and check that the engine count stays at 25 (no duplicates)
243
- Consistently (func () []string {
244
- return kb .ListDocuments ()
245
- }, 2 * time .Minute , 5 * time .Second ).Should (HaveLen (1 ))
251
+ Consistently (func () int {
252
+ e , ok := kb .Engine .(* engine.ChromemDB )
253
+ Expect (ok ).To (BeTrue ())
254
+ return e .Count ()
255
+ }, 3 * time .Minute , 5 * time .Second ).Should (Equal (25 ))
246
256
247
257
// Verify that search results don't contain duplicates
248
258
Consistently (func () bool {
249
- results , err := kb .Engine .Search ("What is LocalRecall ?" , 10 )
259
+ results , err := kb .Engine .Search ("What is the Black-crowned barwing ?" , 3 )
250
260
if err != nil {
261
+ fmt .Println ("Error searching for content" , err )
251
262
return false
252
263
}
264
+
253
265
// Check for duplicate content
254
266
seen := make (map [string ]bool )
255
267
for _ , r := range results {
0 commit comments