Skip to content

Commit 57dfe69

Browse files
authored
Merge pull request #15 from mudler/feat/external_sources
feat: external sources management
2 parents 7cfd2bb + a114bd9 commit 57dfe69

12 files changed

+714
-45
lines changed

README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,26 @@ curl -X DELETE $BASE_URL/collections/myCollection/entry/delete \
203203
-d '{"entry":"file.txt"}'
204204
```
205205

206+
- **Add External Source**:
207+
208+
```sh
209+
curl -X POST $BASE_URL/collections/myCollection/sources \
210+
-H "Content-Type: application/json" \
211+
-d '{"url":"https://example.com", "update_interval":30}'
212+
```
213+
214+
The `update_interval` is specified in minutes. If not provided, it defaults to 60 minutes.
215+
216+
- **Remove External Source**:
217+
218+
```sh
219+
curl -X DELETE $BASE_URL/collections/myCollection/sources \
220+
-H "Content-Type: application/json" \
221+
-d '{"url":"https://example.com"}'
222+
```
223+
224+
External sources are automatically monitored and updated in the background. The content is periodically fetched and added to the collection, making it searchable through the regular search endpoint.
225+
206226
---
207227

208228
## 📝 License

main.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77

88
"github.com/labstack/echo/v4"
99
"github.com/labstack/echo/v4/middleware"
10+
"github.com/mudler/localrecall/rag"
1011
"github.com/sashabaranov/go-openai"
1112
)
1213

@@ -20,6 +21,7 @@ var (
2021
vectorEngine = os.Getenv("VECTOR_ENGINE")
2122
maxChunkingSize = os.Getenv("MAX_CHUNKING_SIZE")
2223
apiKeys = os.Getenv("API_KEYS")
24+
sourceManager = rag.NewSourceManager()
2325
)
2426

2527
func init() {
@@ -38,6 +40,9 @@ func init() {
3840
if vectorEngine == "" {
3941
vectorEngine = "chromem"
4042
}
43+
44+
// Start the source manager
45+
sourceManager.Start()
4146
}
4247

4348
func startAPI(listenAddress string) {

rag/chunking.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import (
1515
"jaytaylor.com/html2text"
1616
)
1717

18-
func getWebPage(url string) (string, error) {
18+
func GetWebPage(url string) (string, error) {
1919
resp, err := http.Get(url)
2020
if err != nil {
2121
return "", err
@@ -32,7 +32,7 @@ func getWebPage(url string) (string, error) {
3232
func getWebSitemap(url string) (res []string, err error) {
3333
err = sitemap.ParseFromSite(url, func(e sitemap.Entry) error {
3434
xlog.Info("Sitemap page: " + e.GetLocation())
35-
content, err := getWebPage(e.GetLocation())
35+
content, err := GetWebPage(e.GetLocation())
3636
if err == nil {
3737
res = append(res, content)
3838
}

rag/engine.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
package rag
22

33
import (
4+
"github.com/mudler/localrecall/rag/engine"
45
"github.com/mudler/localrecall/rag/types"
56
)
67

78
type Engine interface {
8-
Store(s string, meta map[string]string) error
9+
Store(s string, metadata map[string]string) (engine.Result, error)
910
Reset() error
1011
Search(s string, similarEntries int) ([]types.Result, error)
1112
Count() int
13+
Delete(where map[string]string, whereDocuments map[string]string, ids ...string) error
14+
GetByID(id string) (types.Result, error)
1215
}

rag/engine/chromem.go

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,20 +83,40 @@ func (c *ChromemDB) embedding() chromem.EmbeddingFunc {
8383
)
8484
}
8585

86-
func (c *ChromemDB) Store(s string, metadata map[string]string) error {
86+
func (c *ChromemDB) Store(s string, metadata map[string]string) (Result, error) {
8787
defer func() {
8888
c.index++
8989
}()
9090
if s == "" {
91-
return fmt.Errorf("empty string")
91+
return Result{}, fmt.Errorf("empty string")
9292
}
93-
return c.collection.AddDocuments(context.Background(), []chromem.Document{
93+
94+
if err := c.collection.AddDocuments(context.Background(), []chromem.Document{
9495
{
9596
Metadata: metadata,
9697
Content: s,
9798
ID: fmt.Sprint(c.index),
9899
},
99-
}, runtime.NumCPU())
100+
}, runtime.NumCPU()); err != nil {
101+
return Result{}, err
102+
}
103+
104+
return Result{
105+
ID: fmt.Sprint(c.index),
106+
}, nil
107+
}
108+
109+
func (c *ChromemDB) Delete(where map[string]string, whereDocuments map[string]string, ids ...string) error {
110+
return c.collection.Delete(context.Background(), where, whereDocuments, ids...)
111+
}
112+
113+
func (c *ChromemDB) GetByID(id string) (types.Result, error) {
114+
res, err := c.collection.GetByID(context.Background(), id)
115+
if err != nil {
116+
return types.Result{}, err
117+
}
118+
119+
return types.Result{ID: res.ID, Metadata: res.Metadata, Content: res.Content}, nil
100120
}
101121

102122
func (c *ChromemDB) Search(s string, similarEntries int) ([]types.Result, error) {

rag/engine/localai.go

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,19 +31,19 @@ func (db *LocalAIRAGDB) Count() int {
3131
return 0
3232
}
3333

34-
func (db *LocalAIRAGDB) Store(s string, metadata map[string]string) error {
34+
func (db *LocalAIRAGDB) Store(s string, metadata map[string]string) (Result, error) {
3535
resp, err := db.openaiClient.CreateEmbeddings(context.TODO(),
3636
openai.EmbeddingRequestStrings{
3737
Input: []string{s},
3838
Model: openai.EmbeddingModel(db.embeddingModel),
3939
},
4040
)
4141
if err != nil {
42-
return fmt.Errorf("error getting keys: %v", err)
42+
return Result{}, fmt.Errorf("error getting keys: %v", err)
4343
}
4444

4545
if len(resp.Data) == 0 {
46-
return fmt.Errorf("no response from OpenAI API")
46+
return Result{}, fmt.Errorf("no response from OpenAI API")
4747
}
4848

4949
embedding := resp.Data[0].Embedding
@@ -54,10 +54,22 @@ func (db *LocalAIRAGDB) Store(s string, metadata map[string]string) error {
5454
}
5555
err = db.client.Set(setReq)
5656
if err != nil {
57-
return fmt.Errorf("error setting keys: %v", err)
57+
return Result{}, fmt.Errorf("error setting keys: %v", err)
5858
}
5959

60-
return nil
60+
fmt.Println("LocalAI stores don't support IDs, so we can't delete entries once created. This is not implemented yet.")
61+
return Result{
62+
// TODO: LocalAI should return an ID so that stored entries can be deleted properly. This is not implemented yet.
63+
ID: "",
64+
}, nil
65+
}
66+
67+
func (db *LocalAIRAGDB) Delete(where map[string]string, whereDocuments map[string]string, ids ...string) error {
68+
return fmt.Errorf("not implemented")
69+
}
70+
71+
func (db *LocalAIRAGDB) GetByID(id string) (types.Result, error) {
72+
return types.Result{}, fmt.Errorf("not implemented")
6173
}
6274

6375
func (db *LocalAIRAGDB) Search(s string, similarEntries int) ([]types.Result, error) {

rag/engine/types.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
package engine
2+
3+
type Result struct {
4+
ID string `json:"id"`
5+
}

0 commit comments

Comments
 (0)