Skip to content

Commit 0bbf3f2

Browse files
authored
fix(artifact): improve catalog deletion slow issue (#93)
Because deleting a catalog with too many or huge files will be extremely slow. This commit makes the deletion in the background and marks the catalog as deleted.
1 parent 1b135e1 commit 0bbf3f2

File tree

13 files changed

+795
-292
lines changed

13 files changed

+795
-292
lines changed

cmd/main/main.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,7 @@ func main() {
235235
privatePort := fmt.Sprintf(":%d", config.Config.Server.PrivatePort)
236236
// Wait for interrupt signal to gracefully shutdown the server with a timeout of 5 seconds.
237237
errSig := make(chan error)
238+
defer close(errSig)
238239

239240
go func() {
240241
privateListener, err := net.Listen("tcp", privatePort)
@@ -266,21 +267,23 @@ func main() {
266267
// kill -2 is syscall.SIGINT
267268
// kill -9 is syscall.SIGKILL but can't be catch, so don't need add it
268269
quitSig := make(chan os.Signal, 1)
270+
defer close(quitSig)
269271
signal.Notify(quitSig, syscall.SIGINT, syscall.SIGTERM)
270272

271273
select {
272274
case err := <-errSig:
273275
logger.Error(fmt.Sprintf("Fatal error: %v\n", err))
276+
os.Exit(1)
274277
case <-quitSig:
275278
// if config.Config.Server.Usage.Enabled && usg != nil {
276279
// usg.TriggerSingleReporter(ctx)
277280
// }
278281
logger.Info("Shutting down server...")
279282
publicGrpcS.GracefulStop()
280283
wp.GraceFulStop()
281-
logger.Info("server shutdown 1")
284+
logger.Info("server shutdown due to signal")
285+
os.Exit(0)
282286
}
283-
fmt.Println("server shutdown 2")
284287
}
285288

286289
func newClients(ctx context.Context, logger *zap.Logger) (

pkg/handler/catalog.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,7 @@ func (ph *PublicHandler) GetFileCatalog(ctx context.Context, req *artifactpb.Get
5454
if err != nil {
5555
log.Error("failed to get file by file uid", zap.Error(err))
5656
return nil, fmt.Errorf("failed to get file by file uid. err: %w", err)
57-
}
58-
if len(kbfs) == 0 {
57+
} else if len(kbfs) == 0 {
5958
log.Error("no file found by file uid", zap.String("file_uid", fileUID.String()))
6059
return nil, fmt.Errorf("no file found by file uid: %s", fileUID.String())
6160
}

pkg/handler/chunks.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,7 @@ func (ph *PublicHandler) ListChunks(ctx context.Context, req *artifactpb.ListChu
3232
if err != nil {
3333
log.Error("failed to get knowledge base files by file uids", zap.Error(err))
3434
return nil, fmt.Errorf("failed to get knowledge base files by file uids")
35-
}
36-
if len(kbfs) == 0 {
35+
} else if len(kbfs) == 0 {
3736
log.Error("no files found for the given file uids")
3837
return nil, fmt.Errorf("no files found for the given file uids: %v. err: %w", fileUIDs, customerror.ErrNotFound)
3938
}

pkg/handler/knowledgebase.go

Lines changed: 67 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"github.com/instill-ai/artifact-backend/pkg/logger"
1616
"github.com/instill-ai/artifact-backend/pkg/repository"
1717
"github.com/instill-ai/artifact-backend/pkg/service"
18+
"github.com/instill-ai/artifact-backend/pkg/utils"
1819
artifactpb "github.com/instill-ai/protogen-go/artifact/artifact/v1alpha"
1920
)
2021

@@ -347,50 +348,79 @@ func (ph *PublicHandler) DeleteCatalog(ctx context.Context, req *artifactpb.Dele
347348
return nil, fmt.Errorf(ErrorDeleteKnowledgeBaseMsg, customerror.ErrNoPermission)
348349
}
349350

350-
// delete acl
351-
err = ph.service.ACLClient.Purge(ctx, "knowledgebase", kb.UID)
352-
if err != nil {
353-
log.Error("failed to purge catalog", zap.Error(err))
354-
return nil, fmt.Errorf(ErrorDeleteKnowledgeBaseMsg, err)
355-
}
351+
startSignal := make(chan bool)
352+
// TODO: in the future, we should delete the catalog using clean up worker
353+
go utils.GoRecover(func() {
354+
ctx := context.TODO()
355+
log, _ := logger.GetZapLogger(ctx)
356+
// wait for the catalog to be deleted in postgres
357+
canStart := <-startSignal
358+
if !canStart {
359+
log.Error("failed to delete catalog in background", zap.String("catalog_id", kb.UID.String()))
360+
return
361+
}
362+
log.Info("DeleteCatalog starts in background", zap.String("catalog_id", kb.UID.String()))
363+
allPass := true
364+
// delete files in minIO
365+
err = <-ph.service.MinIO.DeleteKnowledgeBase(ctx, kb.UID.String())
366+
if err != nil {
367+
log.Error("failed to delete files in minIO in background", zap.Error(err))
368+
allPass = false
369+
}
356370

357-
// delete files in minIO
358-
err = <-ph.service.MinIO.DeleteKnowledgeBase(ctx, kb.UID.String())
359-
if err != nil {
360-
log.Error("failed to delete files in minIO", zap.Error(err))
361-
}
362-
// delete database in postgres
363-
err = ph.service.Repository.DeleteAllKnowledgeBaseFiles(ctx, kb.UID.String())
364-
if err != nil {
365-
log.Error("failed to delete files in postgres", zap.Error(err))
366-
}
367-
// delete converted files in postgres
368-
err = ph.service.Repository.DeleteAllConvertedFilesinKb(ctx, kb.UID)
369-
if err != nil {
370-
log.Error("failed to delete converted files in postgres", zap.Error(err))
371-
}
372-
// delete all chunks in postgres
373-
err = ph.service.Repository.HardDeleteChunksByKbUID(ctx, kb.UID)
374-
if err != nil {
375-
log.Error("failed to delete chunks in postgres", zap.Error(err))
376-
}
371+
// delete the collection in milvus
372+
err = ph.service.MilvusClient.DropKnowledgeBaseCollection(ctx, kb.UID.String())
373+
if err != nil {
374+
log.Error("failed to delete collection in milvus in background", zap.Error(err))
375+
allPass = false
376+
}
377377

378-
// delete all embedding in postgres
379-
err = ph.service.Repository.HardDeleteEmbeddingsByKbUID(ctx, kb.UID)
380-
if err != nil {
381-
log.Error("failed to delete embeddings in postgres", zap.Error(err))
382-
}
378+
// delete all files in postgres
379+
err = ph.service.Repository.DeleteAllKnowledgeBaseFiles(ctx, kb.UID.String())
380+
if err != nil {
381+
log.Error("failed to delete files in postgres in background", zap.Error(err))
382+
allPass = false
383+
}
384+
// delete converted files in postgres
385+
err = ph.service.Repository.DeleteAllConvertedFilesInKb(ctx, kb.UID)
386+
if err != nil {
387+
log.Error("failed to delete converted files in postgres in background", zap.Error(err))
388+
allPass = false
389+
}
390+
// delete all chunks in postgres
391+
err = ph.service.Repository.HardDeleteChunksByKbUID(ctx, kb.UID)
392+
if err != nil {
393+
log.Error("failed to delete chunks in postgres in background", zap.Error(err))
394+
allPass = false
395+
}
396+
397+
// delete all embedding in postgres
398+
err = ph.service.Repository.HardDeleteEmbeddingsByKbUID(ctx, kb.UID)
399+
if err != nil {
400+
log.Error("failed to delete embeddings in postgres in background", zap.Error(err))
401+
allPass = false
402+
}
403+
// delete acl. Note: we need to delete the acl after deleting the catalog
404+
err = ph.service.ACLClient.Purge(ctx, "knowledgebase", kb.UID)
405+
if err != nil {
406+
log.Error("failed to purge catalog", zap.Error(err))
407+
allPass = false
408+
}
409+
if allPass {
410+
log.Info("successfully deleted catalog in background", zap.String("catalog_id", kb.UID.String()))
411+
} else {
412+
log.Error("failed to delete catalog in background", zap.String("catalog_id", kb.UID.String()))
413+
}
414+
}, "DeleteCatalog")
383415

384416
deletedKb, err := ph.service.Repository.DeleteKnowledgeBase(ctx, ns.NsUID.String(), req.CatalogId)
385417
if err != nil {
418+
log.Error("failed to delete catalog", zap.Error(err))
419+
startSignal <- false
386420
return nil, err
387421
}
388-
389-
// delete acl. Note: we need to delete the acl after deleting the catalog
390-
err = ph.service.ACLClient.Purge(ctx, "knowledgebase", kb.UID)
391-
if err != nil {
392-
log.Error("failed to purge catalog", zap.Error(err))
393-
}
422+
// start the background deletion
423+
startSignal <- true
394424

395425
return &artifactpb.DeleteCatalogResponse{
396426
Catalog: &artifactpb.Catalog{

pkg/handler/knowledgebasefiles.go

Lines changed: 81 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
artifactpb "github.com/instill-ai/protogen-go/artifact/artifact/v1alpha"
1616
"go.uber.org/zap"
1717
"google.golang.org/protobuf/types/known/timestamppb"
18+
"gorm.io/gorm"
1819
)
1920

2021
func (ph *PublicHandler) UploadCatalogFile(ctx context.Context, req *artifactpb.UploadCatalogFileRequest) (*artifactpb.UploadCatalogFileResponse, error) {
@@ -135,8 +136,9 @@ func (ph *PublicHandler) UploadCatalogFile(ctx context.Context, req *artifactpb.
135136
return nil, err
136137
}
137138

138-
// increase catalog usage
139-
err = ph.service.Repository.IncreaseKnowledgeBaseUsage(ctx, kb.UID.String(), int(fileSize))
139+
// increase catalog usage. need to increase after the file is created.
140+
// Note: in the future, we need to increase the usage in transaction with creating the file.
141+
err = ph.service.Repository.IncreaseKnowledgeBaseUsage(ctx, nil, kb.UID.String(), int(fileSize))
140142
if err != nil {
141143
log.Error("failed to increase catalog usage", zap.Error(err))
142144
return nil, err
@@ -318,14 +320,16 @@ func (ph *PublicHandler) DeleteCatalogFile(
318320

319321
// ACL - check user's permission to write catalog of kb file
320322
kbfs, err := ph.service.Repository.GetKnowledgeBaseFilesByFileUIDs(ctx, []uuid.UUID{uuid.FromStringOrNil(req.FileUid)})
321-
if err != nil && len(kbfs) == 0 {
322-
log.Error("failed to get catalog", zap.Error(err))
323-
return nil, fmt.Errorf(ErrorListKnowledgeBasesMsg, err)
323+
if err != nil {
324+
log.Error("failed to get catalog files", zap.Error(err))
325+
return nil, fmt.Errorf("failed to get catalog files. err: %w", err)
326+
} else if len(kbfs) == 0 {
327+
return nil, fmt.Errorf("file not found. err: %w", customerror.ErrNotFound)
324328
}
325329
granted, err := ph.service.ACLClient.CheckPermission(ctx, "knowledgebase", kbfs[0].KnowledgeBaseUID, "writer")
326330
if err != nil {
327331
log.Error("failed to check permission", zap.Error(err))
328-
return nil, fmt.Errorf(ErrorUpdateKnowledgeBaseMsg, err)
332+
return nil, fmt.Errorf("failed to check permission. err: %w", err)
329333
}
330334
if !granted {
331335
log.Error("no permission to delete catalog file")
@@ -345,66 +349,106 @@ func (ph *PublicHandler) DeleteCatalogFile(
345349
files, err := ph.service.Repository.GetKnowledgeBaseFilesByFileUIDs(ctx, []uuid.UUID{fUID})
346350
if err != nil {
347351
return nil, err
348-
}
349-
if len(files) == 0 {
352+
} else if len(files) == 0 {
350353
return nil, fmt.Errorf("file not found. err: %w", customerror.ErrNotFound)
351354
}
352355

356+
startSignal := make(chan bool)
353357
// TODO: need to use clean worker in the future
354358
go utils.GoRecover(
355359
func() {
356-
// to prevent parent context from being cancelled, create a new context
360+
// Create a new context to prevent the parent context from being cancelled
357361
ctx := context.TODO()
358-
// delete the file from minio
362+
log, _ := logger.GetZapLogger(ctx)
363+
canStart := <-startSignal
364+
if !canStart {
365+
log.Info("DeleteCatalogFile: received stop signal")
366+
return
367+
}
368+
log.Info("DeleteCatalogFile: start deleting file from minio, database and milvus")
369+
allPass := true
370+
// Delete the file from MinIO
359371
objectPaths := []string{}
360-
// kb file in minio
372+
// Add the knowledge base file in MinIO to the list of objects to delete
361373
objectPaths = append(objectPaths, files[0].Destination)
362-
// converted file in minio
374+
// Add the converted file in MinIO to the list of objects to delete
363375
cf, err := ph.service.Repository.GetConvertedFileByFileUID(ctx, fUID)
364-
if err == nil {
376+
if err != nil {
377+
if err != gorm.ErrRecordNotFound {
378+
log.Error("failed to get converted file by file uid", zap.Error(err))
379+
allPass = false
380+
}
381+
} else if cf != nil {
365382
objectPaths = append(objectPaths, cf.Destination)
366383
}
367-
// chunks in minio
368-
chunks, _ := ph.service.Repository.ListChunksByKbFileUID(ctx, fUID)
369-
if len(chunks) > 0 {
384+
// Add the chunks in MinIO to the list of objects to delete
385+
chunks, err := ph.service.Repository.ListChunksByKbFileUID(ctx, fUID)
386+
if err != nil {
387+
log.Error("failed to get chunks by kb file uid", zap.Error(err))
388+
allPass = false
389+
} else if len(chunks) > 0 {
370390
for _, chunk := range chunks {
371391
objectPaths = append(objectPaths, chunk.ContentDest)
372392
}
373393
}
374-
// delete the embeddings in milvus(need to delete first)
394+
// Delete the embeddings in Milvus (this better to be done first)
375395
embUIDs := []string{}
376396
embeddings, _ := ph.service.Repository.ListEmbeddingsByKbFileUID(ctx, fUID)
377397
for _, emb := range embeddings {
378398
embUIDs = append(embUIDs, emb.UID.String())
379399
}
380-
_ = ph.service.MilvusClient.DeleteEmbeddingsInKb(ctx, files[0].KnowledgeBaseUID.String(), embUIDs)
381-
382-
_ = ph.service.MinIO.DeleteFiles(ctx, objectPaths)
383-
// delete the converted file in postgreSQL
384-
_ = ph.service.Repository.HardDeleteConvertedFileByFileUID(ctx, fUID)
385-
// delete the chunks in postgreSQL
386-
_ = ph.service.Repository.HardDeleteChunksByKbFileUID(ctx, fUID)
387-
// delete the embeddings in postgreSQL
388-
_ = ph.service.Repository.HardDeleteEmbeddingsByKbFileUID(ctx, fUID)
389-
// print success message and file uid
390-
log.Info("Successfully deleted file from minio, database and milvus", zap.String("file_uid", fUID.String()))
400+
err = ph.service.MilvusClient.DeleteEmbeddingsInKb(ctx, files[0].KnowledgeBaseUID.String(), embUIDs)
401+
if err != nil {
402+
log.Error("failed to delete embeddings in milvus", zap.Error(err))
403+
allPass = false
404+
}
405+
406+
// Delete the files in MinIO
407+
errChan := ph.service.MinIO.DeleteFiles(ctx, objectPaths)
408+
for err := range errChan {
409+
if err != nil {
410+
log.Error("failed to delete files in minio", zap.Error(err))
411+
allPass = false
412+
}
413+
}
414+
// Delete the converted file in PostgreSQL
415+
err = ph.service.Repository.HardDeleteConvertedFileByFileUID(ctx, fUID)
416+
if err != nil {
417+
log.Error("failed to delete converted file in postgreSQL", zap.Error(err))
418+
allPass = false
419+
}
420+
// Delete the chunks in PostgreSQL
421+
err = ph.service.Repository.HardDeleteChunksByKbFileUID(ctx, fUID)
422+
if err != nil {
423+
log.Error("failed to delete chunks in postgreSQL", zap.Error(err))
424+
allPass = false
425+
}
426+
// Delete the embeddings in PostgreSQL
427+
err = ph.service.Repository.HardDeleteEmbeddingsByKbFileUID(ctx, fUID)
428+
if err != nil {
429+
log.Error("failed to delete embeddings in postgreSQL", zap.Error(err))
430+
allPass = false
431+
}
432+
if allPass {
433+
log.Info("DeleteCatalogFile: successfully deleted file from minio, database and milvus", zap.String("file_uid", fUID.String()))
434+
} else {
435+
log.Error("DeleteCatalogFile: failed to delete file from minio, database and milvus", zap.String("file_uid", fUID.String()))
436+
}
391437
},
392438
"DeleteCatalogFile",
393439
)
394440

395-
// delete the file in postgreSQL
396-
err = ph.service.Repository.DeleteKnowledgeBaseFile(ctx, req.FileUid)
397-
if err != nil {
398-
return nil, err
399-
}
400-
// decrease catalog usage
401-
err = ph.service.Repository.IncreaseKnowledgeBaseUsage(ctx, files[0].KnowledgeBaseUID.String(), int(-files[0].Size))
441+
err = ph.service.Repository.DeleteKnowledgeBaseFileAndDecreaseUsage(ctx, fUID)
402442
if err != nil {
443+
log.Error("failed to delete knowledge base file and decrease usage", zap.Error(err))
444+
startSignal <- false
403445
return nil, err
404446
}
447+
// start the background deletion
448+
startSignal <- true
405449

406450
return &artifactpb.DeleteCatalogFileResponse{
407-
FileUid: req.FileUid,
451+
FileUid: fUID.String(),
408452
}, nil
409453

410454
}
@@ -425,8 +469,7 @@ func (ph *PublicHandler) ProcessCatalogFiles(ctx context.Context, req *artifactp
425469
kbfs, err := ph.service.Repository.GetKnowledgeBaseFilesByFileUIDs(ctx, fileUUIDs)
426470
if err != nil {
427471
return nil, err
428-
}
429-
if len(kbfs) == 0 {
472+
} else if len(kbfs) == 0 {
430473
return nil, fmt.Errorf("file not found. err: %w", customerror.ErrNotFound)
431474
}
432475
// check write permission for the catalog

0 commit comments

Comments
 (0)