Skip to content

Commit e620fc6

Browse files
authored
feat(conversion): add fallback for unavailable model (#156)
Because - the docling model might not be deployed under some circumstances. This commit - adds a fallback mechanism for when the model is unavailable.
1 parent 56cec7f commit e620fc6

File tree

4 files changed

+36
-20
lines changed

4 files changed

+36
-20
lines changed

pkg/handler/knowledgebase.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import (
1010
"go.uber.org/zap"
1111
"google.golang.org/grpc/metadata"
1212

13-
"github.com/instill-ai/artifact-backend/config"
1413
"github.com/instill-ai/artifact-backend/pkg/constant"
1514
"github.com/instill-ai/artifact-backend/pkg/customerror"
1615
"github.com/instill-ai/artifact-backend/pkg/logger"
@@ -152,7 +151,8 @@ func (ph *PublicHandler) CreateCatalog(ctx context.Context, req *artifactpb.Crea
152151
CreateTime: dbData.CreateTime.String(),
153152
UpdateTime: dbData.UpdateTime.String(),
154153
ConvertingPipelines: []string{
155-
config.Config.ModelBackend.Namespace + "/" + service.ChunkMdPipelineID + "@" + service.ConvertDocToMDModelVersion,
154+
"instill/" + service.ConvertDocToMDModelID + "@" + service.ConvertDocToMDModelVersion,
155+
service.NamespaceID + "/" + service.ConvertDocToMDPipelineID + "@" + service.DocToMDVersion,
156156
},
157157
SummarizingPipelines: []string{
158158
service.NamespaceID + "/" + service.GenerateSummaryPipelineID + "@" + service.GenerateSummaryVersion,
@@ -235,7 +235,8 @@ func (ph *PublicHandler) ListCatalogs(ctx context.Context, req *artifactpb.ListC
235235
UpdateTime: kb.UpdateTime.String(),
236236
OwnerName: kb.Owner,
237237
ConvertingPipelines: []string{
238-
config.Config.ModelBackend.Namespace + "/" + service.ChunkMdPipelineID + "@" + service.ConvertDocToMDModelVersion,
238+
"instill/" + service.ConvertDocToMDModelID + "@" + service.ConvertDocToMDModelVersion,
239+
service.NamespaceID + "/" + service.ConvertDocToMDPipelineID + "@" + service.DocToMDVersion,
239240
},
240241
SummarizingPipelines: []string{
241242
service.NamespaceID + "/" + service.GenerateSummaryPipelineID + "@" + service.GenerateSummaryVersion,
@@ -332,7 +333,8 @@ func (ph *PublicHandler) UpdateCatalog(ctx context.Context, req *artifactpb.Upda
332333
UpdateTime: kb.UpdateTime.String(),
333334
OwnerName: kb.Owner,
334335
ConvertingPipelines: []string{
335-
config.Config.ModelBackend.Namespace + "/" + service.ChunkMdPipelineID + "@" + service.ConvertDocToMDModelVersion,
336+
"instill/" + service.ConvertDocToMDModelID + "@" + service.ConvertDocToMDModelVersion,
337+
service.NamespaceID + "/" + service.ConvertDocToMDPipelineID + "@" + service.DocToMDVersion,
336338
},
337339
SummarizingPipelines: []string{
338340
service.NamespaceID + "/" + service.GenerateSummaryPipelineID + "@" + service.GenerateSummaryVersion,

pkg/service/model.go

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import (
1010
"google.golang.org/grpc/metadata"
1111
"google.golang.org/protobuf/types/known/structpb"
1212

13-
"github.com/instill-ai/artifact-backend/config"
1413
"github.com/instill-ai/artifact-backend/pkg/constant"
1514
"github.com/instill-ai/artifact-backend/pkg/logger"
1615

@@ -42,8 +41,9 @@ func (s *Service) ConvertToMDModel(ctx context.Context, fileUID uuid.UUID, calle
4241
// Get the appropriate prefix for the file type
4342
prefix := getFileTypePrefix(fileType)
4443

44+
namespaceID := "admin"
4545
req := &modelpb.TriggerNamespaceModelRequest{
46-
NamespaceId: config.Config.ModelBackend.Namespace,
46+
NamespaceId: namespaceID,
4747
ModelId: ConvertDocToMDModelID,
4848
Version: ConvertDocToMDModelVersion,
4949
TaskInputs: []*structpb.Struct{
@@ -63,8 +63,20 @@ func (s *Service) ConvertToMDModel(ctx context.Context, fileUID uuid.UUID, calle
6363

6464
resp, err := s.ModelPub.TriggerNamespaceModel(ctx, req)
6565
if err != nil {
66-
logger.Error("failed to trigger model", zap.Error(err))
67-
return "", fmt.Errorf("failed to trigger %s model: %w", ConvertDocToMDModelID, err)
66+
namespaceID = "instill"
67+
req.NamespaceId = namespaceID
68+
resp, err = s.ModelPub.TriggerNamespaceModel(ctx, req)
69+
if err != nil {
70+
logger.Error(fmt.Sprintf("failed to trigger %s model", ConvertDocToMDModelID), zap.Error(err))
71+
return "", fmt.Errorf("failed to trigger %s model: %w", ConvertDocToMDModelID, err)
72+
}
73+
}
74+
75+
convertingModelMetadata := namespaceID + "/" + ConvertDocToMDModelID + "@" + ConvertDocToMDModelVersion
76+
err = s.Repository.UpdateKbFileExtraMetaData(ctx, fileUID, "", convertingModelMetadata, "", "", "", nil, nil, nil, nil, nil)
77+
if err != nil {
78+
logger.Error("Failed to save converting pipeline metadata.", zap.String("File uid:", fileUID.String()))
79+
return "", fmt.Errorf("failed to save converting model metadata: %w", err)
6880
}
6981

7082
result, err := getModelConvertResult(resp)

pkg/service/pipeline.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ var PresetPipelinesList = []struct {
6161
{ID: QAPipelineID, Version: QAVersion},
6262
}
6363

64-
// ConvertToMDPipeForFiles using converting pipeline to convert some file type to MD and consume caller's credits
65-
func (s *Service) ConvertToMDPipeForFiles(ctx context.Context, fileUID uuid.UUID, caller uuid.UUID, requester uuid.UUID, fileBase64 string, fileType artifactpb.FileType) (string, error) {
64+
// ConvertToMDPipe using converting pipeline to convert some file type to MD and consume caller's credits
65+
func (s *Service) ConvertToMDPipe(ctx context.Context, fileUID uuid.UUID, caller uuid.UUID, requester uuid.UUID, fileBase64 string, fileType artifactpb.FileType) (string, error) {
6666
logger, _ := logger.GetZapLogger(ctx)
6767
var md metadata.MD
6868
if requester != uuid.Nil {
@@ -136,6 +136,13 @@ func (s *Service) ConvertToMDPipeForFiles(ctx context.Context, fileUID uuid.UUID
136136
return "", fmt.Errorf("failed to trigger %s pipeline: %w", pipelineID, err)
137137
}
138138

139+
convertingModelMetadata := NamespaceID + "/" + ConvertDocToMDModelID + "@" + ConvertDocToMDModelVersion
140+
err = s.Repository.UpdateKbFileExtraMetaData(ctx, fileUID, "", convertingModelMetadata, "", "", "", nil, nil, nil, nil, nil)
141+
if err != nil {
142+
logger.Error("Failed to save converting pipeline metadata.", zap.String("File uid:", fileUID.String()))
143+
return "", fmt.Errorf("failed to save converting model metadata: %w", err)
144+
}
145+
139146
result, err := getConvertResult(resp)
140147
if err != nil {
141148
logger.Error("failed to get convert result", zap.Error(err))

pkg/worker/persistentcatalogworker.go

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111
"go.uber.org/zap"
1212

1313
"github.com/gofrs/uuid"
14-
"github.com/instill-ai/artifact-backend/config"
1514
"github.com/instill-ai/artifact-backend/pkg/constant"
1615
"github.com/instill-ai/artifact-backend/pkg/logger"
1716
"github.com/instill-ai/artifact-backend/pkg/minio"
@@ -379,15 +378,11 @@ func (wp *persistentCatalogFileToEmbWorkerPool) processConvertingFile(ctx contex
379378
requesterUID := file.RequesterUID
380379
convertedMD, err := wp.svc.ConvertToMDModel(ctx, file.UID, file.CreatorUID, requesterUID, base64Data, artifactpb.FileType(artifactpb.FileType_value[file.Type]))
381380
if err != nil {
382-
logger.Error("Failed to convert pdf to md using pdf-to-md pipeline.", zap.String("File path", fileInMinIOPath))
383-
return nil, artifactpb.FileProcessStatus_FILE_PROCESS_STATUS_UNSPECIFIED, err
384-
}
385-
386-
convertingModelMetadata := config.Config.ModelBackend.Namespace + "/" + service.ConvertDocToMDModelID + "@" + service.ConvertDocToMDModelVersion
387-
err = wp.svc.Repository.UpdateKbFileExtraMetaData(ctx, file.UID, "", convertingModelMetadata, "", "", "", nil, nil, nil, nil, nil)
388-
if err != nil {
389-
logger.Error("Failed to save converting pipeline metadata.", zap.String("File uid:", file.UID.String()))
390-
return nil, artifactpb.FileProcessStatus_FILE_PROCESS_STATUS_UNSPECIFIED, fmt.Errorf("failed to save converting model metadata: %w", err)
381+
logger.Error("Failed to convert pdf to md using docling model, fallback to pipeline.")
382+
if convertedMD, err = wp.svc.ConvertToMDPipe(ctx, file.UID, file.CreatorUID, requesterUID, base64Data, artifactpb.FileType(artifactpb.FileType_value[file.Type])); err != nil {
383+
logger.Error("Failed to convert pdf to md using pdf-to-md pipeline.", zap.String("File path", fileInMinIOPath))
384+
return nil, artifactpb.FileProcessStatus_FILE_PROCESS_STATUS_UNSPECIFIED, err
385+
}
391386
}
392387

393388
// save the converted file into object storage and metadata into database

0 commit comments

Comments
 (0)