1- /**
2- * Transformers.js Embedding Provider
3- * Uses local models via @xenova/transformers
4- */
1+ import { EmbeddingProvider , DEFAULT_MODEL } from "./types.js" ;
52
6- import { EmbeddingProvider } from "./types.js" ;
7-
8- // Model configurations
93const MODEL_CONFIGS : Record < string , { dimensions : number } > = {
104 "Xenova/bge-small-en-v1.5" : { dimensions : 384 } ,
115 "Xenova/all-MiniLM-L6-v2" : { dimensions : 384 } ,
@@ -21,7 +15,7 @@ export class TransformersEmbeddingProvider implements EmbeddingProvider {
2115 private ready = false ;
2216 private initPromise : Promise < void > | null = null ;
2317
/**
 * Remember which model this provider will use and the embedding width
 * associated with it.
 *
 * @param modelName - Model identifier; defaults to DEFAULT_MODEL.
 */
constructor(modelName: string = DEFAULT_MODEL) {
  const knownConfig = MODEL_CONFIGS[modelName];

  this.modelName = modelName;
  // Models without a MODEL_CONFIGS entry (or with a falsy dimensions value)
  // fall back to a width of 384.
  this.dimensions = knownConfig?.dimensions || 384;
}
@@ -39,12 +33,10 @@ export class TransformersEmbeddingProvider implements EmbeddingProvider {
3933 console . error ( `Loading embedding model: ${ this . modelName } ` ) ;
4034 console . error ( "(First run will download ~130MB model)" ) ;
4135
42- // Dynamic import to avoid issues at require time
4336 const { pipeline } = await import ( "@xenova/transformers" ) ;
4437
45- // Create feature extraction pipeline
4638 this . pipeline = await pipeline ( "feature-extraction" , this . modelName , {
47- quantized : true , // Use quantized model for speed
39+ quantized : true ,
4840 } ) ;
4941
5042 this . ready = true ;
@@ -61,13 +53,11 @@ export class TransformersEmbeddingProvider implements EmbeddingProvider {
6153 }
6254
6355 try {
64- // Get embeddings
6556 const output = await this . pipeline ( text , {
6657 pooling : "mean" ,
6758 normalize : true ,
6859 } ) ;
6960
70- // Convert to array
7161 return Array . from ( output . data ) ;
7262 } catch ( error ) {
7363 console . error ( "Failed to generate embedding:" , error ) ;
@@ -81,24 +71,19 @@ export class TransformersEmbeddingProvider implements EmbeddingProvider {
8171 }
8272
8373 const embeddings : number [ ] [ ] = [ ] ;
84-
85- // Process in smaller batches to manage memory
8674 const batchSize = 32 ;
75+
8776 for ( let i = 0 ; i < texts . length ; i += batchSize ) {
8877 const batch = texts . slice ( i , i + batchSize ) ;
89-
90- // Process batch
9178 const batchEmbeddings = await Promise . all (
9279 batch . map ( ( text ) => this . embed ( text ) )
9380 ) ;
9481
9582 embeddings . push ( ...batchEmbeddings ) ;
9683
97- // Log progress for large batches
9884 if ( texts . length > 100 && ( i + batchSize ) % 100 === 0 ) {
9985 console . error (
100- `Embedded ${ Math . min ( i + batchSize , texts . length ) } /${ texts . length
101- } chunks`
86+ `Embedded ${ Math . min ( i + batchSize , texts . length ) } /${ texts . length } chunks`
10287 ) ;
10388 }
10489 }
@@ -111,13 +96,11 @@ export class TransformersEmbeddingProvider implements EmbeddingProvider {
11196 }
11297}
11398
114- /**
115- * Create an embedding provider based on config
116- */
/**
 * Construct a TransformersEmbeddingProvider for the given model and wait for
 * its initialize() call to settle before handing it back.
 *
 * @param modelName - Model identifier passed to the provider constructor;
 *   defaults to DEFAULT_MODEL.
 * @returns The provider instance, once initialize() has resolved. A rejection
 *   from initialize() propagates to the caller.
 */
export async function createEmbeddingProvider(
  modelName: string = DEFAULT_MODEL
): Promise<EmbeddingProvider> {
  const embedder = new TransformersEmbeddingProvider(modelName);
  await embedder.initialize();
  return embedder;
}
106+
0 commit comments