@@ -7,17 +7,17 @@ use chroma_error::{ChromaError, ErrorCodes};
77use chroma_sqlite:: {
88 db:: SqliteDb ,
99 helpers:: { delete_metadata, update_metadata} ,
10- table:: { EmbeddingFulltextSearch , EmbeddingMetadata , Embeddings , MaxSeqId } ,
10+ table:: { Collections , EmbeddingFulltextSearch , EmbeddingMetadata , Embeddings , MaxSeqId } ,
1111} ;
1212use chroma_types:: {
1313 operator:: {
1414 CountResult , Filter , GetResult , Limit , Projection , ProjectionOutput , ProjectionRecord , Scan ,
1515 } ,
1616 plan:: { Count , Get } ,
17- BooleanOperator , Chunk , CompositeExpression , DocumentExpression , DocumentOperator , LogRecord ,
18- MetadataComparison , MetadataExpression , MetadataSetValue , MetadataValue ,
19- MetadataValueConversionError , Operation , OperationRecord , PrimitiveOperator , SegmentUuid ,
20- SetOperator , UpdateMetadataValue , Where , CHROMA_DOCUMENT_KEY ,
17+ BooleanOperator , Chunk , CollectionUuid , CompositeExpression , DocumentExpression ,
18+ DocumentOperator , LogRecord , MetadataComparison , MetadataExpression , MetadataSetValue ,
19+ MetadataValue , MetadataValueConversionError , Operation , OperationRecord , PrimitiveOperator ,
20+ Schema , SegmentUuid , SetOperator , UpdateMetadataValue , Where , CHROMA_DOCUMENT_KEY ,
2121} ;
2222use sea_query:: {
2323 Alias , DeleteStatement , Expr , ExprTrait , Func , InsertStatement , LikeExpr , OnConflict , Query ,
@@ -41,6 +41,8 @@ pub enum SqliteMetadataError {
4141 SeaQuery ( #[ from] sea_query:: error:: Error ) ,
4242 #[ error( transparent) ]
4343 Sqlx ( #[ from] sqlx:: Error ) ,
44+ #[ error( "Could not serialize schema: {0}" ) ]
45+ SerializeSchema ( #[ from] serde_json:: Error ) ,
4446}
4547
4648impl ChromaError for SqliteMetadataError {
@@ -53,6 +55,11 @@ pub struct SqliteMetadataWriter {
5355 pub db : SqliteDb ,
5456}
5557
58+ pub struct ApplyLogsOutcome {
59+ pub schema_update : Option < Schema > ,
60+ pub max_seq_id : Option < u64 > ,
61+ }
62+
5663impl SqliteMetadataWriter {
5764 pub fn new ( db : SqliteDb ) -> Self {
5865 Self { db }
@@ -278,19 +285,69 @@ impl SqliteMetadataWriter {
278285 Ok ( self . db . get_conn ( ) . begin ( ) . await ?)
279286 }
280287
288+ pub async fn update_collection_schema < C > (
289+ & self ,
290+ collection_id : CollectionUuid ,
291+ schema : & Schema ,
292+ tx : & mut C ,
293+ ) -> Result < ( ) , SqliteMetadataError >
294+ where
295+ for < ' connection > & ' connection mut C : sqlx:: Executor < ' connection , Database = sqlx:: Sqlite > ,
296+ {
297+ let schema_str = serde_json:: to_string ( schema) ?;
298+ let ( sql, values) = Query :: update ( )
299+ . table ( Collections :: Table )
300+ . value ( Collections :: SchemaStr , schema_str)
301+ . and_where (
302+ Expr :: col ( ( Collections :: Table , Collections :: Id ) ) . eq ( collection_id. to_string ( ) ) ,
303+ )
304+ . build_sqlx ( SqliteQueryBuilder ) ;
305+ sqlx:: query_with ( & sql, values) . execute ( & mut * tx) . await ?;
306+ Ok ( ( ) )
307+ }
308+
309+ fn ensure_schema_for_update_value (
310+ schema : & mut Option < Schema > ,
311+ key : & str ,
312+ value : & UpdateMetadataValue ,
313+ ) -> bool {
314+ if key == CHROMA_DOCUMENT_KEY {
315+ return false ;
316+ }
317+ match value {
318+ UpdateMetadataValue :: None => false ,
319+ _ => {
320+ if let Some ( schema_mut) = schema. as_mut ( ) {
321+ if let Ok ( metadata_value) = MetadataValue :: try_from ( value) {
322+ return schema_mut
323+ . ensure_key_from_metadata ( key, metadata_value. value_type ( ) ) ;
324+ }
325+ }
326+ false
327+ }
328+ }
329+ }
330+
281331 pub async fn apply_logs < C > (
282332 & self ,
283333 logs : Chunk < LogRecord > ,
284334 segment_id : SegmentUuid ,
335+ schema : Option < Schema > ,
285336 tx : & mut C ,
286- ) -> Result < ( ) , SqliteMetadataError >
337+ ) -> Result < ApplyLogsOutcome , SqliteMetadataError >
287338 where
288339 for < ' connection > & ' connection mut C : sqlx:: Executor < ' connection , Database = sqlx:: Sqlite > ,
289340 {
290341 if logs. is_empty ( ) {
291- return Ok ( ( ) ) ;
342+ return Ok ( ApplyLogsOutcome {
343+ schema_update : None ,
344+ max_seq_id : None ,
345+ } ) ;
292346 }
347+ let mut schema = schema;
348+ let mut schema_modified = false ;
293349 let mut max_seq_id = u64:: MIN ;
350+ let mut saw_log = false ;
294351 for (
295352 LogRecord {
296353 log_offset,
@@ -307,6 +364,7 @@ impl SqliteMetadataWriter {
307364 ) in logs. iter ( )
308365 {
309366 let log_offset_unsigned = ( * log_offset) . try_into ( ) ?;
367+ saw_log = true ;
310368 max_seq_id = max_seq_id. max ( log_offset_unsigned) ;
311369 let mut metadata_owned = metadata. clone ( ) ;
312370 if let Some ( doc) = document {
@@ -323,6 +381,11 @@ impl SqliteMetadataWriter {
323381 Self :: add_record ( tx, segment_id, log_offset_unsigned, id. clone ( ) ) . await ?
324382 {
325383 if let Some ( meta) = metadata_owned {
384+ for ( key, value) in meta. iter ( ) {
385+ if Self :: ensure_schema_for_update_value ( & mut schema, key, value) {
386+ schema_modified = true ;
387+ }
388+ }
326389 update_metadata :: < EmbeddingMetadata , _ , _ > ( tx, offset_id, meta) . await ?;
327390 }
328391
@@ -336,6 +399,11 @@ impl SqliteMetadataWriter {
336399 Self :: update_record ( tx, segment_id, log_offset_unsigned, id. clone ( ) ) . await ?
337400 {
338401 if let Some ( meta) = metadata_owned {
402+ for ( key, value) in meta. iter ( ) {
403+ if Self :: ensure_schema_for_update_value ( & mut schema, key, value) {
404+ schema_modified = true ;
405+ }
406+ }
339407 update_metadata :: < EmbeddingMetadata , _ , _ > ( tx, offset_id, meta) . await ?;
340408 }
341409
@@ -351,6 +419,11 @@ impl SqliteMetadataWriter {
351419 . await ?;
352420
353421 if let Some ( meta) = metadata_owned {
422+ for ( key, value) in meta. iter ( ) {
423+ if Self :: ensure_schema_for_update_value ( & mut schema, key, value) {
424+ schema_modified = true ;
425+ }
426+ }
354427 update_metadata :: < EmbeddingMetadata , _ , _ > ( tx, offset_id, meta) . await ?;
355428 }
356429
@@ -371,7 +444,12 @@ impl SqliteMetadataWriter {
371444
372445 Self :: upsert_max_seq_id ( tx, segment_id, max_seq_id) . await ?;
373446
374- Ok ( ( ) )
447+ let max_seq_id = if saw_log { Some ( max_seq_id) } else { None } ;
448+
449+ Ok ( ApplyLogsOutcome {
450+ schema_update : if schema_modified { schema } else { None } ,
451+ max_seq_id,
452+ } )
375453 }
376454}
377455
@@ -910,7 +988,17 @@ mod tests {
910988 ref_seg. apply_logs( test_data. logs. clone( ) , metadata_seg_id) ;
911989 let mut tx = runtime. block_on( sqlite_seg_writer. begin( ) ) . expect( "Should be able to start transaction" ) ;
912990 let data: Chunk <LogRecord > = Chunk :: new( test_data. logs. clone( ) . into( ) ) ;
913- runtime. block_on( sqlite_seg_writer. apply_logs( data, metadata_seg_id, & mut * tx) ) . expect( "Should be able to apply logs" ) ;
991+ runtime. block_on( sqlite_seg_writer. apply_logs(
992+ data,
993+ metadata_seg_id,
994+ test_data
995+ . collection_and_segments
996+ . collection
997+ . schema
998+ . clone( ) ,
999+ & mut * tx,
1000+ ) )
1001+ . expect( "Should be able to apply logs" ) ;
9141002 runtime. block_on( tx. commit( ) ) . expect( "Should be able to commit log" ) ;
9151003
9161004 let sqlite_seg_reader = SqliteMetadataReader {
@@ -938,7 +1026,17 @@ mod tests {
9381026 ref_seg. apply_logs( test_data. logs. clone( ) , metadata_seg_id) ;
9391027 let mut tx = runtime. block_on( sqlite_seg_writer. begin( ) ) . expect( "Should be able to start transaction" ) ;
9401028 let data: Chunk <LogRecord > = Chunk :: new( test_data. logs. clone( ) . into( ) ) ;
941- runtime. block_on( sqlite_seg_writer. apply_logs( data, metadata_seg_id, & mut * tx) ) . expect( "Should be able to apply logs" ) ;
1029+ runtime. block_on( sqlite_seg_writer. apply_logs(
1030+ data,
1031+ metadata_seg_id,
1032+ test_data
1033+ . collection_and_segments
1034+ . collection
1035+ . schema
1036+ . clone( ) ,
1037+ & mut * tx,
1038+ ) )
1039+ . expect( "Should be able to apply logs" ) ;
9421040 runtime. block_on( tx. commit( ) ) . expect( "Should be able to commit log" ) ;
9431041
9441042 let sqlite_seg_reader = SqliteMetadataReader {
@@ -1020,7 +1118,12 @@ mod tests {
10201118 . expect ( "Should be able to start transaction" ) ;
10211119 let data: Chunk < LogRecord > = Chunk :: new ( logs. into ( ) ) ;
10221120 sqlite_seg_writer
1023- . apply_logs ( data, metadata_seg_id, & mut * tx)
1121+ . apply_logs (
1122+ data,
1123+ metadata_seg_id,
1124+ collection_and_segments. collection . schema . clone ( ) ,
1125+ & mut * tx,
1126+ )
10241127 . await
10251128 . expect ( "Should be able to apply logs" ) ;
10261129 tx. commit ( ) . await . expect ( "Should be able to commit log" ) ;
@@ -1140,7 +1243,12 @@ mod tests {
11401243 . expect ( "Should be able to start transaction" ) ;
11411244 let data: Chunk < LogRecord > = Chunk :: new ( logs. into ( ) ) ;
11421245 sqlite_seg_writer
1143- . apply_logs ( data, metadata_seg_id, & mut * tx)
1246+ . apply_logs (
1247+ data,
1248+ metadata_seg_id,
1249+ collection_and_segments. collection . schema . clone ( ) ,
1250+ & mut * tx,
1251+ )
11441252 . await
11451253 . expect ( "Should be able to apply logs" ) ;
11461254 tx. commit ( ) . await . expect ( "Should be able to commit log" ) ;
0 commit comments