@@ -144,6 +144,37 @@ where
144
144
self . batch . desc . lower ( )
145
145
}
146
146
147
+ /// Efficiently rewrites the timestamps in this not-yet-committed batch.
148
+ ///
149
+ /// This [Batch] represents potentially large amounts of data, which may
150
+ /// have partly or entirely been spilled to s3. This call bulk edits the
151
+ /// timestamps of all data in this batch in a metadata-only operation (i.e.
152
+ /// without network calls).
153
+ ///
154
+ /// Specifically, every timestamp in the batch is logically advanced_by the
155
+ /// provided `frontier`.
156
+ ///
157
+ /// This method may be called multiple times, with later calls overriding
158
+ /// previous ones, but the rewrite frontier may not regress across calls.
159
+ ///
160
+ /// When this batch was created, it was given an `upper`, which bounds the
161
+ /// staged data it represents. To allow rewrite past this original `upper`,
162
+ /// this call accepts a new `upper` which replaces the previous one. Like
163
+ /// the rewrite frontier, the upper may not regress across calls.
164
+ ///
165
+ /// Multiple batches with various rewrite frontiers may be used in a single
166
+ /// [crate::write::WriteHandle::compare_and_append_batch] call. This is an
167
+ /// expected usage.
168
+ pub fn rewrite_ts (
169
+ & mut self ,
170
+ frontier : & Antichain < T > ,
171
+ new_upper : Antichain < T > ,
172
+ ) -> Result < ( ) , InvalidUsage < T > > {
173
+ self . batch
174
+ . rewrite_ts ( frontier, new_upper)
175
+ . map_err ( InvalidUsage :: InvalidRewrite )
176
+ }
177
+
147
178
/// Marks the blobs that this batch handle points to as consumed, likely
148
179
/// because they were appended to a shard.
149
180
///
@@ -771,8 +802,8 @@ pub(crate) struct BatchParts<T> {
771
802
lower : Antichain < T > ,
772
803
blob : Arc < dyn Blob + Send + Sync > ,
773
804
isolated_runtime : Arc < IsolatedRuntime > ,
774
- writing_parts : VecDeque < JoinHandle < HollowBatchPart > > ,
775
- finished_parts : Vec < HollowBatchPart > ,
805
+ writing_parts : VecDeque < JoinHandle < HollowBatchPart < T > > > ,
806
+ finished_parts : Vec < HollowBatchPart < T > > ,
776
807
batch_metrics : BatchWriteMetrics ,
777
808
}
778
809
@@ -906,6 +937,7 @@ impl<T: Timestamp + Codec64> BatchParts<T> {
906
937
encoded_size_bytes : payload_len,
907
938
key_lower,
908
939
stats,
940
+ ts_rewrite : None ,
909
941
}
910
942
}
911
943
. instrument ( write_span) ,
@@ -927,7 +959,7 @@ impl<T: Timestamp + Codec64> BatchParts<T> {
927
959
}
928
960
929
961
#[ instrument( level = "debug" , name = "batch::finish_upload" , fields( shard = %self . shard_id) ) ]
930
- pub ( crate ) async fn finish ( self ) -> Vec < HollowBatchPart > {
962
+ pub ( crate ) async fn finish ( self ) -> Vec < HollowBatchPart < T > > {
931
963
let mut parts = self . finished_parts ;
932
964
for handle in self . writing_parts {
933
965
let part = handle. wait_and_assert_finished ( ) . await ;
@@ -938,9 +970,39 @@ impl<T: Timestamp + Codec64> BatchParts<T> {
938
970
}
939
971
940
972
pub ( crate ) fn validate_truncate_batch < T : Timestamp > (
941
- batch : & Description < T > ,
973
+ batch : & HollowBatch < T > ,
942
974
truncate : & Description < T > ,
975
+ any_batch_rewrite : bool ,
943
976
) -> Result < ( ) , InvalidUsage < T > > {
977
+ // If rewrite_ts is used, we don't allow truncation, to keep things simpler
978
+ // to reason about.
979
+ if any_batch_rewrite {
980
+ // We allow a new upper to be specified at rewrite time, so that's easy:
981
+ // it must match exactly. This is both consistent with the upper
982
+ // requirement below and proves that there is no data to truncate past
983
+ // the upper.
984
+ if truncate. upper ( ) != batch. desc . upper ( ) {
985
+ return Err ( InvalidUsage :: InvalidRewrite ( format ! (
986
+ "rewritten batch might have data past {:?} up to {:?}" ,
987
+ truncate. upper( ) . elements( ) ,
988
+ batch. desc. upper( ) . elements( ) ,
989
+ ) ) ) ;
990
+ }
991
+ // To prove that there is no data to truncate below the lower, require
992
+ // that the lower is <= the rewrite ts.
993
+ for part in batch. parts . iter ( ) {
994
+ let part_lower_bound = part. ts_rewrite . as_ref ( ) . unwrap_or ( batch. desc . lower ( ) ) ;
995
+ if !PartialOrder :: less_equal ( truncate. lower ( ) , part_lower_bound) {
996
+ return Err ( InvalidUsage :: InvalidRewrite ( format ! (
997
+ "rewritten batch might have data below {:?} at {:?}" ,
998
+ truncate. lower( ) . elements( ) ,
999
+ part_lower_bound. elements( ) ,
1000
+ ) ) ) ;
1001
+ }
1002
+ }
1003
+ }
1004
+
1005
+ let batch = & batch. desc ;
944
1006
if !PartialOrder :: less_equal ( batch. lower ( ) , truncate. lower ( ) )
945
1007
|| PartialOrder :: less_than ( batch. upper ( ) , truncate. upper ( ) )
946
1008
{
@@ -958,7 +1020,7 @@ pub(crate) fn validate_truncate_batch<T: Timestamp>(
958
1020
mod tests {
959
1021
use crate :: cache:: PersistClientCache ;
960
1022
use crate :: internal:: paths:: { BlobKey , PartialBlobKey } ;
961
- use crate :: tests:: { all_ok, CodecProduct } ;
1023
+ use crate :: tests:: { all_ok, new_test_client , CodecProduct } ;
962
1024
use crate :: PersistLocation ;
963
1025
964
1026
use super :: * ;
@@ -1167,4 +1229,32 @@ mod tests {
1167
1229
assert ! ( untrimmable. should_retain( "ww-XYZ" ) ) ;
1168
1230
assert ! ( !untrimmable. should_retain( "xya" ) ) ;
1169
1231
}
1232
+
1233
+ // NB: Most edge cases are exercised in datadriven tests.
1234
+ #[ mz_ore:: test( tokio:: test) ]
1235
+ #[ cfg_attr( miri, ignore) ] // too slow
1236
+ async fn rewrite_ts_example ( ) {
1237
+ let client = new_test_client ( ) . await ;
1238
+ let ( mut write, read) = client
1239
+ . expect_open :: < String , ( ) , u64 , i64 > ( ShardId :: new ( ) )
1240
+ . await ;
1241
+
1242
+ let mut batch = write. builder ( Antichain :: from_elem ( 0 ) ) ;
1243
+ batch. add ( & "foo" . to_owned ( ) , & ( ) , & 0 , & 1 ) . await . unwrap ( ) ;
1244
+ let batch = batch. finish ( Antichain :: from_elem ( 1 ) ) . await . unwrap ( ) ;
1245
+
1246
+ // Roundtrip through a transmittable batch.
1247
+ let batch = batch. into_transmittable_batch ( ) ;
1248
+ let mut batch = write. batch_from_transmittable_batch ( batch) ;
1249
+ batch
1250
+ . rewrite_ts ( & Antichain :: from_elem ( 2 ) , Antichain :: from_elem ( 3 ) )
1251
+ . unwrap ( ) ;
1252
+ write
1253
+ . expect_compare_and_append_batch ( & mut [ & mut batch] , 0 , 3 )
1254
+ . await ;
1255
+
1256
+ let ( actual, _) = read. expect_listen ( 0 ) . await . read_until ( & 3 ) . await ;
1257
+ let expected = vec ! [ ( ( ( Ok ( "foo" . to_owned( ) ) ) , Ok ( ( ) ) ) , 2 , 1 ) ] ;
1258
+ assert_eq ! ( actual, expected) ;
1259
+ }
1170
1260
}
0 commit comments