77use  crate :: action:: Txn ; 
88use  crate :: { DeltaTableError ,  DeltaTransactionError } ; 
99use  arrow:: record_batch:: RecordBatch ; 
10+ use  log:: * ; 
1011use  parquet:: arrow:: ArrowWriter ; 
1112use  parquet:: basic:: Compression ; 
1213use  parquet:: file:: properties:: WriterProperties ; 
@@ -16,23 +17,19 @@ use std::collections::HashMap;
1617use  std:: convert:: TryFrom ; 
1718use  std:: sync:: Arc ; 
1819
19- /** 
20-  * BufferedJSONWriter allows for buffering serde_json::Value rows before flushing to parquet files 
21-  * and a Delta transaction 
22-  */ 
23- pub  struct  BufferedJSONWriter  { 
20+ /// BufferedJsonWriter allows for buffering serde_json::Value rows before flushing to parquet files 
21+ /// and a Delta transaction 
22+ pub  struct  BufferedJsonWriter  { 
2423    table :  crate :: DeltaTable , 
2524    buffer :  HashMap < WriterPartition ,  Vec < Value > > , 
2625    schema :  arrow:: datatypes:: SchemaRef , 
2726    partitions :  Vec < String > , 
2827    txns :  Vec < Txn > , 
2928} 
3029
31- impl  BufferedJSONWriter  { 
32-     /** 
33-      * Attempt to construct the BufferedJSONWriter, will fail if the table's metadata is not 
34-      * present 
35-      */ 
30+ impl  BufferedJsonWriter  { 
31+     /// Attempt to construct the BufferedJsonWriter, will fail if the table's metadata is not 
32+ /// present 
3633pub  fn  try_new ( table :  crate :: DeltaTable )  -> Result < Self ,  DeltaTableError >  { 
3734        let  metadata = table. get_metadata ( ) ?. clone ( ) ; 
3835        let  schema = metadata. schema ; 
@@ -49,38 +46,32 @@ impl BufferedJSONWriter {
4946        } ) 
5047    } 
5148
52-     /** 
53-      * Return the total Values pending in the buffer 
54-      */ 
49+     /// Return the total Values pending in the buffer 
5550pub  fn  count ( & self ,  partitions :  & WriterPartition )  -> Option < usize >  { 
5651        self . buffer . get ( & partitions) . map ( |b| b. len ( ) ) 
5752    } 
5853
59-     /** 
60-      * Add a txn action to the buffer 
61-      */ 
54+     /// Add a txn action to the buffer 
6255pub  fn  record_txn ( & mut  self ,  txn :  Txn )  { 
6356        self . txns . push ( txn) ; 
6457    } 
6558
66-     /** 
67-      * Write a new Value into the buffer 
68-      */ 
59+     /// Write a new Value into the buffer 
6960pub  fn  write ( 
7061        & mut  self , 
7162        value :  Value , 
7263        partitions :  WriterPartition , 
7364    )  -> Result < ( ) ,  DeltaTableError >  { 
7465        match  partitions { 
7566            WriterPartition :: NoPartitions  => { 
76-                 if  self . partitions . len ( )  >  0  { 
67+                 if  ! self . partitions . is_empty ( )  { 
7768                    return  Err ( DeltaTableError :: SchemaMismatch  { 
7869                        msg :  "Table has partitions but noone were supplied on write" . to_string ( ) , 
7970                    } ) ; 
8071                } 
8172            } 
8273            WriterPartition :: KeyValues  {  .. }  => { 
83-                 if  self . partitions . len ( )  ==  0  { 
74+                 if  self . partitions . is_empty ( )  { 
8475                    return  Err ( DeltaTableError :: SchemaMismatch  { 
8576                        msg :  "Table has no partitions yet they were supplied on write" . to_string ( ) , 
8677                    } ) ; 
@@ -96,12 +87,10 @@ impl BufferedJSONWriter {
9687        Ok ( ( ) ) 
9788    } 
9889
99-     /** 
100-      * Flush the buffer, causing a write of parquet files for each set of partitioned information 
101-      * as well as any buffered txn actions 
102-      * 
103-      * This will create a single transaction in the delta transaction log 
104-      */ 
90+     /// Flush the buffer, causing a write of parquet files for each set of partitioned information 
91+ /// as well as any buffered txn actions 
92+ /// 
93+ /// This will create a single transaction in the delta transaction log 
10594pub  async  fn  flush ( & mut  self )  -> Result < ( ) ,  DeltaTransactionError >  { 
10695        use  arrow:: json:: reader:: Decoder ; 
10796
@@ -116,21 +105,32 @@ impl BufferedJSONWriter {
116105                . next_batch ( & mut  value_iter) 
117106                . map_err ( |source| DeltaTableError :: ArrowError  {  source } ) ?; 
118107
119-             if  record_batch. is_none ( )  { 
120-                 return  Ok ( ( ) ) ; 
108+             if  record_batch. is_some ( )  { 
109+                 let  mut  pb = ParquetBuffer :: try_new ( self . schema . clone ( ) ) ?; 
110+                 pb. write_batch ( & record_batch. unwrap ( ) ) ?; 
111+                 let  _metadata = pb. close ( ) ; 
112+                 parquet_bufs. push ( ( partitions. clone ( ) ,  pb. data ( ) ) ) ; 
113+             }  else  { 
114+                 warn ! ( "Attempted to flush an empty RecordBatch from the BufferedJsonWriter" ) ; 
121115            } 
122- 
123-             let  mut  pb = ParquetBuffer :: try_new ( self . schema . clone ( ) ) ?; 
124-             pb. write_batch ( & record_batch. unwrap ( ) ) ?; 
125-             let  _metadata = pb. close ( ) ; 
126-             parquet_bufs. push ( pb. data ( ) ) ; 
127116        } 
128117
129118        let  mut  dtx = self . table . create_transaction ( None ) ; 
119+         for  ( partitions,  buf)  in  parquet_bufs { 
120+             match  partitions { 
121+                 WriterPartition :: NoPartitions  => { 
122+                     dtx. add_file ( & buf,  None ) . await ?; 
123+                 } 
124+                 WriterPartition :: KeyValues  {  partitions }  => { 
125+                     dtx. add_file ( & buf,  Some ( partitions) ) . await ?; 
126+                 } 
127+             } 
128+         } 
129+ 
130130        dtx. add_actions ( 
131131            self . txns 
132132                . drain ( 0 ..) 
133-                 . map ( |t|  crate :: action:: Action :: txn ( t ) ) 
133+                 . map ( crate :: action:: Action :: txn) 
134134                . collect ( ) , 
135135        ) ; 
136136
@@ -200,9 +200,7 @@ impl<'a> Iterator for InMemValueIter<'a> {
200200    } 
201201} 
202202
203- /** 
204-  * The type of partition for a row being written to a writer 
205-  */ 
203+ /// The type of partition for a row being written to a writer 
206204#[ derive( Clone ,  Debug ,  Eq ,  Hash ,  PartialEq ) ]  
207205pub  enum  WriterPartition  { 
208206    /// The row is not partitioned 
@@ -221,7 +219,7 @@ mod tests {
221219    #[ tokio:: test]  
222220    async  fn  test_writer_buffer_nopartition ( )  { 
223221        let  table = crate :: open_table ( "./tests/data/delta-0.8.0" ) . await . unwrap ( ) ; 
224-         let  mut  writer = BufferedJSONWriter :: try_new ( table) . unwrap ( ) ; 
222+         let  mut  writer = BufferedJsonWriter :: try_new ( table) . unwrap ( ) ; 
225223        assert_eq ! ( writer. count( & WriterPartition :: NoPartitions ) ,  None ) ; 
226224        let  res = writer. write ( json ! ( { "hello" : "world" } ) ,  WriterPartition :: NoPartitions ) ; 
227225        assert ! ( res. is_ok( ) ) ; 
@@ -233,15 +231,15 @@ mod tests {
233231        let  table = crate :: open_table ( "./tests/data/delta-0.8.0-partitioned" ) 
234232            . await 
235233            . unwrap ( ) ; 
236-         let  mut  writer = BufferedJSONWriter :: try_new ( table) . unwrap ( ) ; 
234+         let  mut  writer = BufferedJsonWriter :: try_new ( table) . unwrap ( ) ; 
237235        let  res = writer. write ( json ! ( { "hello" : "world" } ) ,  WriterPartition :: NoPartitions ) ; 
238236        assert ! ( res. is_err( ) ) ; 
239237    } 
240238
241239    #[ tokio:: test]  
242240    async  fn  test_writer_write_partitions_to_nopartition ( )  { 
243241        let  table = crate :: open_table ( "./tests/data/delta-0.8.0" ) . await . unwrap ( ) ; 
244-         let  mut  writer = BufferedJSONWriter :: try_new ( table) . unwrap ( ) ; 
242+         let  mut  writer = BufferedJsonWriter :: try_new ( table) . unwrap ( ) ; 
245243        let  partitions = WriterPartition :: KeyValues  { 
246244            partitions :  vec ! [ ( "year" . to_string( ) ,  "2021" . to_string( ) ) ] , 
247245        } ; 
0 commit comments