Move assert_batches_eq! macros to test_utils.rs (#746)

alamb · web-flow · commit 8a17c183e044 · 2021-07-19T15:12:40.000-04:00
* Move assert_batches_eq! macros to test_utils.rs

* port test
diff --git a/datafusion/src/test/mod.rs b/datafusion/src/test/mod.rs
@@ -280,72 +280,3 @@ pub fn make_timestamps() -> RecordBatch {
 pub mod exec;
 pub mod user_defined;
 pub mod variable;
-
-/// Compares formatted output of a record batch with an expected
-/// vector of strings, with the result of pretty formatting record
-/// batches. This is a macro so errors appear on the correct line
-///
-/// Designed so that failure output can be directly copy/pasted
-/// into the test code as expected results.
-///
-/// Expects to be called about like this:
-///
-/// `assert_batch_eq!(expected_lines: &[&str], batches: &[RecordBatch])`
-#[macro_export]
-macro_rules! assert_batches_eq {
-    ($EXPECTED_LINES: expr, $CHUNKS: expr) => {
-        let expected_lines: Vec<String> =
-            $EXPECTED_LINES.iter().map(|&s| s.into()).collect();
-
-        let formatted = arrow::util::pretty::pretty_format_batches($CHUNKS).unwrap();
-
-        let actual_lines: Vec<&str> = formatted.trim().lines().collect();
-
-        assert_eq!(
-            expected_lines, actual_lines,
-            "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
-            expected_lines, actual_lines
-        );
-    };
-}
-
-/// Compares formatted output of a record batch with an expected
-/// vector of strings in a way that order does not matter.
-/// This is a macro so errors appear on the correct line
-///
-/// Designed so that failure output can be directly copy/pasted
-/// into the test code as expected results.
-///
-/// Expects to be called about like this:
-///
-/// `assert_batch_sorted_eq!(expected_lines: &[&str], batches: &[RecordBatch])`
-#[macro_export]
-macro_rules! assert_batches_sorted_eq {
-    ($EXPECTED_LINES: expr, $CHUNKS: expr) => {
-        let mut expected_lines: Vec<String> =
-            $EXPECTED_LINES.iter().map(|&s| s.into()).collect();
-
-        // sort except for header + footer
-        let num_lines = expected_lines.len();
-        if num_lines > 3 {
-            expected_lines.as_mut_slice()[2..num_lines - 1].sort_unstable()
-        }
-
-        let formatted = arrow::util::pretty::pretty_format_batches($CHUNKS).unwrap();
-        // fix for windows: \r\n -->
-
-        let mut actual_lines: Vec<&str> = formatted.trim().lines().collect();
-
-        // sort except for header + footer
-        let num_lines = actual_lines.len();
-        if num_lines > 3 {
-            actual_lines.as_mut_slice()[2..num_lines - 1].sort_unstable()
-        }
-
-        assert_eq!(
-            expected_lines, actual_lines,
-            "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
-            expected_lines, actual_lines
-        );
-    };
-}
diff --git a/datafusion/src/test_util.rs b/datafusion/src/test_util.rs
@@ -15,10 +15,79 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Utils to make testing easier
+//! Utility functions to make testing DataFusion based crates easier
 
 use std::{env, error::Error, path::PathBuf};
 
+/// Compares formatted output of a record batch with an expected
+/// vector of strings, with the result of pretty formatting record
+/// batches. This is a macro so errors appear on the correct line
+///
+/// Designed so that failure output can be directly copy/pasted
+/// into the test code as expected results.
+///
+/// Expects to be called about like this:
+///
+/// `assert_batch_eq!(expected_lines: &[&str], batches: &[RecordBatch])`
+#[macro_export]
+macro_rules! assert_batches_eq {
+    ($EXPECTED_LINES: expr, $CHUNKS: expr) => {
+        let expected_lines: Vec<String> =
+            $EXPECTED_LINES.iter().map(|&s| s.into()).collect();
+
+        let formatted = arrow::util::pretty::pretty_format_batches($CHUNKS).unwrap();
+
+        let actual_lines: Vec<&str> = formatted.trim().lines().collect();
+
+        assert_eq!(
+            expected_lines, actual_lines,
+            "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
+            expected_lines, actual_lines
+        );
+    };
+}
+
+/// Compares formatted output of a record batch with an expected
+/// vector of strings in a way that order does not matter.
+/// This is a macro so errors appear on the correct line
+///
+/// Designed so that failure output can be directly copy/pasted
+/// into the test code as expected results.
+///
+/// Expects to be called about like this:
+///
+/// `assert_batch_sorted_eq!(expected_lines: &[&str], batches: &[RecordBatch])`
+#[macro_export]
+macro_rules! assert_batches_sorted_eq {
+    ($EXPECTED_LINES: expr, $CHUNKS: expr) => {
+        let mut expected_lines: Vec<String> =
+            $EXPECTED_LINES.iter().map(|&s| s.into()).collect();
+
+        // sort except for header + footer
+        let num_lines = expected_lines.len();
+        if num_lines > 3 {
+            expected_lines.as_mut_slice()[2..num_lines - 1].sort_unstable()
+        }
+
+        let formatted = arrow::util::pretty::pretty_format_batches($CHUNKS).unwrap();
+        // fix for windows: \r\n -->
+
+        let mut actual_lines: Vec<&str> = formatted.trim().lines().collect();
+
+        // sort except for header + footer
+        let num_lines = actual_lines.len();
+        if num_lines > 3 {
+            actual_lines.as_mut_slice()[2..num_lines - 1].sort_unstable()
+        }
+
+        assert_eq!(
+            expected_lines, actual_lines,
+            "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
+            expected_lines, actual_lines
+        );
+    };
+}
+
 /// Returns the arrow test data directory, which is by default stored
 /// in a git submodule rooted at `testing/data`.
 ///
diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs
@@ -35,6 +35,7 @@ use arrow::{
     util::display::array_value_to_string,
 };
 
+use datafusion::assert_batches_eq;
 use datafusion::logical_plan::LogicalPlan;
 use datafusion::prelude::*;
 use datafusion::{
@@ -112,19 +113,23 @@ async fn parquet_query() {
     // NOTE that string_col is actually a binary column and does not have the UTF8 logical type
     // so we need an explicit cast
     let sql = "SELECT id, CAST(string_col AS varchar) FROM alltypes_plain";
-    let actual = execute(&mut ctx, sql).await;
+    let actual = execute_to_batches(&mut ctx, sql).await;
     let expected = vec![
-        vec!["4", "0"],
-        vec!["5", "1"],
-        vec!["6", "0"],
-        vec!["7", "1"],
-        vec!["2", "0"],
-        vec!["3", "1"],
-        vec!["0", "0"],
-        vec!["1", "1"],
+        "+----+--------------------------+",
+        "| id | CAST(string_col AS Utf8) |",
+        "+----+--------------------------+",
+        "| 4  | 0                        |",
+        "| 5  | 1                        |",
+        "| 6  | 0                        |",
+        "| 7  | 1                        |",
+        "| 2  | 0                        |",
+        "| 3  | 1                        |",
+        "| 0  | 0                        |",
+        "| 1  | 1                        |",
+        "+----+--------------------------+",
     ];
 
-    assert_eq!(expected, actual);
+    assert_batches_eq!(expected, &actual);
 }
 
 #[tokio::test]
@@ -2487,7 +2492,7 @@ fn register_alltypes_parquet(ctx: &mut ExecutionContext) {
 
 /// Execute query and return result set as 2-d table of Vecs
 /// `result[row][column]`
-async fn execute(ctx: &mut ExecutionContext, sql: &str) -> Vec<Vec<String>> {
+async fn execute_to_batches(ctx: &mut ExecutionContext, sql: &str) -> Vec<RecordBatch> {
     let msg = format!("Creating logical plan for '{}'", sql);
     let plan = ctx.create_logical_plan(sql).expect(&msg);
     let logical_schema = plan.schema();
@@ -2503,8 +2508,13 @@ async fn execute(ctx: &mut ExecutionContext, sql: &str) -> Vec<Vec<String>> {
     let results = collect(plan).await.expect(&msg);
 
     assert_eq!(logical_schema.as_ref(), optimized_logical_schema.as_ref());
+    results
+}
 
-    result_vec(&results)
+/// Execute query and return result set as 2-d table of Vecs
+/// `result[row][column]`
+async fn execute(ctx: &mut ExecutionContext, sql: &str) -> Vec<Vec<String>> {
+    result_vec(&execute_to_batches(ctx, sql).await)
 }
 
 /// Specialised String representation