add boundary test to better understand get_columns method

dbt-labs · mikealfare · Sep 10, 2024 · Aug 26, 2024 · Aug 26, 2024 · Aug 29, 2024
commit e2c2579eb7fa22a74d3a545d576be48d3b89611f
@@ -2,5 +2,5 @@ kind: Features
 body: Remove `pg_catalog` from metadata queries
 time: 2024-08-26T12:39:54.481505-04:00
 custom:
-  Author: mikealfare
+  Author: mikealfare, jiezhen-chen
   Issue: "555"
@@ -498,19 +498,33 @@ def columns_in_relation(self, relation) -> List[Dict[str, Any]]:
 
     @staticmethod
     def _parse_column_results(record: Tuple[Any, ...]) -> Dict[str, Any]:
-        # column positions in the tuple
-        column_name = 3
-        dtype_code = 4
-        dtype_name = 5
-        column_size = 6
-        decimals = 8
+        """Convert one column-metadata row into a dict describing that column.
+
+        The row is read by position: index 3 is the column name, 4 the type
+        code, 5 the type name, 6 the column size and 8 the decimal digits.
+        All other positions are ignored. Unpacking raises ``ValueError`` if
+        the row has fewer than nine items.
+
+        The result always carries "column" and "dtype". A type code listed in
+        ``num_dtypes`` adds "numeric_precision" and "numeric_scale"; any other
+        type code adds "char_size". Keys that do not apply are left out rather
+        than set to ``None``.
+        """
+        _, _, _, column_name, dtype_code, dtype_name, column_size, _, decimals, *_ = record
 
+        # NOTE(review): the codes look like JDBC java.sql.Types values — confirm
         char_dtypes = [1, 12]
-        num_dtypes = [2, 3, 4, 5, 6, 7, 8]
-        return {
-            "column": record[column_name],
-            "dtype": record[dtype_name],
-            "char_size": record[column_size] if record[dtype_code] in char_dtypes else None,
-            "numeric_precision": record[column_size] if record[dtype_code] in num_dtypes else None,
-            "numeric_scale": record[decimals] if record[dtype_code] in num_dtypes else None,
-        }
+        num_dtypes = [2, 3, 4, 5, 6, 7, 8, -5, 2003]
+
+        if dtype_code in char_dtypes:
+            return {"column": column_name, "dtype": dtype_name, "char_size": column_size}
+        elif dtype_code in num_dtypes:
+            return {
+                "column": column_name,
+                "dtype": dtype_name,
+                "numeric_precision": column_size,
+                "numeric_scale": decimals,
+            }
+        # NOTE(review): this fallthrough returns the same dict as the char_dtypes
+        # branch, so every non-numeric type reports char_size — confirm intended.
+        return {"column": column_name, "dtype": dtype_name, "char_size": column_size}
@@ -0,0 +1,46 @@
+from datetime import datetime, timezone
+import os
+import random
+from typing import Iterator
+
+import pytest
+import redshift_connector
+
+
+@pytest.fixture
+def connection() -> Iterator[redshift_connector.Connection]:
+    """Yield a Redshift connection built from the ``REDSHIFT_TEST_*`` variables.
+
+    The connection is closed during fixture teardown, so a test run does not
+    leave sessions open.
+
+    Raises:
+        KeyError: if ``REDSHIFT_TEST_PORT`` is not set.
+    """
+    conn = redshift_connector.connect(
+        user=os.getenv("REDSHIFT_TEST_USER"),
+        password=os.getenv("REDSHIFT_TEST_PASS"),
+        host=os.getenv("REDSHIFT_TEST_HOST"),
+        # index os.environ so a missing port fails with the variable's name
+        port=int(os.environ["REDSHIFT_TEST_PORT"]),
+        database=os.getenv("REDSHIFT_TEST_DBNAME"),
+        region=os.getenv("REDSHIFT_TEST_REGION"),
+    )
+    yield conn
+    conn.close()
+
+
+@pytest.fixture
+def schema_name(request) -> str:
+    """Build a schema name that differs between test invocations.
+
+    The name is ``test_<epoch seconds><microseconds><4-digit random>_<module>``,
+    where ``<module>`` is the last dotted part of the requesting module's name.
+    """
+    # aware datetimes replace the deprecated datetime.utcnow(); the delta is the same
+    elapsed = datetime.now(timezone.utc) - datetime(1970, 1, 1, tzinfo=timezone.utc)
+    epoch_seconds = int(elapsed.total_seconds())
+    microseconds = elapsed.microseconds  # sub-second part, in microseconds
+    random_int = random.randint(0, 9999)
+    file_name = request.module.__name__.split(".")[-1]
+    return f"test_{epoch_seconds}{microseconds}{random_int:04}_{file_name}"
@@ -0,0 +1,32 @@
+import pytest
+
+
+@pytest.fixture
+def schema(connection, schema_name) -> str:
+    """Create the schema named ``schema_name``, yield that name, then drop the schema."""
+    create_sql = f"CREATE SCHEMA IF NOT EXISTS {schema_name}"
+    drop_sql = f"DROP SCHEMA IF EXISTS {schema_name} CASCADE"
+
+    with connection.cursor() as setup_cursor:
+        setup_cursor.execute(create_sql)
+    yield schema_name
+    with connection.cursor() as teardown_cursor:
+        teardown_cursor.execute(drop_sql)
+
+
+def test_columns_in_relation(connection, schema):
+    """Check the row ``get_columns`` returns for a table with one ``3.14`` column."""
+    table = "cross_db"
+    with connection.cursor() as cursor:
+        cursor.execute(f"CREATE TABLE {schema}.{table} as select 3.14 as id")
+        rows = cursor.get_columns(schema_pattern=schema, tablename_pattern=table)
+
+    assert len(rows) == 1
+
+    # row positions 0 and 7, and anything after 8, are not checked here
+    _, schema_name, table_name, column_name, *type_fields = rows[0]
+    type_code, type_name, precision, _, scale, *_ = type_fields
+
+    assert (schema_name, table_name, column_name) == (schema, table, "id")
+    assert (type_code, type_name) == (2, "numeric")
+    assert (precision, scale) == (3, 2)
@@ -0,0 +1 @@
+# supports namespacing during test discovery