Update data frame examples.

anthony-tuininga · anthony-tuininga · commit 1a072f729f17 · 2025-02-24T15:56:03.000-07:00
diff --git a/doc/src/user_guide/sql_execution.rst b/doc/src/user_guide/sql_execution.rst
@@ -907,6 +907,7 @@ org/docs/reference/api/pandas.DataFrame.html#pandas.DataFrame>`__ is:
 .. code-block:: python
 
     import pandas
+    import pyarrow
 
     # Get an OracleDataFrame
     # Adjust arraysize to tune the query fetch performance
@@ -915,7 +916,9 @@ org/docs/reference/api/pandas.DataFrame.html#pandas.DataFrame>`__ is:
     odf = connection.fetch_df_all(statement=sql, parameters=[myid], arraysize=1000)
 
     # Get a Pandas DataFrame from the data.
-    df = pandas.api.interchange.from_dataframe(odf)
+    df = pyarrow.Table.from_arrays(
+        odf.column_arrays(), names=odf.column_names()
+    ).to_pandas()
 
     # Perform various Pandas operations on the DataFrame
-    print(df.T)        # transform
+    print(df.T)        # transpose
diff --git a/samples/dataframe_pandas.py b/samples/dataframe_pandas.py
@@ -30,6 +30,8 @@
 # -----------------------------------------------------------------------------
 
 import pandas
+import pyarrow
+
 import oracledb
 import sample_env
 
@@ -46,12 +48,18 @@
 
 SQL = "select id, name from SampleQueryTab order by id"
 
+# -----------------------------------------------------------------------------
+#
+# Fetching all records
+
 # Get an OracleDataFrame.
 # Adjust arraysize to tune the query fetch performance
 odf = connection.fetch_df_all(statement=SQL, arraysize=100)
 
-# Get a Pandas DataFrame from the data.
-df = pandas.api.interchange.from_dataframe(odf)
+# Get a Pandas DataFrame from the data
+df = pyarrow.Table.from_arrays(
+    odf.column_arrays(), names=odf.column_names()
+).to_pandas()
 
 # Perform various Pandas operations on the DataFrame
 
@@ -68,8 +76,8 @@
 print(df.T)
 
 # -----------------------------------------------------------------------------
-
-# An example of batch fetching
+#
+# Batch record fetching
 #
 # Note that since this particular example ends up with all query rows being
 # held in memory, it would be more efficient to use fetch_df_all() as shown
@@ -81,9 +89,14 @@
 # Tune 'size' for your data set. Here it is small to show the batch fetch
 # behavior on the sample table.
 for odf in connection.fetch_df_batches(statement=SQL, size=10):
-    df_b = pandas.api.interchange.from_dataframe(odf)
+    df_b = pyarrow.Table.from_arrays(
+        odf.column_arrays(), names=odf.column_names()
+    ).to_pandas()
     print(f"Appending {df_b.shape[0]} rows")
     df = pandas.concat([df, df_b], ignore_index=True)
 
+r, c = df.shape
+print(f"{r} rows, {c} columns")
+
 print("\nLast three rows:")
 print(df.tail(3))
diff --git a/samples/dataframe_pandas_async.py b/samples/dataframe_pandas_async.py
@@ -36,6 +36,8 @@
 import asyncio
 
 import pandas
+import pyarrow
+
 import oracledb
 import sample_env
 
@@ -50,12 +52,18 @@ async def main():
 
     SQL = "select id, name from SampleQueryTab order by id"
 
+    # -------------------------------------------------------------------------
+    #
+    # Fetching all records
+
     # Get an OracleDataFrame.
     # Adjust arraysize to tune the query fetch performance
     odf = await connection.fetch_df_all(statement=SQL, arraysize=100)
 
-    # Get a Pandas DataFrame from the data.
-    df = pandas.api.interchange.from_dataframe(odf)
+    # Get a Pandas DataFrame from the data
+    df = pyarrow.Table.from_arrays(
+        odf.column_arrays(), names=odf.column_names()
+    ).to_pandas()
 
     # Perform various Pandas operations on the DataFrame
 
@@ -72,8 +80,8 @@ async def main():
     print(df.T)
 
     # -------------------------------------------------------------------------
-
-    # An example of batch fetching
+    #
+    # Batch record fetching
     #
     # Note that since this particular example ends up with all query rows being
     # held in memory, it would be more efficient to use fetch_df_all() as shown
@@ -85,10 +93,15 @@ async def main():
     # Tune 'size' for your data set. Here it is small to show the batch fetch
     # behavior on the sample table.
     async for odf in connection.fetch_df_batches(statement=SQL, size=10):
-        df_b = pandas.api.interchange.from_dataframe(odf)
+        df_b = pyarrow.Table.from_arrays(
+            odf.column_arrays(), names=odf.column_names()
+        ).to_pandas()
         print(f"Appending {df_b.shape[0]} rows")
         df = pandas.concat([df, df_b], ignore_index=True)
 
+    r, c = df.shape
+    print(f"{r} rows, {c} columns")
+
     print("\nLast three rows:")
     print(df.tail(3))
 
diff --git a/samples/dataframe_polars.py b/samples/dataframe_polars.py
@@ -25,8 +25,8 @@
 # -----------------------------------------------------------------------------
 # dataframe_polars.py
 #
-# Shows how to use connection.fetch_df_all() to efficiently put data into a
-# Polars Series
+# Shows how to use connection.fetch_df_all() to efficiently put data into
+# Polars DataFrames and Series.
 # -----------------------------------------------------------------------------
 
 import pyarrow
@@ -46,19 +46,48 @@
     params=sample_env.get_connect_params(),
 )
 
-SQL = "select id from SampleQueryTab order by id"
+# -----------------------------------------------------------------------------
+#
+# Polars DataFrame
+
+SQL1 = "select * from SampleQueryTab order by id"
+
+# Get an OracleDataFrame
+# Adjust arraysize to tune the query fetch performance
+odf = connection.fetch_df_all(statement=SQL1, arraysize=100)
+
+# Convert to a Polars DataFrame
+pyarrow_table = pyarrow.Table.from_arrays(
+    odf.column_arrays(), names=odf.column_names()
+)
+p = polars.from_arrow(pyarrow_table)
+
+print(type(p))  # <class 'polars.dataframe.frame.DataFrame'>
+
+r, c = p.shape
+print(f"{r} rows, {c} columns")
+
+print("\nSum:")
+print(p.sum())
+
+# -----------------------------------------------------------------------------
+#
+# Polars Series
+
+SQL2 = "select id from SampleQueryTab order by id"
 
 # Get an OracleDataFrame
 # Adjust arraysize to tune the query fetch performance
-odf = connection.fetch_df_all(statement=SQL, arraysize=100)
+odf = connection.fetch_df_all(statement=SQL2, arraysize=100)
 
 # Convert to a Polars Series
 pyarrow_array = pyarrow.array(odf.get_column_by_name("ID"))
 p = polars.from_arrow(pyarrow_array)
 
 print(type(p))  # <class 'polars.series.series.Series'>
 
-# Perform various Polars operations on the Series
+(r,) = p.shape
+print(f"{r} rows")
 
 print("\nSum:")
 print(p.sum())
diff --git a/samples/dataframe_pyarrow.py b/samples/dataframe_pyarrow.py
@@ -46,6 +46,7 @@
 )
 
 # -----------------------------------------------------------------------------
+#
 # Creating a PyArrow table
 
 SQL1 = "select id, name from SampleQueryTab order by id"
@@ -72,6 +73,7 @@
 print(f"{r} rows, {c} columns")
 
 # -----------------------------------------------------------------------------
+#
 # Creating a PyArrow array
 
 SQL2 = "select id from SampleQueryTab order by id"