Skip to content

Commit 1a072f7

Browse files
Update data frame examples.
1 parent 2c70997 commit 1a072f7

File tree

5 files changed

+76
-16
lines changed

5 files changed

+76
-16
lines changed

doc/src/user_guide/sql_execution.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -907,6 +907,7 @@ org/docs/reference/api/pandas.DataFrame.html#pandas.DataFrame>`__ is:
907907
.. code-block:: python
908908
909909
import pandas
910+
import pyarrow
910911
911912
# Get an OracleDataFrame
912913
# Adjust arraysize to tune the query fetch performance
@@ -915,7 +916,9 @@ org/docs/reference/api/pandas.DataFrame.html#pandas.DataFrame>`__ is:
915916
odf = connection.fetch_df_all(statement=sql, parameters=[myid], arraysize=1000)
916917
917918
# Get a Pandas DataFrame from the data.
918-
df = pandas.api.interchange.from_dataframe(odf)
919+
df = pyarrow.Table.from_arrays(
920+
odf.column_arrays(), names=odf.column_names()
921+
).to_pandas()
919922
920923
# Perform various Pandas operations on the DataFrame
921924
print(df.T) # transpose

samples/dataframe_pandas.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
# -----------------------------------------------------------------------------
3131

3232
import pandas
33+
import pyarrow
34+
3335
import oracledb
3436
import sample_env
3537

@@ -46,12 +48,18 @@
4648

4749
SQL = "select id, name from SampleQueryTab order by id"
4850

51+
# -----------------------------------------------------------------------------
52+
#
53+
# Fetching all records
54+
4955
# Get an OracleDataFrame.
5056
# Adjust arraysize to tune the query fetch performance
5157
odf = connection.fetch_df_all(statement=SQL, arraysize=100)
5258

53-
# Get a Pandas DataFrame from the data.
54-
df = pandas.api.interchange.from_dataframe(odf)
59+
# Get a Pandas DataFrame from the data
60+
df = pyarrow.Table.from_arrays(
61+
odf.column_arrays(), names=odf.column_names()
62+
).to_pandas()
5563

5664
# Perform various Pandas operations on the DataFrame
5765

@@ -68,8 +76,8 @@
6876
print(df.T)
6977

7078
# -----------------------------------------------------------------------------
71-
72-
# An example of batch fetching
79+
#
80+
# Batch record fetching
7381
#
7482
# Note that since this particular example ends up with all query rows being
7583
# held in memory, it would be more efficient to use fetch_df_all() as shown
@@ -81,9 +89,14 @@
8189
# Tune 'size' for your data set. Here it is small to show the batch fetch
8290
# behavior on the sample table.
8391
for odf in connection.fetch_df_batches(statement=SQL, size=10):
84-
df_b = pandas.api.interchange.from_dataframe(odf)
92+
df_b = pyarrow.Table.from_arrays(
93+
odf.column_arrays(), names=odf.column_names()
94+
).to_pandas()
8595
print(f"Appending {df_b.shape[0]} rows")
8696
df = pandas.concat([df, df_b], ignore_index=True)
8797

98+
r, c = df.shape
99+
print(f"{r} rows, {c} columns")
100+
88101
print("\nLast three rows:")
89102
print(df.tail(3))

samples/dataframe_pandas_async.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@
3636
import asyncio
3737

3838
import pandas
39+
import pyarrow
40+
3941
import oracledb
4042
import sample_env
4143

@@ -50,12 +52,18 @@ async def main():
5052

5153
SQL = "select id, name from SampleQueryTab order by id"
5254

55+
# -------------------------------------------------------------------------
56+
#
57+
# Fetching all records
58+
5359
# Get an OracleDataFrame.
5460
# Adjust arraysize to tune the query fetch performance
5561
odf = await connection.fetch_df_all(statement=SQL, arraysize=100)
5662

57-
# Get a Pandas DataFrame from the data.
58-
df = pandas.api.interchange.from_dataframe(odf)
63+
# Get a Pandas DataFrame from the data
64+
df = pyarrow.Table.from_arrays(
65+
odf.column_arrays(), names=odf.column_names()
66+
).to_pandas()
5967

6068
# Perform various Pandas operations on the DataFrame
6169

@@ -72,8 +80,8 @@ async def main():
7280
print(df.T)
7381

7482
# -------------------------------------------------------------------------
75-
76-
# An example of batch fetching
83+
#
84+
# Batch record fetching
7785
#
7886
# Note that since this particular example ends up with all query rows being
7987
# held in memory, it would be more efficient to use fetch_df_all() as shown
@@ -85,10 +93,15 @@ async def main():
8593
# Tune 'size' for your data set. Here it is small to show the batch fetch
8694
# behavior on the sample table.
8795
async for odf in connection.fetch_df_batches(statement=SQL, size=10):
88-
df_b = pandas.api.interchange.from_dataframe(odf)
96+
df_b = pyarrow.Table.from_arrays(
97+
odf.column_arrays(), names=odf.column_names()
98+
).to_pandas()
8999
print(f"Appending {df_b.shape[0]} rows")
90100
df = pandas.concat([df, df_b], ignore_index=True)
91101

102+
r, c = df.shape
103+
print(f"{r} rows, {c} columns")
104+
92105
print("\nLast three rows:")
93106
print(df.tail(3))
94107

samples/dataframe_polars.py

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@
2525
# -----------------------------------------------------------------------------
2626
# dataframe_polars.py
2727
#
28-
# Shows how to use connection.fetch_df_all() to efficiently put data into a
29-
# Polars Series
28+
# Shows how to use connection.fetch_df_all() to efficiently put data into
29+
# Polars DataFrames and Series.
3030
# -----------------------------------------------------------------------------
3131

3232
import pyarrow
@@ -46,19 +46,48 @@
4646
params=sample_env.get_connect_params(),
4747
)
4848

49-
SQL = "select id from SampleQueryTab order by id"
49+
# -----------------------------------------------------------------------------
50+
#
51+
# Polars DataFrame
52+
53+
SQL1 = "select * from SampleQueryTab order by id"
54+
55+
# Get an OracleDataFrame
56+
# Adjust arraysize to tune the query fetch performance
57+
odf = connection.fetch_df_all(statement=SQL1, arraysize=100)
58+
59+
# Convert to a Polars DataFrame
60+
pyarrow_table = pyarrow.Table.from_arrays(
61+
odf.column_arrays(), names=odf.column_names()
62+
)
63+
p = polars.from_arrow(pyarrow_table)
64+
65+
print(type(p)) # <class 'polars.dataframe.frame.DataFrame'>
66+
67+
r, c = p.shape
68+
print(f"{r} rows, {c} columns")
69+
70+
print("\nSum:")
71+
print(p.sum())
72+
73+
# -----------------------------------------------------------------------------
74+
#
75+
# Polars Series
76+
77+
SQL2 = "select id from SampleQueryTab order by id"
5078

5179
# Get an OracleDataFrame
5280
# Adjust arraysize to tune the query fetch performance
53-
odf = connection.fetch_df_all(statement=SQL, arraysize=100)
81+
odf = connection.fetch_df_all(statement=SQL2, arraysize=100)
5482

5583
# Convert to a Polars Series
5684
pyarrow_array = pyarrow.array(odf.get_column_by_name("ID"))
5785
p = polars.from_arrow(pyarrow_array)
5886

5987
print(type(p)) # <class 'polars.series.series.Series'>
6088

61-
# Perform various Polars operations on the Series
89+
(r,) = p.shape
90+
print(f"{r} rows")
6291

6392
print("\nSum:")
6493
print(p.sum())

samples/dataframe_pyarrow.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
)
4747

4848
# -----------------------------------------------------------------------------
49+
#
4950
# Creating a PyArrow table
5051

5152
SQL1 = "select id, name from SampleQueryTab order by id"
@@ -72,6 +73,7 @@
7273
print(f"{r} rows, {c} columns")
7374

7475
# -----------------------------------------------------------------------------
76+
#
7577
# Creating a PyArrow array
7678

7779
SQL2 = "select id from SampleQueryTab order by id"

0 commit comments

Comments
 (0)