Skip to content

Commit 5151e33

Browse files
committed
DOC: move info docs to DataFrameInfo
1 parent 336cc6c commit 5151e33

File tree

2 files changed

+146
-126
lines changed

2 files changed

+146
-126
lines changed

pandas/core/frame.py

Lines changed: 2 additions & 114 deletions
Original file line number | Diff line number | Diff line change
@@ -159,7 +159,7 @@
159159

160160
from pandas.io.common import get_handle
161161
from pandas.io.formats import console, format as fmt
162-
from pandas.io.formats.info import BaseInfo, DataFrameInfo
162+
from pandas.io.formats.info import DataFrameInfo
163163
import pandas.plotting
164164

165165
if TYPE_CHECKING:
@@ -2520,119 +2520,7 @@ def to_html(
25202520
)
25212521

25222522
# ----------------------------------------------------------------------
2523-
@Substitution(
2524-
klass="DataFrame",
2525-
type_sub=" and columns",
2526-
max_cols_sub=dedent(
2527-
"""\
2528-
max_cols : int, optional
2529-
When to switch from the verbose to the truncated output. If the
2530-
DataFrame has more than `max_cols` columns, the truncated output
2531-
is used. By default, the setting in
2532-
``pandas.options.display.max_info_columns`` is used."""
2533-
),
2534-
null_counts_sub=dedent(
2535-
"""\
2536-
null_counts : bool, optional
2537-
Whether to show the non-null counts. By default, this is shown
2538-
only if the DataFrame is smaller than
2539-
``pandas.options.display.max_info_rows`` and
2540-
``pandas.options.display.max_info_columns``. A value of True always
2541-
shows the counts, and False never shows the counts."""
2542-
),
2543-
examples_sub=dedent(
2544-
"""\
2545-
>>> int_values = [1, 2, 3, 4, 5]
2546-
>>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']
2547-
>>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]
2548-
>>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,
2549-
... "float_col": float_values})
2550-
>>> df
2551-
int_col text_col float_col
2552-
0 1 alpha 0.00
2553-
1 2 beta 0.25
2554-
2 3 gamma 0.50
2555-
3 4 delta 0.75
2556-
4 5 epsilon 1.00
2557-
2558-
Prints information of all columns:
2559-
2560-
>>> df.info(verbose=True)
2561-
<class 'pandas.core.frame.DataFrame'>
2562-
RangeIndex: 5 entries, 0 to 4
2563-
Data columns (total 3 columns):
2564-
# Column Non-Null Count Dtype
2565-
--- ------ -------------- -----
2566-
0 int_col 5 non-null int64
2567-
1 text_col 5 non-null object
2568-
2 float_col 5 non-null float64
2569-
dtypes: float64(1), int64(1), object(1)
2570-
memory usage: 248.0+ bytes
2571-
2572-
Prints a summary of columns count and its dtypes but not per column
2573-
information:
2574-
2575-
>>> df.info(verbose=False)
2576-
<class 'pandas.core.frame.DataFrame'>
2577-
RangeIndex: 5 entries, 0 to 4
2578-
Columns: 3 entries, int_col to float_col
2579-
dtypes: float64(1), int64(1), object(1)
2580-
memory usage: 248.0+ bytes
2581-
2582-
Pipe output of DataFrame.info to buffer instead of sys.stdout, get
2583-
buffer content and writes to a text file:
2584-
2585-
>>> import io
2586-
>>> buffer = io.StringIO()
2587-
>>> df.info(buf=buffer)
2588-
>>> s = buffer.getvalue()
2589-
>>> with open("df_info.txt", "w",
2590-
... encoding="utf-8") as f: # doctest: +SKIP
2591-
... f.write(s)
2592-
260
2593-
2594-
The `memory_usage` parameter allows deep introspection mode, specially
2595-
useful for big DataFrames and fine-tune memory optimization:
2596-
2597-
>>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)
2598-
>>> df = pd.DataFrame({
2599-
... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),
2600-
... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),
2601-
... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)
2602-
... })
2603-
>>> df.info()
2604-
<class 'pandas.core.frame.DataFrame'>
2605-
RangeIndex: 1000000 entries, 0 to 999999
2606-
Data columns (total 3 columns):
2607-
# Column Non-Null Count Dtype
2608-
--- ------ -------------- -----
2609-
0 column_1 1000000 non-null object
2610-
1 column_2 1000000 non-null object
2611-
2 column_3 1000000 non-null object
2612-
dtypes: object(3)
2613-
memory usage: 22.9+ MB
2614-
2615-
>>> df.info(memory_usage='deep')
2616-
<class 'pandas.core.frame.DataFrame'>
2617-
RangeIndex: 1000000 entries, 0 to 999999
2618-
Data columns (total 3 columns):
2619-
# Column Non-Null Count Dtype
2620-
--- ------ -------------- -----
2621-
0 column_1 1000000 non-null object
2622-
1 column_2 1000000 non-null object
2623-
2 column_3 1000000 non-null object
2624-
dtypes: object(3)
2625-
memory usage: 165.9 MB"""
2626-
),
2627-
see_also_sub=dedent(
2628-
"""\
2629-
DataFrame.describe: Generate descriptive statistics of DataFrame
2630-
columns.
2631-
DataFrame.memory_usage: Memory usage of DataFrame columns."""
2632-
),
2633-
version_added_sub="",
2634-
)
2635-
@doc(BaseInfo.render)
2523+
@doc(DataFrameInfo.render)
26362524
def info(
26372525
self,
26382526
verbose: Optional[bool] = None,

pandas/io/formats/info.py

Lines changed: 144 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
from abc import ABC, abstractmethod
22
import sys
3+
from textwrap import dedent
34
from typing import (
45
IO,
56
TYPE_CHECKING,
@@ -15,6 +16,7 @@
1516
from pandas._config import get_option
1617

1718
from pandas._typing import Dtype, FrameOrSeriesUnion
19+
from pandas.util._decorators import doc
1820

1921
from pandas.core.indexes.api import Index
2022

@@ -25,6 +27,132 @@
2527
from pandas.core.frame import DataFrame
2628

2729

30+
frame_max_cols_sub = dedent(
31+
"""\
32+
max_cols : int, optional
33+
When to switch from the verbose to the truncated output. If the
34+
DataFrame has more than `max_cols` columns, the truncated output
35+
is used. By default, the setting in
36+
``pandas.options.display.max_info_columns`` is used."""
37+
)
38+
39+
40+
frame_null_counts_sub = dedent(
41+
"""\
42+
null_counts : bool, optional
43+
Whether to show the non-null counts. By default, this is shown
44+
only if the DataFrame is smaller than
45+
``pandas.options.display.max_info_rows`` and
46+
``pandas.options.display.max_info_columns``. A value of True always
47+
shows the counts, and False never shows the counts."""
48+
)
49+
50+
51+
frame_examples_sub = dedent(
52+
"""\
53+
>>> int_values = [1, 2, 3, 4, 5]
54+
>>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']
55+
>>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]
56+
>>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,
57+
... "float_col": float_values})
58+
>>> df
59+
int_col text_col float_col
60+
0 1 alpha 0.00
61+
1 2 beta 0.25
62+
2 3 gamma 0.50
63+
3 4 delta 0.75
64+
4 5 epsilon 1.00
65+
66+
Prints information of all columns:
67+
68+
>>> df.info(verbose=True)
69+
<class 'pandas.core.frame.DataFrame'>
70+
RangeIndex: 5 entries, 0 to 4
71+
Data columns (total 3 columns):
72+
# Column Non-Null Count Dtype
73+
--- ------ -------------- -----
74+
0 int_col 5 non-null int64
75+
1 text_col 5 non-null object
76+
2 float_col 5 non-null float64
77+
dtypes: float64(1), int64(1), object(1)
78+
memory usage: 248.0+ bytes
79+
80+
Prints a summary of columns count and its dtypes but not per column
81+
information:
82+
83+
>>> df.info(verbose=False)
84+
<class 'pandas.core.frame.DataFrame'>
85+
RangeIndex: 5 entries, 0 to 4
86+
Columns: 3 entries, int_col to float_col
87+
dtypes: float64(1), int64(1), object(1)
88+
memory usage: 248.0+ bytes
89+
90+
Pipe output of DataFrame.info to buffer instead of sys.stdout, get
91+
buffer content and writes to a text file:
92+
93+
>>> import io
94+
>>> buffer = io.StringIO()
95+
>>> df.info(buf=buffer)
96+
>>> s = buffer.getvalue()
97+
>>> with open("df_info.txt", "w",
98+
... encoding="utf-8") as f: # doctest: +SKIP
99+
... f.write(s)
100+
260
101+
102+
The `memory_usage` parameter allows deep introspection mode, specially
103+
useful for big DataFrames and fine-tune memory optimization:
104+
105+
>>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)
106+
>>> df = pd.DataFrame({
107+
... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),
108+
... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),
109+
... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)
110+
... })
111+
>>> df.info()
112+
<class 'pandas.core.frame.DataFrame'>
113+
RangeIndex: 1000000 entries, 0 to 999999
114+
Data columns (total 3 columns):
115+
# Column Non-Null Count Dtype
116+
--- ------ -------------- -----
117+
0 column_1 1000000 non-null object
118+
1 column_2 1000000 non-null object
119+
2 column_3 1000000 non-null object
120+
dtypes: object(3)
121+
memory usage: 22.9+ MB
122+
123+
>>> df.info(memory_usage='deep')
124+
<class 'pandas.core.frame.DataFrame'>
125+
RangeIndex: 1000000 entries, 0 to 999999
126+
Data columns (total 3 columns):
127+
# Column Non-Null Count Dtype
128+
--- ------ -------------- -----
129+
0 column_1 1000000 non-null object
130+
1 column_2 1000000 non-null object
131+
2 column_3 1000000 non-null object
132+
dtypes: object(3)
133+
memory usage: 165.9 MB"""
134+
)
135+
136+
137+
frame_see_also_sub = dedent(
138+
"""\
139+
DataFrame.describe: Generate descriptive statistics of DataFrame
140+
columns.
141+
DataFrame.memory_usage: Memory usage of DataFrame columns."""
142+
)
143+
144+
145+
frame_subs = {
146+
"klass": "DataFrame",
147+
"type_sub": " and columns",
148+
"max_cols_sub": frame_max_cols_sub,
149+
"null_counts_sub": frame_null_counts_sub,
150+
"examples_sub": frame_examples_sub,
151+
"see_also_sub": frame_see_also_sub,
152+
"version_added_sub": "",
153+
}
154+
155+
28156
def _put_str(s: Union[str, Dtype], space: int) -> str:
29157
"""
30158
Make string of specified length, padding to the right if necessary.
@@ -172,26 +300,26 @@ def render(
172300
show_counts: Optional[bool],
173301
) -> None:
174302
"""
175-
Print a concise summary of a %(klass)s.
303+
Print a concise summary of a {klass}.
176304
177-
This method prints information about a %(klass)s including
178-
the index dtype%(type_sub)s, non-null values and memory usage.
179-
%(version_added_sub)s\
305+
This method prints information about a {klass} including
306+
the index dtype{type_sub}, non-null values and memory usage.
307+
{version_added_sub}\
180308
181309
Parameters
182310
----------
183-
data : %(klass)s
184-
%(klass)s to print information about.
311+
data : {klass}
312+
{klass} to print information about.
185313
verbose : bool, optional
186314
Whether to print the full summary. By default, the setting in
187315
``pandas.options.display.max_info_columns`` is followed.
188316
buf : writable buffer, defaults to sys.stdout
189317
Where to send the output. By default, the output is printed to
190318
sys.stdout. Pass a writable buffer if you need to further process
191319
the output.
192-
%(max_cols_sub)s
320+
{max_cols_sub}
193321
memory_usage : bool, str, optional
194-
Specifies whether total memory usage of the %(klass)s
322+
Specifies whether total memory usage of the {klass}
195323
elements (including the index) should be displayed. By default,
196324
this follows the ``pandas.options.display.memory_usage`` setting.
197325
@@ -203,20 +331,20 @@ def render(
203331
consume the same memory amount for corresponding dtypes. With deep
204332
memory introspection, a real memory usage calculation is performed
205333
at the cost of computational resources.
206-
%(null_counts_sub)s
334+
{null_counts_sub}
207335
208336
Returns
209337
-------
210338
None
211-
This method prints a summary of a %(klass)s and returns None.
339+
This method prints a summary of a {klass} and returns None.
212340
213341
See Also
214342
--------
215-
%(see_also_sub)s
343+
{see_also_sub}
216344
217345
Examples
218346
--------
219-
%(examples_sub)s
347+
{examples_sub}
220348
"""
221349

222350

@@ -279,6 +407,10 @@ def memory_usage_bytes(self) -> int:
279407
deep = False
280408
return self.data.memory_usage(index=True, deep=deep).sum()
281409

410+
@doc(
411+
BaseInfo.render,
412+
**frame_subs,
413+
)
282414
def render(
283415
self,
284416
*,

0 commit comments

Comments
 (0)