1
1
from abc import ABC , abstractmethod
2
2
import sys
3
+ from textwrap import dedent
3
4
from typing import (
4
5
IO ,
5
6
TYPE_CHECKING ,
15
16
from pandas ._config import get_option
16
17
17
18
from pandas ._typing import Dtype , FrameOrSeriesUnion
19
+ from pandas .util ._decorators import doc
18
20
19
21
from pandas .core .indexes .api import Index
20
22
25
27
from pandas .core .frame import DataFrame
26
28
27
29
30
# numpydoc "Parameters" entry for ``max_cols``; substituted for the
# ``{max_cols_sub}`` placeholder of the shared ``render`` docstring template
# when documenting the DataFrame variant (see ``frame_subs``).
# The leading backslash suppresses the initial newline so that ``dedent``
# yields text starting directly with the parameter header.
frame_max_cols_sub = dedent(
    """\
    max_cols : int, optional
        When to switch from the verbose to the truncated output. If the
        DataFrame has more than `max_cols` columns, the truncated output
        is used. By default, the setting in
        ``pandas.options.display.max_info_columns`` is used."""
)
38
+
39
+
40
# numpydoc "Parameters" entry for ``null_counts``; substituted for the
# ``{null_counts_sub}`` placeholder of the shared ``render`` docstring
# template when documenting the DataFrame variant (see ``frame_subs``).
frame_null_counts_sub = dedent(
    """\
    null_counts : bool, optional
        Whether to show the non-null counts. By default, this is shown
        only if the DataFrame is smaller than
        ``pandas.options.display.max_info_rows`` and
        ``pandas.options.display.max_info_columns``. A value of True always
        shows the counts, and False never shows the counts."""
)
49
+
50
+
51
# Doctest-style "Examples" section; substituted for the ``{examples_sub}``
# placeholder of the shared ``render`` docstring template when documenting
# the DataFrame variant (see ``frame_subs``).
# NOTE(review): the column alignment of the printed tables was reconstructed
# from the expected ``DataFrame.info`` layout -- confirm against real output.
frame_examples_sub = dedent(
    """\
    >>> int_values = [1, 2, 3, 4, 5]
    >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']
    >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]
    >>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,
    ...                   "float_col": float_values})
    >>> df
       int_col text_col  float_col
    0        1    alpha       0.00
    1        2     beta       0.25
    2        3    gamma       0.50
    3        4    delta       0.75
    4        5  epsilon       1.00

    Prints information of all columns:

    >>> df.info(verbose=True)
    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 5 entries, 0 to 4
    Data columns (total 3 columns):
     #   Column     Non-Null Count  Dtype
    ---  ------     --------------  -----
     0   int_col    5 non-null      int64
     1   text_col   5 non-null      object
     2   float_col  5 non-null      float64
    dtypes: float64(1), int64(1), object(1)
    memory usage: 248.0+ bytes

    Prints a summary of columns count and its dtypes but not per column
    information:

    >>> df.info(verbose=False)
    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 5 entries, 0 to 4
    Columns: 3 entries, int_col to float_col
    dtypes: float64(1), int64(1), object(1)
    memory usage: 248.0+ bytes

    Pipe output of DataFrame.info to a buffer instead of sys.stdout, get
    the buffer content and write it to a text file:

    >>> import io
    >>> buffer = io.StringIO()
    >>> df.info(buf=buffer)
    >>> s = buffer.getvalue()
    >>> with open("df_info.txt", "w",
    ...           encoding="utf-8") as f:  # doctest: +SKIP
    ...     f.write(s)
    260

    The `memory_usage` parameter allows deep introspection mode, especially
    useful for big DataFrames and fine-tuning memory optimization:

    >>> df = pd.DataFrame({
    ...     'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),
    ...     'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),
    ...     'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)
    ... })
    >>> df.info()
    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 1000000 entries, 0 to 999999
    Data columns (total 3 columns):
     #   Column    Non-Null Count    Dtype
    ---  ------    --------------    -----
     0   column_1  1000000 non-null  object
     1   column_2  1000000 non-null  object
     2   column_3  1000000 non-null  object
    dtypes: object(3)
    memory usage: 22.9+ MB

    >>> df.info(memory_usage='deep')
    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 1000000 entries, 0 to 999999
    Data columns (total 3 columns):
     #   Column    Non-Null Count    Dtype
    ---  ------    --------------    -----
     0   column_1  1000000 non-null  object
     1   column_2  1000000 non-null  object
     2   column_3  1000000 non-null  object
    dtypes: object(3)
    memory usage: 165.9 MB"""
)
135
+
136
+
137
# "See Also" entries; substituted for the ``{see_also_sub}`` placeholder of
# the shared ``render`` docstring template when documenting the DataFrame
# variant (see ``frame_subs``). The wrapped description line is indented so
# it reads as a continuation of the first entry.
frame_see_also_sub = dedent(
    """\
    DataFrame.describe: Generate descriptive statistics of DataFrame
        columns.
    DataFrame.memory_usage: Memory usage of DataFrame columns."""
)
143
+
144
+
145
# Keyword substitutions that specialise the shared ``render`` docstring
# template for DataFrame; each key matches a ``{placeholder}`` in that
# template and the mapping is unpacked into the ``doc`` decorator.
frame_subs = {
    "klass": "DataFrame",
    "type_sub": " and columns",
    "max_cols_sub": frame_max_cols_sub,
    "null_counts_sub": frame_null_counts_sub,
    "examples_sub": frame_examples_sub,
    "see_also_sub": frame_see_also_sub,
    # Empty: the DataFrame docstring carries no "versionadded" note.
    "version_added_sub": "",
}
154
+
155
+
28
156
def _put_str (s : Union [str , Dtype ], space : int ) -> str :
29
157
"""
30
158
Make string of specified length, padding to the right if necessary.
@@ -172,26 +300,26 @@ def render(
172
300
show_counts : Optional [bool ],
173
301
) -> None :
174
302
"""
175
- Print a concise summary of a %( klass)s .
303
+ Print a concise summary of a { klass} .
176
304
177
- This method prints information about a %( klass)s including
178
- the index dtype%( type_sub)s , non-null values and memory usage.
179
- %( version_added_sub)s \
305
+ This method prints information about a { klass} including
306
+ the index dtype{ type_sub} , non-null values and memory usage.
307
+ { version_added_sub} \
180
308
181
309
Parameters
182
310
----------
183
- data : %( klass)s
184
- %( klass)s to print information about.
311
+ data : { klass}
312
+ { klass} to print information about.
185
313
verbose : bool, optional
186
314
Whether to print the full summary. By default, the setting in
187
315
``pandas.options.display.max_info_columns`` is followed.
188
316
buf : writable buffer, defaults to sys.stdout
189
317
Where to send the output. By default, the output is printed to
190
318
sys.stdout. Pass a writable buffer if you need to further process
191
319
the output.
192
- %( max_cols_sub)s
320
+ { max_cols_sub}
193
321
memory_usage : bool, str, optional
194
- Specifies whether total memory usage of the %( klass)s
322
+ Specifies whether total memory usage of the { klass}
195
323
elements (including the index) should be displayed. By default,
196
324
this follows the ``pandas.options.display.memory_usage`` setting.
197
325
@@ -203,20 +331,20 @@ def render(
203
331
consume the same memory amount for corresponding dtypes. With deep
204
332
memory introspection, a real memory usage calculation is performed
205
333
at the cost of computational resources.
206
- %( null_counts_sub)s
334
+ { null_counts_sub}
207
335
208
336
Returns
209
337
-------
210
338
None
211
- This method prints a summary of a %( klass)s and returns None.
339
+ This method prints a summary of a { klass} and returns None.
212
340
213
341
See Also
214
342
--------
215
- %( see_also_sub)s
343
+ { see_also_sub}
216
344
217
345
Examples
218
346
--------
219
- %( examples_sub)s
347
+ { examples_sub}
220
348
"""
221
349
222
350
@@ -279,6 +407,10 @@ def memory_usage_bytes(self) -> int:
279
407
deep = False
280
408
return self .data .memory_usage (index = True , deep = deep ).sum ()
281
409
410
+ @doc (
411
+ BaseInfo .render ,
412
+ ** frame_subs ,
413
+ )
282
414
def render (
283
415
self ,
284
416
* ,
0 commit comments