-
-
Notifications
You must be signed in to change notification settings - Fork 18.7k
ENH: Allow compression in NDFrame.to_csv to be a dict with optional arguments (#26023) #26024
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
4e73dc4
ab7620d
2e782f9
83e8834
d238878
b41be54
60ea58c
8ba9082
0a3a9fd
a1cb3f7
af2a96c
5853a28
789751f
5b09e6f
68a2b4d
c856f50
8df6c81
40d0252
18a735d
103c877
b6c34bc
969d387
abfbc0f
04ae25d
9c22652
56a75c2
bbfea34
7717f16
779511e
780eb04
6c4e679
1b567c9
9324b63
7cf65ee
29374f3
6701aa4
0f5489d
e04138e
6f2bf00
865aa81
8d1deee
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2920,10 +2920,10 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True, | |
def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, | ||
columns=None, header=True, index=True, index_label=None, | ||
mode='w', encoding=None, | ||
compression: Union[str, Dict, None] = 'infer', quoting=None, | ||
quotechar='"', line_terminator=None, chunksize=None, | ||
tupleize_cols=None, date_format=None, doublequote=True, | ||
escapechar=None, decimal='.'): | ||
compression: Union[str, Dict, None] = 'infer', | ||
quoting=None, quotechar='"', line_terminator=None, | ||
chunksize=None, tupleize_cols=None, date_format=None, | ||
doublequote=True, escapechar=None, decimal='.'): | ||
r""" | ||
Write object to a comma-separated values (csv) file. | ||
|
||
|
@@ -2977,9 +2977,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, | |
compression mode is 'infer' and `path_or_buf` is path-like, then | ||
detect compression mode from the following extensions: '.gz', | ||
'.bz2', '.zip' or '.xz'. (otherwise no compression). If dict given | ||
and mode is 'zip' or inferred as 'zip', optional value at 'arcname' | ||
specifies name of file within ZIP archive, assuming equal to | ||
`path_or_buf` if not specified or None. | ||
and mode is 'zip' or inferred as 'zip', other entries passed as | ||
kwargs to ByteZipFile. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ByteZipFile is not user facing, see my comments above |
||
|
||
.. versionchanged:: 0.25.0 | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,7 @@ | |
import lzma | ||
import mmap | ||
import os | ||
from typing import Dict, Tuple, Union | ||
from typing import Any, Dict, Tuple, Union | ||
from urllib.error import URLError # noqa | ||
from urllib.parse import ( # noqa | ||
urlencode, urljoin, urlparse as parse_url, uses_netloc, uses_params, | ||
|
@@ -254,7 +254,7 @@ def _get_compression_method(compression: Union[str, Dict, None]): | |
------ | ||
ValueError on dict missing 'method' key | ||
""" | ||
compression_args = {} # type: Dict | ||
compression_args = {} # type: Dict[str, Any] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this even necessary? Think creation in the conditional should suffice. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @WillAyd Is this in reference to the |
||
# Handle dict | ||
if isinstance(compression, dict): | ||
compression_args = compression.copy() | ||
|
@@ -319,8 +319,8 @@ def _infer_compression(filepath_or_buffer, compression): | |
|
||
|
||
def _get_handle(path_or_buf, mode, encoding=None, | ||
compression: Union[str, Dict, None] = None, memory_map=False, | ||
is_text=True): | ||
compression: Union[str, Dict, None] = None, | ||
memory_map=False, is_text=True): | ||
""" | ||
Get file handle for given path/buffer and mode. | ||
|
||
|
@@ -338,8 +338,7 @@ def _get_handle(path_or_buf, mode, encoding=None, | |
and `filepath_or_buffer` is path-like, then detect compression from | ||
the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise | ||
no compression). If dict and compression mode is 'zip' or inferred as | ||
'zip', optional value at key 'arcname' specifies the name of the file | ||
within ZIP archive at `path_or_buf`. | ||
'zip', other entries passed as kwargs to ByteZipFile. | ||
|
||
.. versionchanged:: 0.25.0 | ||
|
||
|
@@ -466,15 +465,18 @@ class BytesZipFile(zipfile.ZipFile, BytesIO): # type: ignore | |
""" | ||
# GH 17778 | ||
def __init__(self, file, mode, compression=zipfile.ZIP_DEFLATED, | ||
arcname: Union[str, zipfile.ZipInfo, None] = None, **kwargs): | ||
archive_name: Union[str, zipfile.ZipInfo, None] = None, | ||
drew-heenan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
**kwargs): | ||
if mode in ['wb', 'rb']: | ||
mode = mode.replace('b', '') | ||
self.arcname = arcname | ||
self.archive_name = archive_name | ||
super().__init__(file, mode, compression, **kwargs) | ||
|
||
def write(self, data): | ||
arcname = self.filename if self.arcname is None else self.arcname | ||
super().writestr(arcname, data) | ||
archive_name = self.filename | ||
if self.archive_name is not None: | ||
archive_name = self.archive_name | ||
super().writestr(archive_name, data) | ||
|
||
@property | ||
def closed(self): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should always have types for Container classes like Dict. I think this should read
Optional[Union[str, Dict[str, str]]]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right; I think it should be `Optional[Union[str, Dict[str, Optional[str]]]]`, though, as both the method and archive name can be `None`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the use case for an optional value in the dict? Isn't the point of accepting one in the first place for it to always have a `method` key with a `str` value associated with it?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@WillAyd If there's some `compression_method`, including the `Optional` allows `method` to be `None` so we can have `compression` which is passed to `to_csv` be, for example, [code example omitted in this capture] instead of something more cumbersome like [code example omitted in this capture], which is admittedly not a huge difference, but passing `None` as either `method` or `archive_name` is supported regardless.
The dict keys can also be any kwargs of `zipfile.ZipFile` (see the `BytesZipFile` constructor), so some of those also expect strings, ints, bools and some are optional. I'd actually have the type be `Optional[Union[str, Dict[str, Any]]]`