Skip to content

Commit

Permalink
Merge branch 'release-7.0.5'
Browse files Browse the repository at this point in the history
  • Loading branch information
mpenkov committed Oct 4, 2024
2 parents 623ea92 + cf4b7fc commit 497541c
Show file tree
Hide file tree
Showing 17 changed files with 241 additions and 103 deletions.
113 changes: 52 additions & 61 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
name: Test
on: [push, pull_request]
concurrency: # https://stackoverflow.com/questions/66335225#comment133398800_72408109
group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
jobs:
linters:
runs-on: ubuntu-latest
Expand All @@ -12,10 +15,10 @@ jobs:
python-version: "3.11"

- name: Update pip
run: python -m pip install -U pip
run: pip install -U pip

- name: Install dependencies
run: python -m pip install flake8
run: pip install flake8

- name: Run flake8 linter (source)
run: flake8 --show-source smart_open
Expand All @@ -26,15 +29,17 @@ jobs:
strategy:
matrix:
include:
- {python: '3.8', os: ubuntu-20.04}
- {python: '3.9', os: ubuntu-20.04}
- {python: '3.10', os: ubuntu-20.04}
- {python: '3.11', os: ubuntu-20.04}

- {python: '3.8', os: windows-2019}
- {python: '3.9', os: windows-2019}
- {python: '3.10', os: windows-2019}
- {python: '3.11', os: windows-2019}
- {python-version: '3.8', os: ubuntu-20.04}
- {python-version: '3.9', os: ubuntu-20.04}
- {python-version: '3.10', os: ubuntu-20.04}
- {python-version: '3.11', os: ubuntu-20.04}
- {python-version: '3.12', os: ubuntu-20.04}

- {python-version: '3.8', os: windows-2019}
- {python-version: '3.9', os: windows-2019}
- {python-version: '3.10', os: windows-2019}
- {python-version: '3.11', os: windows-2019}
- {python-version: '3.12', os: windows-2019}
steps:
- uses: actions/checkout@v2

Expand All @@ -43,13 +48,7 @@ jobs:
python-version: ${{ matrix.python-version }}

- name: Update pip
run: python -m pip install -U pip

#
# https://askubuntu.com/questions/1428181/module-lib-has-no-attribute-x509-v-flag-cb-issuer-check
#
- name: Upgrade PyOpenSSL
run: python -m pip install pyOpenSSL --upgrade
run: pip install -U pip

- name: Install smart_open without dependencies
run: pip install -e .
Expand All @@ -69,19 +68,20 @@ jobs:
strategy:
matrix:
include:
- {python: '3.8', os: ubuntu-20.04}
- {python: '3.9', os: ubuntu-20.04}
- {python: '3.10', os: ubuntu-20.04}
- {python: '3.11', os: ubuntu-20.04}
- {python-version: '3.8', os: ubuntu-20.04}
- {python-version: '3.9', os: ubuntu-20.04}
- {python-version: '3.10', os: ubuntu-20.04}
- {python-version: '3.11', os: ubuntu-20.04}
- {python-version: '3.12', os: ubuntu-20.04}

#
# Some of the doctests don't pass on Windows because of Windows-specific
# character encoding issues.
#
# - {python: '3.7', os: windows-2019}
# - {python: '3.8', os: windows-2019}
# - {python: '3.9', os: windows-2019}
# - {python: '3.10', os: windows-2019}
# - {python-version: '3.7', os: windows-2019}
# - {python-version: '3.8', os: windows-2019}
# - {python-version: '3.9', os: windows-2019}
# - {python-version: '3.10', os: windows-2019}

steps:
- uses: actions/checkout@v2
Expand All @@ -91,10 +91,7 @@ jobs:
python-version: ${{ matrix.python-version }}

- name: Update pip
run: python -m pip install -U pip

- name: Upgrade PyOpenSSL
run: python -m pip install pyOpenSSL --upgrade
run: pip install -U pip

- name: Install smart_open and its dependencies
run: pip install -e .[test]
Expand All @@ -111,17 +108,18 @@ jobs:
strategy:
matrix:
include:
- {python: '3.8', os: ubuntu-20.04}
- {python: '3.9', os: ubuntu-20.04}
- {python: '3.10', os: ubuntu-20.04}
- {python: '3.11', os: ubuntu-20.04}
- {python-version: '3.8', os: ubuntu-20.04}
- {python-version: '3.9', os: ubuntu-20.04}
- {python-version: '3.10', os: ubuntu-20.04}
- {python-version: '3.11', os: ubuntu-20.04}
- {python-version: '3.12', os: ubuntu-20.04}

# Not sure why we exclude these, perhaps for historical reasons?
#
# - {python: '3.7', os: windows-2019}
# - {python: '3.8', os: windows-2019}
# - {python: '3.9', os: windows-2019}
# - {python: '3.10', os: windows-2019}
# - {python-version: '3.7', os: windows-2019}
# - {python-version: '3.8', os: windows-2019}
# - {python-version: '3.9', os: windows-2019}
# - {python-version: '3.10', os: windows-2019}

steps:
- uses: actions/checkout@v2
Expand All @@ -131,20 +129,17 @@ jobs:
python-version: ${{ matrix.python-version }}

- name: Update pip
run: python -m pip install -U pip

- name: Upgrade PyOpenSSL
run: python -m pip install pyOpenSSL --upgrade

- run: python -m pip install numpy
run: pip install -U pip

- name: Install smart_open and its dependencies
run: pip install -e .[test]

- run: bash ci_helpers/helpers.sh enable_moto_server
if: ${{ matrix.moto_server }}

- run: |

- name: Start vsftpd
timeout-minutes: 2
run: |
sudo apt-get install vsftpd
sudo bash ci_helpers/helpers.sh create_ftp_ftps_servers
Expand All @@ -156,7 +151,7 @@ jobs:

- run: bash ci_helpers/helpers.sh disable_moto_server
if: ${{ matrix.moto_server }}

- run: sudo bash ci_helpers/helpers.sh delete_ftp_ftps_servers

benchmarks:
Expand All @@ -165,15 +160,16 @@ jobs:
strategy:
matrix:
include:
- {python: '3.8', os: ubuntu-20.04}
- {python: '3.9', os: ubuntu-20.04}
- {python: '3.10', os: ubuntu-20.04}
- {python: '3.11', os: ubuntu-20.04}
- {python-version: '3.8', os: ubuntu-20.04}
- {python-version: '3.9', os: ubuntu-20.04}
- {python-version: '3.10', os: ubuntu-20.04}
- {python-version: '3.11', os: ubuntu-20.04}
- {python-version: '3.12', os: ubuntu-20.04}

# - {python: '3.7', os: windows-2019}
# - {python: '3.8', os: windows-2019}
# - {python: '3.9', os: windows-2019}
# - {python: '3.10', os: windows-2019}
# - {python-version: '3.7', os: windows-2019}
# - {python-version: '3.8', os: windows-2019}
# - {python-version: '3.9', os: windows-2019}
# - {python-version: '3.10', os: windows-2019}

steps:
- uses: actions/checkout@v2
Expand All @@ -183,16 +179,11 @@ jobs:
python-version: ${{ matrix.python-version }}

- name: Update pip
run: python -m pip install -U pip

- name: Upgrade PyOpenSSL
run: python -m pip install pyOpenSSL --upgrade
run: pip install -U pip

- name: Install smart_open and its dependencies
run: pip install -e .[test]

- run: pip install awscli pytest_benchmark

- name: Run benchmarks
run: python ci_helpers/run_benchmarks.py
env:
Expand Down
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
# 7.0.5, 2024-10-04

- Fix zstd compression in ab mode (PR [#833](https://github.com/piskvorky/smart_open/pull/833), [@ddelange](https://github.com/ddelange))
- Fix close function not being able to upload a compressed S3 (PR [#838](https://github.com/piskvorky/smart_open/pull/838), [@jbarragan-bridge](https://github.com/jbarragan-bridge))
- Fix test_http.request_callback (PR [#828](https://github.com/piskvorky/smart_open/pull/828), [@ddelange](https://github.com/ddelange))
- Update readline logic for azure to match s3 (PR [#826](https://github.com/piskvorky/smart_open/pull/826), [@quantumfusion](https://github.com/quantumfusion))
- Make http handler take an optional requests.Session (PR [#825](https://github.com/piskvorky/smart_open/pull/825), [@arondaniel](https://github.com/arondaniel))
- Ensure no side effects on SinglepartWriter exception (PR [#820](https://github.com/piskvorky/smart_open/pull/820), [@donsokolone](https://github.com/donsokolone))
- Add support for `get_blob_kwargs` to GCS blob read operations (PR [#817](https://github.com/piskvorky/smart_open/pull/817), [@thejcannon](https://github.com/thejcannon))

# 7.0.4, 2024-03-26

* Fix wb mode with zstd compression (PR [#815](https://github.com/piskvorky/smart_open/pull/815), [@djudd](https://github.com/djudd))
Expand Down Expand Up @@ -482,3 +492,4 @@ The old `smart_open.smart_open` function is deprecated, but continues to work as

- support for multistream bzip files (PR #9, @pombredanne)
- introduce this CHANGELOG

3 changes: 2 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,8 @@ GCS Advanced Usage

Additional keyword arguments can be propagated to the GCS open method (`docs <https://cloud.google.com/python/docs/reference/storage/latest/google.cloud.storage.blob.Blob#google_cloud_storage_blob_Blob_open>`__), which is used by ``smart_open`` under the hood, using the ``blob_open_kwargs`` transport parameter.

Additional keyword arguments can also be propagated to the GCS ``get_blob`` method (`docs <https://cloud.google.com/python/docs/reference/storage/latest/google.cloud.storage.bucket.Bucket#google_cloud_storage_bucket_Bucket_get_blob>`__) when in read mode, using the ``get_blob_kwargs`` transport parameter.

Additional blob properties (`docs <https://cloud.google.com/python/docs/reference/storage/latest/google.cloud.storage.blob.Blob#properties>`__) can be set before an upload, as long as they are not read-only, using the ``blob_properties`` transport parameter.

.. code-block:: python
Expand Down Expand Up @@ -507,4 +509,3 @@ issues or pull requests there. Suggestions, pull requests and improvements welco

``smart_open`` is open source software released under the `MIT license <https://github.com/piskvorky/smart_open/blob/master/LICENSE>`_.
Copyright (c) 2015-now `Radim Řehůřek <https://radimrehurek.com>`_.

5 changes: 5 additions & 0 deletions ci_helpers/helpers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ create_ftp_ftps_servers(){
mkdir $home_dir
useradd -p $(echo $pass | openssl passwd -1 -stdin) -d $home_dir $user
chown $user:$user $home_dir
openssl req -x509 -nodes -new -sha256 -days 10240 -newkey rsa:2048 -keyout /etc/vsftpd.key -out /etc/vsftpd.pem -subj "/C=ZA/CN=localhost"
chmod 755 /etc/vsftpd.key
chmod 755 /etc/vsftpd.pem

server_setup='''
listen=YES
Expand All @@ -32,6 +35,8 @@ chroot_local_user=YES
allow_writeable_chroot=YES'''

additional_ssl_setup='''
rsa_cert_file=/etc/vsftpd.pem
rsa_private_key_file=/etc/vsftpd.key
ssl_enable=YES
allow_anon_ssl=NO
force_local_data_ssl=NO
Expand Down
7 changes: 6 additions & 1 deletion integration-tests/test_ftp.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
from __future__ import unicode_literals
import pytest
from smart_open import open
import ssl
from functools import partial

# localhost has self-signed cert, see ci_helpers/helpers.sh:create_ftp_ftps_servers
ssl.create_default_context = partial(ssl.create_default_context, cafile="/etc/vsftpd.pem")


@pytest.fixture(params=[("ftp", 21), ("ftps", 90)])
Expand Down Expand Up @@ -81,4 +86,4 @@ def test_line_endings_binary(server_info):

with open(f"{server_type}://user:123@localhost:{port_num}/file4", "rb") as f:
for line in f:
assert B_CLRF in line
assert B_CLRF in line
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,12 @@ def read(fname):
tests_require = all_deps + [
'moto[server]',
'responses',
'boto3',
'pytest',
'pytest-rerunfailures',
'pytest_benchmark',
'awscli',
'pyopenssl',
'numpy',
]

setup(
Expand Down
26 changes: 12 additions & 14 deletions smart_open/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,24 +325,22 @@ def readline(self, limit=-1):
"""Read up to and including the next newline. Returns the bytes read."""
if limit != -1:
raise NotImplementedError('limits other than -1 not implemented yet')
the_line = io.BytesIO()

#
# A single line may span multiple buffers.
#
line = io.BytesIO()
while not (self._position == self._size and len(self._current_part) == 0):
#
# In the worst case, we're reading the unread part of self._current_part
# twice here, once in the if condition and once when calling index.
#
# This is sub-optimal, but better than the alternative: wrapping
# .index in a try..except, because that is slower.
#
remaining_buffer = self._current_part.peek()
if self._line_terminator in remaining_buffer:
next_newline = remaining_buffer.index(self._line_terminator)
the_line.write(self._read_from_buffer(next_newline + 1))
line_part = self._current_part.readline(self._line_terminator)
line.write(line_part)
self._position += len(line_part)

if line_part.endswith(self._line_terminator):
break
else:
the_line.write(self._read_from_buffer())
self._fill_buffer()
return the_line.getvalue()

return line.getvalue()

#
# Internal methods.
Expand Down
2 changes: 1 addition & 1 deletion smart_open/ftp.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def convert_transport_params_to_args(transport_params):
def _connect(hostname, username, port, password, secure_connection, transport_params):
kwargs = convert_transport_params_to_args(transport_params)
if secure_connection:
ssl_context = ssl.create_default_context(purpose=ssl.Purpose.CLIENT_AUTH)
ssl_context = ssl.create_default_context(purpose=ssl.Purpose.SERVER_AUTH)
ftp = FTP_TLS(context=ssl_context, **kwargs)
else:
ftp = FTP(**kwargs)
Expand Down
10 changes: 9 additions & 1 deletion smart_open/gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def open(
buffer_size=None,
min_part_size=_DEFAULT_MIN_PART_SIZE,
client=None, # type: google.cloud.storage.Client
get_blob_kwargs=None,
blob_properties=None,
blob_open_kwargs=None,
):
Expand All @@ -78,6 +79,9 @@ def open(
The minimum part size for multipart uploads. For writing only.
client: google.cloud.storage.Client, optional
The GCS client to use when working with google-cloud-storage.
get_blob_kwargs: dict, optional
Additional keyword arguments to propagate to the bucket.get_blob
method of the google-cloud-storage library. For reading only.
blob_properties: dict, optional
Set properties on blob before writing. For writing only.
blob_open_kwargs: dict, optional
Expand All @@ -95,6 +99,7 @@ def open(
_blob = Reader(bucket=bucket_id,
key=blob_id,
client=client,
get_blob_kwargs=get_blob_kwargs,
blob_open_kwargs=blob_open_kwargs)

elif mode in (constants.WRITE_BINARY, 'w', 'wt'):
Expand All @@ -116,8 +121,11 @@ def Reader(bucket,
buffer_size=None,
line_terminator=None,
client=None,
get_blob_kwargs=None,
blob_open_kwargs=None):

if get_blob_kwargs is None:
get_blob_kwargs = {}
if blob_open_kwargs is None:
blob_open_kwargs = {}
if client is None:
Expand All @@ -128,7 +136,7 @@ def Reader(bucket,
warn_deprecated('line_terminator')

bkt = client.bucket(bucket)
blob = bkt.get_blob(key)
blob = bkt.get_blob(key, **get_blob_kwargs)

if blob is None:
raise google.cloud.exceptions.NotFound(f'blob {key} not found in {bucket}')
Expand Down
Loading

0 comments on commit 497541c

Please sign in to comment.