Skip to content

Commit

Permalink
replace usages of copy_arrays with memmap for asdf>=3.1.0 (
Browse files Browse the repository at this point in the history
…#940)

* replace usages of ``copy_arrays`` with ``memmap`` for ``asdf>=3.1.0``

* `pre-commit run -a`

* compare strings

* Update CHANGELOG.md

---------

Co-authored-by: Çağtay Fabry <cagtay.fabry@bam.de>
  • Loading branch information
zacharyburnett and CagtayFabry authored Aug 5, 2024
1 parent 35f92d7 commit 336c835
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 60 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
### Fixes

- rename (fix typo) argument to `lcs_child_in_parent` in `CoordinateSystemManager.add_cs` \[{pull}`936`\].

- replace usages of `pkg_resources` with `importlib.metadata` \[{pull}`941`\].
- replace usages of `copy_arrays` with `memmap` for `asdf>=3.1.0` \[{pull}`940`\].

### Dependencies

Expand Down
28 changes: 20 additions & 8 deletions weldx/asdf/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import annotations

import copy
import importlib.metadata
import io
import pathlib
import warnings
Expand Down Expand Up @@ -42,12 +43,23 @@
__all__ = [
"WeldxFile",
"DEFAULT_ARRAY_COMPRESSION",
"DEFAULT_ARRAY_COPYING",
"DEFAULT_MEMORY_MAPPING",
"DEFAULT_ARRAY_INLINE_THRESHOLD",
"_PROTECTED_KEYS",
]


def asdf_open_memory_mapping_kwarg(
    memmap: bool, asdf_version: str | None = None
) -> dict:
    """Build the memory-mapping keyword argument matching an asdf version.

    For asdf versions ``>= 3.1.0`` the ``memmap`` keyword is returned, for older
    versions the inverse ``copy_arrays`` keyword is returned.

    Parameters
    ----------
    memmap :
        Whether arrays should be memory-mapped when reading files.
    asdf_version :
        Version string to decide on. If `None`, the version of the installed
        ``asdf`` distribution is looked up via `importlib.metadata.version`.

    Returns
    -------
    dict
        ``{"memmap": memmap}`` for ``asdf>=3.1.0``,
        otherwise ``{"copy_arrays": not memmap}``.

    """
    if asdf_version is None:
        asdf_version = importlib.metadata.version("asdf")

    # Compare numerically: as strings, "10" would sort before "3" and a future
    # asdf 10.x would wrongly be treated as older than 3.1.0.
    release = []
    for part in asdf_version.split(".")[:3]:
        digits = ""
        for char in part:
            if char not in "0123456789":
                # stop at suffixes such as "0rc1" or "0dev5"
                break
            digits += char
        release.append(int(digits) if digits else 0)
    # pad short versions ("3.1") so that they compare equal to "3.1.0"
    release.extend([0] * (3 - len(release)))

    if tuple(release) >= (3, 1, 0):
        return {"memmap": memmap}
    return {"copy_arrays": not memmap}


@contextmanager
def reset_file_position(fh: SupportsFileReadWrite):
"""Reset the internal position of the given file after leaving the context.
Expand All @@ -66,8 +78,8 @@ def reset_file_position(fh: SupportsFileReadWrite):
DEFAULT_ARRAY_COMPRESSION = "input"
"""All arrays will be compressed using this algorithm, if not specified by user."""

DEFAULT_ARRAY_COPYING = True
"""Stored Arrays will be copied to memory, or not. If False, use memory mapping."""
DEFAULT_MEMORY_MAPPING = False
"""Stored Arrays will be memory-mapped, or not. If True, use memory mapping."""

DEFAULT_ARRAY_INLINE_THRESHOLD = 10
"""Arrays with less or equal elements will be inlined (stored as string, not binary)."""
Expand Down Expand Up @@ -148,8 +160,8 @@ class WeldxFile(_ProtectedViewDict):
- ``lz4``: Use lz4 compression.
- ``input``: Use the same compression as in the file read.
If there is no prior file, acts as None.
copy_arrays :
When `False`, when reading files, attempt to memory map (memmap) underlying data
memmap :
If `True`, attempt to memory map (memmap) the underlying data arrays when
reading files, where possible. This avoids exhausting memory when working
with very large datasets.
array_inline_threshold :
Expand Down Expand Up @@ -219,19 +231,19 @@ def __init__(
) = None,
software_history_entry: Mapping = None,
compression: str = DEFAULT_ARRAY_COMPRESSION,
copy_arrays: bool = DEFAULT_ARRAY_COPYING,
memmap: bool = DEFAULT_MEMORY_MAPPING,
array_inline_threshold: int = DEFAULT_ARRAY_INLINE_THRESHOLD,
):
if write_kwargs is None:
write_kwargs = dict(all_array_compression=compression)

if asdffile_kwargs is None:
asdffile_kwargs = dict(copy_arrays=copy_arrays)
asdffile_kwargs = asdf_open_memory_mapping_kwarg(memmap=memmap)

# this parameter is now (asdf-2.8) an asdf.config parameter, so we store it here.
self._array_inline_threshold = array_inline_threshold

# TODO: ensure no mismatching args for compression and copy_arrays.
# TODO: ensure no mismatching args for compression and memmap.
self._write_kwargs = write_kwargs
self._asdffile_kwargs = asdffile_kwargs

Expand Down
17 changes: 12 additions & 5 deletions weldx/asdf/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import importlib.metadata
from collections.abc import Callable, Hashable, Mapping, MutableMapping, Set
from contextlib import contextmanager
from io import BytesIO, TextIOBase
Expand Down Expand Up @@ -149,7 +150,7 @@ def read_buffer_context(
Buffer containing ASDF file contents
open_kwargs
Additional keywords to pass to `asdf.AsdfFile.open`
Extensions are always set, ``copy_arrays=True`` is set by default.
Extensions are always set, ``memmap=False`` is set by default.
Returns
-------
Expand All @@ -158,7 +159,13 @@ def read_buffer_context(
"""
if open_kwargs is None:
open_kwargs = {"copy_arrays": True, "lazy_load": False}
open_kwargs = {"memmap": False, "lazy_load": False}

if "memmap" in open_kwargs and tuple(
importlib.metadata.version("asdf").split(".")
) < ("3", "1", "0"):
open_kwargs["copy_arrays"] = not open_kwargs["memmap"]
del open_kwargs["memmap"]

buffer.seek(0)

Expand Down Expand Up @@ -190,7 +197,7 @@ def read_buffer(
Buffer containing ASDF file contents
open_kwargs
Additional keywords to pass to `asdf.AsdfFile.open`
Extensions are always set, ``copy_arrays=True`` is set by default.
Extensions are always set, ``memmap=False`` is set by default.
Returns
-------
Expand Down Expand Up @@ -220,7 +227,7 @@ def write_read_buffer_context(
Extensions are always set.
open_kwargs
Additional keywords to pass to `asdf.AsdfFile.open`
Extensions are always set, ``copy_arrays=True`` is set by default.
Extensions are always set, ``memmap=False`` is set by default.
Returns
-------
Expand Down Expand Up @@ -248,7 +255,7 @@ def write_read_buffer(
Extensions are always set.
open_kwargs
Additional keywords to pass to `asdf.AsdfFile.open`
Extensions are always set, ``copy_arrays=True`` is set by default.
Extensions are always set, ``memmap=False`` is set by default.
Returns
-------
Expand Down
Loading

0 comments on commit 336c835

Please sign in to comment.