Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Add more complete format string implementation for argstrings #754

Merged
merged 5 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 21 additions & 5 deletions pydra/engine/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,12 +649,9 @@ def argstr_formatting(argstr, inputs, value_updates=None):
if value_updates:
inputs_dict.update(value_updates)
# getting all fields that should be formatted, i.e. {field_name}, ...
inp_fields = re.findall(r"{\w+}", argstr)
inp_fields_float = re.findall(r"{\w+:[0-9.]+f}", argstr)
inp_fields += [re.sub(":[0-9.]+f", "", el) for el in inp_fields_float]
inp_fields = parse_format_string(argstr)
val_dict = {}
for fld in inp_fields:
fld_name = fld[1:-1] # extracting the name form {field_name}
for fld_name in inp_fields:
fld_value = inputs_dict[fld_name]
fld_attr = getattr(attrs.fields(type(inputs)), fld_name)
if fld_value is attr.NOTHING or (
Expand Down Expand Up @@ -738,3 +735,22 @@ def parse_copyfile(fld: attr.Attribute, default_collation=FileSet.CopyCollation.
f"Unrecognised type for collation copyfile metadata of {fld}, {collation}"
)
return mode, collation


def parse_format_string(fmtstr):
    """Parse an argstr format string and return all keywords used in it.

    Both the keyword of each replacement field and the keywords of any
    replacement fields nested inside its format spec are collected, e.g.
    ``"{a.attr[key]:{width}.{precision}f}"`` yields ``{"a", "width", "precision"}``.
    Only the leading identifier of a field is reported; attribute and item
    lookups that follow it are skipped.

    Parameters
    ----------
    fmtstr : str
        Format string using ``str.format`` replacement-field syntax.

    Returns
    -------
    set of str
        Names of all keyword fields referenced in ``fmtstr`` (empty if none).
    """
    identifier = r"[a-zA-Z_]\w*"
    attribute = rf"\.{identifier}"
    # str.format accepts any characters except "]" as an item key
    # (e.g. "some-key" or "a 2"); braces are excluded to keep fields separate
    item = r"\[[^\]{}]+\]"
    # Example: var.attr[key][0].attr2 (capture "var")
    field_with_lookups = (
        f"({identifier})(?:{attribute}|{item})*"  # Capture only the keyword
    )
    conversion = "(?:![rsa])"  # all three conversions: !r, !s and !a
    nobrace = "[^{}]*"
    # Example: {pads[hex]} (capture "pads"); used to scan a matched format spec
    nested_field = f"{{({identifier}){nobrace}}}"
    nested_field_nocapture = f"{{{identifier}{nobrace}}}"
    # Example: 0{pads[hex]}x or {width}.{precision}f -- the whole spec is
    # captured so that every nested field in it can be extracted afterwards
    # (a repeated capture group would only retain its last repetition)
    fmtspec = f"({nobrace}(?:{nested_field_nocapture}{nobrace})*)"
    full_field = f"{{{field_with_lookups}{conversion}?(?::{fmtspec})?}}"

    keywords = set()
    for match in re.finditer(full_field, fmtstr):
        # the spec group is unmatched when the field has no ":<spec>" part
        keyword, spec = match.groups(default="")
        keywords.add(keyword)
        keywords.update(re.findall(nested_field, spec))
    return keywords
55 changes: 53 additions & 2 deletions pydra/engine/tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
from pathlib import Path
import random
import platform
import typing as ty
import pytest
import attrs
import cloudpickle as cp
from unittest.mock import Mock
from fileformats.generic import Directory, File
Expand All @@ -15,9 +17,10 @@
load_and_run,
position_sort,
parse_copyfile,
argstr_formatting,
parse_format_string,
)
from ...utils.hash import hash_function
from .. import helpers_file
from ..core import Workflow


Expand Down Expand Up @@ -50,7 +53,7 @@ def test_hash_file(tmpdir):
with open(outdir / "test.file", "w") as fp:
fp.write("test")
assert (
hash_function(File(outdir / "test.file")) == "37fcc546dce7e59585f3217bb4c30299"
hash_function(File(outdir / "test.file")) == "f32ab20c4a86616e32bf2504e1ac5a22"
)


Expand Down Expand Up @@ -311,3 +314,51 @@ def mock_field(copyfile):
parse_copyfile(mock_field((1, 2)))
with pytest.raises(TypeError, match="Unrecognised type for collation copyfile"):
parse_copyfile(mock_field((Mode.copy, 2)))


def test_argstr_formatting():
    """Check argstr_formatting with plain, float-spec, dict-item and list-item fields."""

    @attrs.define
    class Inputs:
        a1_field: str
        b2_field: float
        c3_field: ty.Dict[str, str]
        d4_field: ty.List[str]

    inputs = Inputs("1", 2.0, {"c": "3"}, ["4"])
    assert (
        argstr_formatting(
            "{a1_field} {b2_field:02f} -test {c3_field[c]} -me {d4_field[0]}",
            inputs,
        )
        == "1 2.000000 -test 3 -me 4"
    )


def test_parse_format_string1():
    """A single one-letter field yields exactly that keyword."""
    keywords = parse_format_string("{a}")
    assert keywords == {"a"}


def test_parse_format_string2():
    """A single multi-letter field yields exactly that keyword."""
    keywords = parse_format_string("{abc}")
    assert keywords == {"abc"}


def test_parse_format_string3():
    """A field nested in the format spec is reported alongside the outer field."""
    keywords = parse_format_string("{a:{b}}")
    assert keywords == {"a", "b"}


def test_parse_format_string4():
    """An item lookup on the nested field does not change the reported keyword."""
    keywords = parse_format_string("{a:{b[2]}}")
    assert keywords == {"a", "b"}


def test_parse_format_string5():
    """Chained attribute/item lookups on both fields reduce to the leading keywords."""
    keywords = parse_format_string("{a.xyz[somekey].abc:{b[a][b].d[0]}}")
    assert keywords == {"a", "b"}


def test_parse_format_string6():
    """Literal spec text before the nested field and a key with a space are tolerated."""
    keywords = parse_format_string("{a:05{b[a 2][b].e}}")
    assert keywords == {"a", "b"}


def test_parse_format_string7():
    """Several fields mixed with literal text are all reported."""
    expected = {"a1_field", "b2_field", "c3_field", "d4_field"}
    keywords = parse_format_string(
        "{a1_field} {b2_field:02f} -test {c3_field[c]} -me {d4_field[0]}"
    )
    assert keywords == expected
12 changes: 6 additions & 6 deletions pydra/engine/tests/test_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def test_input_file_hash_1(tmp_path):
fields = [("in_file", File)]
input_spec = SpecInfo(name="Inputs", fields=fields, bases=(BaseSpec,))
inputs = make_klass(input_spec)
assert inputs(in_file=outfile).hash == "0e9306e5cae1de1b4dff1f27cca03bce"
assert inputs(in_file=outfile).hash == "02fa5f6f1bbde7f25349f54335e1adaf"


def test_input_file_hash_2(tmp_path):
Expand All @@ -154,7 +154,7 @@ def test_input_file_hash_2(tmp_path):

# checking specific hash value
hash1 = inputs(in_file=file).hash
assert hash1 == "17e4e2b4d8ce8f36bf3fd65804958dbb"
assert hash1 == "aaa50d60ed33d3a316d58edc882a34c3"

# checking if different name doesn't affect the hash
file_diffname = tmp_path / "in_file_2.txt"
Expand Down Expand Up @@ -185,7 +185,7 @@ def test_input_file_hash_2a(tmp_path):

# checking specific hash value
hash1 = inputs(in_file=file).hash
assert hash1 == "17e4e2b4d8ce8f36bf3fd65804958dbb"
assert hash1 == "aaa50d60ed33d3a316d58edc882a34c3"

# checking if different name doesn't affect the hash
file_diffname = tmp_path / "in_file_2.txt"
Expand All @@ -204,7 +204,7 @@ def test_input_file_hash_2a(tmp_path):

# checking if string is also accepted
hash4 = inputs(in_file=str(file)).hash
assert hash4 == "aee7c7ae25509fb4c92a081d58d17a67"
assert hash4 == "800af2b5b334c9e3e5c40c0e49b7ffb5"


def test_input_file_hash_3(tmp_path):
Expand Down Expand Up @@ -278,7 +278,7 @@ def test_input_file_hash_4(tmp_path):

# checking specific hash value
hash1 = inputs(in_file=[[file, 3]]).hash
assert hash1 == "11b7e9c90bc8d9dc5ccfc8d4526ba091"
assert hash1 == "0693adbfac9f675af87e900065b1de00"

# the same file, but int field changes
hash1a = inputs(in_file=[[file, 5]]).hash
Expand Down Expand Up @@ -315,7 +315,7 @@ def test_input_file_hash_5(tmp_path):

# checking specific hash value
hash1 = inputs(in_file=[{"file": file, "int": 3}]).hash
assert hash1 == "5fd53b79e55bbf62a4bb3027eb753a2c"
assert hash1 == "56e6e2c9f3bdf0cd5bd3060046dea480"

# the same file, but int field changes
hash1a = inputs(in_file=[{"file": file, "int": 5}]).hash
Expand Down
Loading