Skip to content

Commit 7a4fe2a

Browse files
authored
Merge pull request #20 from pyiron/resources
Add Resolvers to unify finding of resources
2 parents 553d8d4 + cdc9123 commit 7a4fe2a

17 files changed

+394
-0
lines changed

pyiron_snippets/resources.py

Lines changed: 302 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,302 @@
1+
"""
2+
Classes to find data files and executables in global paths.
3+
"""
4+
5+
from abc import ABC, abstractmethod
6+
from collections.abc import Iterator, Iterable
7+
import os
8+
import os.path
9+
from fnmatch import fnmatch
10+
from glob import glob
11+
import re
12+
from typing import Any
13+
14+
# Default file extension of executable wrapper scripts: batch files on Windows ("nt"), shell scripts elsewhere.
EXE_SUFFIX = "bat" if os.name == "nt" else "sh"
18+
19+
20+
class ResourceNotFound(RuntimeError):
    """Raised when a resolver is asked for a single match but the search yields nothing."""
22+
23+
24+
class AbstractResolver(ABC):
25+
"""
26+
Interface for resolvers.
27+
28+
Implementations must define :meth:`._search`, taking a tuple of names to search for and yielding instances of any
29+
type. Implementations should pick a single type to yield, e.g. :class:`.ResourceResolver` always yields absolute
30+
paths, while :class:`.ExecutableResolver` always yields 2-tuples of a version tag and absolute paths.
31+
"""
32+
33+
@abstractmethod
34+
def _search(self, name: tuple[str]) -> Iterator[Any]:
35+
pass
36+
37+
def search(self, name: Iterable[str] | str = "*") -> Iterator[Any]:
38+
"""
39+
Yield all matches.
40+
41+
When `name` is given as an iterable, returned results match at least one of the `name` globs.
42+
43+
Args:
44+
name (str, iterable of str): file name to search for; can be an exact file name, a glob or list of those
45+
46+
Yields:
47+
object: resources matching `name`
48+
"""
49+
if name is not None and not isinstance(name, str):
50+
name = tuple(name)
51+
else:
52+
name = (name,)
53+
yield from self._search(name)
54+
55+
def list(self, name: Iterable[str] | str = "*") -> list[Any]:
56+
"""
57+
Return all matches.
58+
59+
Args:
60+
name (str, iterable of str): file name to search for; can be an exact file name, a glob or list of those
61+
62+
Returns:
63+
list: all matches returned by :meth:`.search`.
64+
"""
65+
return list(self.search(name))
66+
67+
def first(self, name: Iterable[str] | str = "*") -> Any:
68+
"""
69+
Return first match.
70+
71+
Args:
72+
name (str, iterable of str): file name to search for; can be an exact file name, a glob or list of those
73+
74+
Returns:
75+
object: the first match returned by :meth:`.search`.
76+
77+
Raises:
78+
:class:`~.ResourceNotFound`: if no matches are found.
79+
"""
80+
try:
81+
return next(iter(self.search(name)))
82+
except StopIteration:
83+
raise ResourceNotFound(f"Could not find {name} in {self}!") from None
84+
85+
def chain(self, *resolvers: "AbstractResolver") -> "ResolverChain":
86+
"""
87+
Return a new resolver that searches this and all given resolvers sequentially.
88+
89+
You will likely want to ensure that all given resolvers yield the same types and e.g. not mix ExecutableResolver
90+
and ResourceResolver, but this is not checked.
91+
92+
The advantage of using :meth:`.chain` rather than adding more paths to one resolver is when different paths have
93+
different internal sub structure, such as when combining resources from pyiron resources and conda data
94+
packages. When searching for lammps potential files, e.g. we have some folders that are set up as
95+
96+
<resources>/lammps/potentials/...
97+
98+
but iprpy conda package that ships the NIST potentials doesn't have the lammps/potentials
99+
100+
<iprpy>/...
101+
102+
With chaining we can do very easily
103+
104+
>>> ResourceResolver([<resources>], "lammps", "potentials").chain(
105+
... ResourceResolver([<iprpy>])) # doctest: +SKIP
106+
107+
without we'd need to modify the resource paths ourselves explicitly
108+
109+
>>> ResourceResolver([r + '/lammps/potentials' for r in <resources>] + [<iprpy>]) # doctest: +SKIP
110+
111+
which is a bit more awkward.
112+
113+
Args:
114+
resolvers (:class:`.AbstractResolver`): any number of sub resolvers
115+
116+
Returns:
117+
self: if `resolvers` is empty
118+
:class:`.ResolverChain`: otherwise
119+
"""
120+
if resolvers == ():
121+
return self
122+
return ResolverChain(self, *resolvers)
123+
124+
125+
class ResolverChain(AbstractResolver):
    """
    A chain of resolvers.  Matches are returned sequentially, i.e. all matches of the first resolver come before
    all matches of the second one, and so on.
    """

    __slots__ = ("_resolvers",)

    def __init__(self, *resolvers: AbstractResolver):
        """
        Args:
            *resolvers (:class:`.AbstractResolver`): sub resolvers to use
        """
        self._resolvers = resolvers

    def _search(self, name: tuple[str, ...]) -> Iterator[Any]:
        """
        Yield the matches of every sub resolver, in the order the resolvers were given.

        Args:
            name (tuple of str): names or globs, forwarded unchanged to each sub resolver's :meth:`.search`

        Yields:
            object: whatever the sub resolvers yield
        """
        for resolver in self._resolvers:
            yield from resolver.search(name)

    def __repr__(self) -> str:
        inner = ", ".join(repr(r) for r in self._resolvers)
        return f"{type(self).__name__}({inner})"
146+
147+
148+
class ResourceResolver(AbstractResolver):
    """
    Generic resolver for files and directories.

    Resources are expected to conform to the following format:
        <resource_path>/<module>/<subdir0>/<subdir1>/...

    *All* entries within this final `subdir` are yielded by :meth:`.search`, whether they are files or directories.
    Search results can be restricted by passing a (list of) globs.  If a list is given, entries matching at least one
    of them are returned.

    >>> res = ResourceResolver(..., "lammps")
    >>> res.list()  # doctest: +SKIP
    [
        "bin",
        "potentials",
        "potentials.csv"
    ]
    """

    __slots__ = "_resource_paths", "_module", "_subdirs"

    def __init__(self, resource_paths, module, *subdirs):
        """
        Args:
            resource_paths (list of str or str): base paths for resource locations; a single path may be given as
                a plain string
            module (str): name of the module
            *subdirs (str): additional sub directories to descend into
        """
        # A bare string would otherwise be iterated character by character when searching.
        if isinstance(resource_paths, str):
            resource_paths = [resource_paths]
        self._resource_paths = resource_paths
        self._module = module
        self._subdirs = subdirs

    def __repr__(self):
        # Mirror the constructor signature: resource paths, module, then each sub directory, all comma separated.
        parts = [repr(self._resource_paths), repr(self._module)]
        parts.extend(repr(s) for s in self._subdirs)
        return f"{type(self).__name__}({', '.join(parts)})"

    def _search(self, name):
        """
        Yield paths matching any of the globs in `name` inside each resource location.

        Resource paths are visited in the order given; within one location the globs are evaluated in order and the
        matches of each glob are yielded alphabetically sorted.  Locations that do not exist are skipped.

        Args:
            name (tuple of str): file names or globs to look for

        Yields:
            str: path of each match
        """
        from glob import escape as glob_escape

        for base in self._resource_paths:
            sub = os.path.join(base, self._module, *self._subdirs)
            if not os.path.exists(sub):
                continue
            # Only `name` is meant to be a pattern; escape the directory part so that special characters such as
            # '[' or '*' in an existing directory name are matched literally.
            root = glob_escape(sub)
            for pattern in name:
                yield from sorted(glob(os.path.join(root, pattern)))
193+
194+
195+
class ExecutableResolver(AbstractResolver):
    """
    A resolver for executable scripts.

    Executables are expected to conform to the following format:
        <resource_path>/<module>/bin/run_<code>_<version_string>.<suffix>

    and have the executable bit set.  :meth:`.search` yields tuples of version strings and full paths to the
    executable instead of plain strings.

    >>> exe = ExecutableResolver(..., "lammps")
    >>> exe.list()  # doctest: +SKIP
    [
        ('v1', '/my/resources/lammps/bin/run_lammps_v1.sh'),
        ('v1_mpi', '/my/resources/lammps/bin/run_lammps_v1_mpi.sh'),
        ('v2_default', '/my/resources/lammps/bin/run_lammps_v2_default.sh'),
    ]
    >>> exe.default_version  # doctest: +SKIP
    "v2_default"
    >>> exe.dict("v1*")  # doctest: +SKIP
    {
        'v1': '/my/resources/lammps/bin/run_lammps_v1.sh',
        'v1_mpi': '/my/resources/lammps/bin/run_lammps_v1_mpi.sh'
    }
    """

    __slots__ = "_regex", "_glob", "_code", "_suffix", "_resolver"

    def __init__(self, resource_paths, code, module=None, suffix=EXE_SUFFIX):
        """
        Args:
            resource_paths (list of str): base paths for resource locations
            code (str): name of the simulation code
            module (str): name of the module the code is part of, same as `code` by default
            suffix (str, optional): file ending; if `None`, 'bat' on Windows 'sh' elsewhere
        """
        from glob import escape as glob_escape

        if suffix is None:
            suffix = EXE_SUFFIX
        if module is None:
            module = code
        # Kept verbatim so that __repr__ can reproduce the constructor call.
        self._code = code
        self._suffix = suffix
        # `code` and `suffix` are literal text, not patterns: escape them so that characters like '.', '+' or '['
        # cannot change the meaning of the regex or the glob.  The regex is applied to bare file names.
        self._regex = re.compile(rf"run_{re.escape(code)}_(.*)\.{re.escape(suffix)}")
        self._glob = f"run_{glob_escape(code)}_*.{glob_escape(suffix)}"
        self._resolver = ResourceResolver(
            resource_paths,
            module,
            "bin",
        )

    def __repr__(self):
        inner = ", ".join(
            (
                repr(self._resolver._resource_paths),
                repr(self._code),
                repr(self._resolver._module),
                repr(self._suffix),
            )
        )
        return f"{type(self).__name__}({inner})"

    def _search(self, name):
        """
        Yield `(version, path)` for every executable script whose version string matches one of the globs in `name`.

        Only regular files with the executable permission are considered.  If the same version string is found more
        than once, only the first occurrence is yielded.

        Args:
            name (tuple of str): globs that are matched against the version string, not the file name

        Yields:
            tuple of (str, str): version string and path of the executable
        """
        # Check permissions with the effective uid/gid where the platform supports it.
        use_effective_ids = os.access in os.supports_effective_ids
        seen = set()
        for path in self._resolver.search(self._glob):
            if not os.path.isfile(path):
                continue
            if not os.access(path, os.X_OK, effective_ids=use_effective_ids):
                continue
            # Match the file name only, so that a directory called e.g. 'run_<code>_x' further up in the path cannot
            # leak into the version string.
            match = self._regex.fullmatch(os.path.basename(path))
            if match is None:
                # The glob can be more permissive than the regex (e.g. case-insensitive file systems); skip such
                # files instead of failing.
                continue
            version = match.group(1)
            if version in seen:
                continue
            if any(fnmatch(version, n) for n in name):
                seen.add(version)
                yield (version, path)

    def dict(self, name="*") -> dict[str, str]:
        """
        Construct dict from :meth:`.search` results.

        Args:
            name (str or list of str): glob(s) to filter the version strings

        Returns:
            dict: mapping version strings to full paths
        """
        return dict(self.search(name=name))

    @property
    def available_versions(self):
        """
        list of str: all found versions
        """
        return [version for version, _ in self.search("*")]

    @property
    def default_version(self):
        """
        str: the first version found in resources

        If a version matching `*default*` exists, the first matching is returned.

        Raises:
            :class:`.ResourceNotFound`: if no executables are found at all
        """
        try:
            return self.first("*default*")[0]
        except ResourceNotFound:
            pass
        # try again outside the except clause to avoid nested error in case this fails as well
        return self.first("*")[0]

tests/unit/static/resources/res1/module1/bin/run_code1_version1.bat

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/run_code1_version1.sh

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/run_code1_version2.bat

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/run_code1_version2.sh

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/run_code1_versionnonexec.bat

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/run_code1_versionnonexec.sh

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/run_code2_version1.bat

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/run_code2_version1.sh

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/run_code2_version2_default.bat

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/run_code2_version2_default.sh

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/wrong_format

Whitespace-only changes.

tests/unit/static/resources/res1/module1/data/empty.txt

Whitespace-only changes.

tests/unit/static/resources/res1/module3/empty.txt

Whitespace-only changes.

tests/unit/static/resources/res2/module2/data/empty.txt

Whitespace-only changes.

tests/unit/static/resources/res2/module3/empty.txt

Whitespace-only changes.

tests/unit/test_resources.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
import os
2+
import os.path
3+
import unittest
4+
from pyiron_snippets.resources import ResourceNotFound, ResourceResolver, ExecutableResolver
5+
6+
class TestResolvers(unittest.TestCase):
    """
    Tests for the resolvers, run against the fixture tree in ``static/resources`` next to this file.
    """

    @classmethod
    def setUpClass(cls):
        cls.static_path = os.path.join(os.path.dirname(__file__), "static", "resources")
        cls.res1 = os.path.join(cls.static_path, "res1")
        cls.res2 = os.path.join(cls.static_path, "res2")

    def test_resource_resolver(self):
        """search, first and list must agree with each other and find all resources."""
        res = ResourceResolver([self.res1], "module1")
        self.assertEqual(
            set(res.search()),
            {
                os.path.join(self.res1, "module1", "bin"),
                os.path.join(self.res1, "module1", "data"),
            },
            "Simple search does not return all resources!",
        )
        self.assertEqual(res.first(), tuple(res.search())[0], "first does not return first result!")
        self.assertEqual(list(res.search()), res.list(), "list not equal to search!")
        with self.assertRaises(ResourceNotFound, msg="first does not raise error on non existing resource!"):
            res.first("nonexisting")
        res = ResourceResolver([self.res1, self.res2], "module3")
        self.assertEqual(
            len(res.list("empty.txt")),
            2,
            msg="should find all instances of files with the same name.",
        )

    def test_order(self):
        """search must return results in the order given by the resource paths."""
        self.assertIn(
            "res1",
            ResourceResolver([self.res1, self.res2], "module3").first(),
            "resolver does not respect order of given resource paths!",
        )
        self.assertIn(
            "res2",
            ResourceResolver([self.res2, self.res1], "module3").first(),
            "resolver does not respect order of given resource paths!",
        )
        self.assertEqual(
            tuple(os.path.basename(r) for r in ResourceResolver([self.res1], "module1").search()),
            tuple(sorted(("bin", "data"))),
            "search does not return results from the same folder in alphabetical order!",
        )

    def test_chain(self):
        """chained resolvers must behave like normal resolvers."""
        chain = ResourceResolver([self.res1], "module3").chain(ResourceResolver([self.res2], "module3"))
        resol = ResourceResolver([self.res1, self.res2], "module3")

        self.assertEqual(
            chain.first(),
            resol.first(),
            "first returns different result for chained and normal resolver!",
        )
        self.assertEqual(
            tuple(chain.search()),
            tuple(resol.search()),
            "search returns different result for chained and normal resolver!",
        )

        self.assertIs(resol, resol.chain(), "Empty chain does not return the same resolver!")

    def test_executable(self):
        """ExecutableResolver must only list well-formed, executable scripts and pick the right default."""
        for suffix in (None, "sh", "bat"):
            with self.subTest(suffix=suffix):
                res = ExecutableResolver([self.res1], code="code1", module="module1", suffix=suffix)
                if os.name != "nt":
                    # no exec bits are present on windows it seems
                    self.assertNotIn(
                        "versionnonexec",
                        res.available_versions,
                        "ExecutableResolver must not list scripts that are not executable.",
                    )
                self.assertNotIn(
                    "wrong_format",
                    res.available_versions,
                    "ExecutableResolver must not list scripts that do not follow the correct format.",
                )
                self.assertEqual(
                    "version1",
                    res.default_version,
                    "default version should be chosen in alphabetical order if not explicitly set.",
                )
                res = ExecutableResolver([self.res1], code="code2", module="module1", suffix=suffix)
                self.assertEqual(
                    res.default_version,
                    "version2_default",
                    "default version should be chosen as explicitly set.",
                )
                self.assertEqual(dict(res.search()), res.dict(), "dict not equal to search!")

    def test_resource_resolver_subdirs(self):
        """Resolver constructor should take any additional args to search sub directories."""
        res = ResourceResolver([self.res1], "module1", "bin")
        expected_results = {
            os.path.join(self.res1, "module1", "bin", path)
            for path in ("run_code1_versionnonexec.sh", "run_code1_version1.sh", "run_code1_version2.sh")
        }
        self.assertEqual(
            set(res.search("*code1*.sh")),
            expected_results,
            "Search with subdirectories does not return all resources!",
        )

    def test_resource_resolver_name_globs(self):
        """Passing several globs must return the entries matching at least one of them."""
        res = ResourceResolver([self.res1], "module1", "bin")
        expected_results = {
            os.path.join(self.res1, "module1", "bin", "run_code1_version1.sh"),
            os.path.join(self.res1, "module1", "bin", "run_code1_version2.sh"),
        }
        results = set(res.search(["*code1*version1.sh", "*code1*sion2.sh"]))
        self.assertEqual(
            results,
            expected_results,
            "Search with multiple glob patterns does not return all resources!",
        )
91+
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)