Skip to content

Commit

Permalink
spec.splice: properly handle cached hash invalidations (spack#24795)
Browse files Browse the repository at this point in the history
* spec.splice: properly handle cached hash invalidations

* make package_hash a cached hash on the spec
  • Loading branch information
becker33 authored Jul 10, 2021
1 parent 5569755 commit ebf2076
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 18 deletions.
13 changes: 11 additions & 2 deletions lib/spack/spack/hash_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,15 @@ class SpecHashDescriptor(object):
We currently use different hashes for different use cases.
"""

hash_types = ('_dag_hash', '_build_hash', '_full_hash')
hash_types = ('_dag_hash', '_build_hash', '_full_hash', '_package_hash')

def __init__(self, deptype=('link', 'run'), package_hash=False, attr=None):
def __init__(self, deptype=('link', 'run'), package_hash=False, attr=None,
override=None):
self.deptype = dp.canonical_deptype(deptype)
self.package_hash = package_hash
self.attr = attr
# Allow spec hashes to have an alternate computation method
self.override = override


#: Default Hash descriptor, used by Spec.dag_hash() and stored in the DB.
Expand All @@ -40,3 +43,9 @@ def __init__(self, deptype=('link', 'run'), package_hash=False, attr=None):
#: Full hash used in build pipelines to determine when to rebuild packages.
full_hash = SpecHashDescriptor(
deptype=('build', 'link', 'run'), package_hash=True, attr='_full_hash')


#: Package hash used as part of full hash
package_hash = SpecHashDescriptor(
deptype=(), package_hash=True, attr='_package_hash',
override=lambda s: s.package.content_hash())
66 changes: 50 additions & 16 deletions lib/spack/spack/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -1030,9 +1030,8 @@ class Spec(object):
#: Cache for spec's prefix, computed lazily in the corresponding property
_prefix = None

def __init__(self, spec_like=None,
normal=False, concrete=False, external_path=None,
external_modules=None, full_hash=None):
def __init__(self, spec_like=None, normal=False,
concrete=False, external_path=None, external_modules=None):
"""Create a new Spec.
Arguments:
Expand All @@ -1046,8 +1045,6 @@ def __init__(self, spec_like=None,
self._concrete = concrete
self.external_path = external_path
self.external_module = external_module
self._full_hash = full_hash
"""

# Copy if spec_like is a Spec.
Expand All @@ -1068,6 +1065,8 @@ def __init__(self, spec_like=None,

self._hash = None
self._build_hash = None
self._full_hash = None
self._package_hash = None
self._dunder_hash = None
self._package = None

Expand All @@ -1082,7 +1081,6 @@ def __init__(self, spec_like=None,
self._concrete = concrete
self.external_path = external_path
self.external_modules = Spec._format_module_list(external_modules)
self._full_hash = full_hash

# Older spack versions did not compute full_hash or build_hash,
# and we may not have the necessary information to recompute them
Expand Down Expand Up @@ -1501,6 +1499,8 @@ def _spec_hash(self, hash):
"""
# TODO: curently we strip build dependencies by default. Rethink
# this when we move to using package hashing on all specs.
if hash.override is not None:
return hash.override(self)
node_dict = self.to_node_dict(hash=hash)
yaml_text = syaml.dump(node_dict, default_flow_style=True)
return spack.util.hash.b32_hash(yaml_text)
Expand Down Expand Up @@ -1528,6 +1528,10 @@ def _cached_hash(self, hash, length=None):

return hash_string[:length]

def package_hash(self):
"""Compute the hash of the contents of the package for this node"""
return self._cached_hash(ht.package_hash)

def dag_hash(self, length=None):
"""This is Spack's default hash, used to identify installations.
Expand Down Expand Up @@ -1653,7 +1657,7 @@ def to_node_dict(self, hash=ht.dag_hash):
d['patches'] = variant._patches_in_order_of_appearance

if hash.package_hash:
package_hash = self.package.content_hash()
package_hash = self.package_hash()

# Full hashes are in bytes
if (not isinstance(package_hash, six.text_type)
Expand Down Expand Up @@ -1822,6 +1826,7 @@ def from_node_dict(node):
spec._hash = node.get('hash', None)
spec._build_hash = node.get('build_hash', None)
spec._full_hash = node.get('full_hash', None)
spec._package_hash = node.get('package_hash', None)

if 'version' in node or 'versions' in node:
spec.versions = vn.VersionList.from_dict(node)
Expand Down Expand Up @@ -3441,12 +3446,14 @@ def _dup(self, other, deps=True, cleardeps=True, caches=None):
self._dunder_hash = other._dunder_hash
self._normal = other._normal
self._full_hash = other._full_hash
self._package_hash = other._package_hash
else:
self._hash = None
self._build_hash = None
self._dunder_hash = None
self._normal = False
self._full_hash = None
self._package_hash = None

return changed

Expand Down Expand Up @@ -4281,19 +4288,22 @@ def splice(self, other, transitive):
# _dependents of these specs should not be trusted.
# Variants may also be ignored here for now...

# Keep all cached hashes because we will invalidate the ones that need
# invalidating later, and we don't want to invalidate unnecessarily

if transitive:
self_nodes = dict((s.name, s.copy(deps=False))
self_nodes = dict((s.name, s.copy(deps=False, caches=True))
for s in self.traverse(root=True)
if s.name not in other)
other_nodes = dict((s.name, s.copy(deps=False))
other_nodes = dict((s.name, s.copy(deps=False, caches=True))
for s in other.traverse(root=True))
else:
# If we're not doing a transitive splice, then we only want the
# root of other.
self_nodes = dict((s.name, s.copy(deps=False))
self_nodes = dict((s.name, s.copy(deps=False, caches=True))
for s in self.traverse(root=True)
if s.name != other.name)
other_nodes = {other.name: other.copy(deps=False)}
other_nodes = {other.name: other.copy(deps=False, caches=True)}

nodes = other_nodes.copy()
nodes.update(self_nodes)
Expand All @@ -4314,17 +4324,41 @@ def splice(self, other, transitive):
if any(dep not in other_nodes for dep in dependencies):
nodes[name].build_spec = other[name].build_spec

# Clear cached hashes
nodes[self.name].clear_cached_hashes()
ret = nodes[self.name]

# Clear cached hashes for all affected nodes
# Do not touch unaffected nodes
for dep in ret.traverse(root=True, order='post'):
opposite = other_nodes if dep.name in self_nodes else self_nodes
if any(name in dep for name in opposite.keys()):
# Record whether hashes are already cached
# So we don't try to compute a hash from insufficient
# provenance later
has_build_hash = getattr(dep, ht.build_hash.attr, None)
has_full_hash = getattr(dep, ht.full_hash.attr, None)

# package hash cannot be affected by splice
dep.clear_cached_hashes(ignore=['_package_hash'])

# Since this is a concrete spec, we want to make sure hashes
# are cached writing specs only writes cached hashes in case
# the spec is too old to have full provenance for these hashes,
# so we can't rely on doing it at write time.
if has_build_hash:
_ = dep.build_hash()
if has_full_hash:
_ = dep.full_hash()

return nodes[self.name]

def clear_cached_hashes(self):
def clear_cached_hashes(self, ignore=()):
"""
Clears all cached hashes in a Spec, while preserving other properties.
"""
for attr in ht.SpecHashDescriptor.hash_types:
if hasattr(self, attr):
setattr(self, attr, None)
if attr not in ignore:
if hasattr(self, attr):
setattr(self, attr, None)

def __hash__(self):
# If the spec is concrete, we leverage the DAG hash and just use
Expand Down
30 changes: 30 additions & 0 deletions lib/spack/spack/test/spec_semantics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1027,6 +1027,36 @@ def test_splice(self, transitive):
# Finally, the spec should know it's been spliced:
assert out.spliced

@pytest.mark.parametrize('transitive', [True, False])
def test_splice_with_cached_hashes(self, transitive):
spec = Spec('splice-t')
dep = Spec('splice-h+foo')
spec.concretize()
dep.concretize()

# monkeypatch hashes so we can test that they are cached
spec._full_hash = 'aaaaaa'
spec._build_hash = 'aaaaaa'
dep._full_hash = 'bbbbbb'
dep._build_hash = 'bbbbbb'
spec['splice-h']._full_hash = 'cccccc'
spec['splice-h']._build_hash = 'cccccc'
spec['splice-z']._full_hash = 'dddddd'
spec['splice-z']._build_hash = 'dddddd'
dep['splice-z']._full_hash = 'eeeeee'
dep['splice-z']._build_hash = 'eeeeee'

out = spec.splice(dep, transitive=transitive)
out_z_expected = (dep if transitive else spec)['splice-z']

assert out.full_hash() != spec.full_hash()
assert (out['splice-h'].full_hash() == dep.full_hash()) == transitive
assert out['splice-z'].full_hash() == out_z_expected.full_hash()

assert out.build_hash() != spec.build_hash()
assert (out['splice-h'].build_hash() == dep.build_hash()) == transitive
assert out['splice-z'].build_hash() == out_z_expected.build_hash()

@pytest.mark.parametrize('transitive', [True, False])
def test_splice_input_unchanged(self, transitive):
spec = Spec('splice-t').concretized()
Expand Down

0 comments on commit ebf2076

Please sign in to comment.