Skip to content

Commit

Permalink
Add a ProviderIndex cache.
Browse files Browse the repository at this point in the history
- Spack will check if the index needs updating, and will only parse
  all package files if it does.

- Spack tries to parse as few package files as necessary.
  • Loading branch information
tgamblin committed Aug 9, 2016
1 parent cf2f902 commit faa0a0e
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 13 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
/var/spack/stage
/var/spack/cache
/var/spack/repos/*/index.yaml
/var/spack/repos/*/lock
*.pyc
/opt
*~
Expand Down
140 changes: 127 additions & 13 deletions lib/spack/spack/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
##############################################################################
import os
import stat
import shutil
import errno
import exceptions
import sys
import inspect
Expand All @@ -33,6 +36,7 @@
import yaml

import llnl.util.tty as tty
from llnl.util.lock import Lock
from llnl.util.filesystem import *

import spack.error
Expand Down Expand Up @@ -394,13 +398,25 @@ def check(condition, msg):
if not condition: raise BadRepoError(msg)

# Validate repository layout.
self.config_file = join_path(self.root, repo_config_name)
self.config_file = join_path(self.root, repo_config_name)
check(os.path.isfile(self.config_file),
"No %s found in '%s'" % (repo_config_name, root))

self.packages_path = join_path(self.root, packages_dir_name)
check(os.path.isdir(self.packages_path),
"No directory '%s' found in '%s'" % (repo_config_name, root))

self.index_file = join_path(self.root, repo_index_name)
check(not os.path.exists(self.index_file) or
(os.path.isfile(self.index_file) and os.access(self.index_file, os.R_OK|os.W_OK)),
"Cannot access repository index file in %s" % root)

# lock file for reading/writing the index
self._lock_path = join_path(self.root, 'lock')
if not os.path.exists(self._lock_path):
touch(self._lock_path)
self._lock = Lock(self._lock_path)

# Read configuration and validate namespace
config = self._read_config()
check('namespace' in config, '%s must define a namespace.'
Expand All @@ -424,7 +440,14 @@ def check(condition, msg):
self._modules = {}
self._classes = {}
self._instances = {}

# list of packages that are newer than the index.
self._needs_update = []

# Index of virtual dependencies
self._provider_index = None

# Cached list of package names.
self._all_package_names = None

# make sure the namespace for packages in this repo exists.
Expand Down Expand Up @@ -611,13 +634,56 @@ def purge(self):
self._instances.clear()


def _update_provider_index(self):
# Check modification dates of all packages
self._fast_package_check()

def read():
with open(self.index_file) as f:
self._provider_index = ProviderIndex.from_yaml(f)

# Read the old ProviderIndex, or make a new one.
index_existed = os.path.isfile(self.index_file)
if index_existed and not self._needs_update:
self._lock.acquire_read()
try:
read()
finally:
self._lock.release_read()

else:
self._lock.acquire_write()
try:
if index_existed:
with open(self.index_file) as f:
self._provider_index = ProviderIndex.from_yaml(f)
else:
self._provider_index = ProviderIndex()

for pkg_name in self._needs_update:
namespaced_name = '%s.%s' % (self.namespace, pkg_name)
self._provider_index.remove_provider(namespaced_name)
self._provider_index.update(namespaced_name)


tmp = self.index_file + '.tmp'
with open(tmp, 'w') as f:
self._provider_index.to_yaml(f)
os.rename(tmp, self.index_file)

except:
shutil.rmtree(tmp, ignore_errors=True)
raise

finally:
self._lock.release_write()


@property
def provider_index(self):
"""A provider index with names *specific* to this repo."""
if self._provider_index is None:
namespaced_names = ['%s.%s' % (self.namespace, n)
for n in self.all_package_names()]
self._provider_index = ProviderIndex(namespaced_names)
self._update_provider_index()
return self._provider_index


Expand Down Expand Up @@ -663,36 +729,84 @@ def filename_for_package_name(self, spec):
return join_path(pkg_dir, package_file_name)


def all_package_names(self):
"""Returns a sorted list of all package names in the Repo."""
def _fast_package_check(self):
"""List packages in the repo and cehck whether index is up to date.
Both of these opreations require checking all `package.py`
files so we do them at the same time. We list the repo
directory and look at package.py files, and we compare the
index modification date with the ost recently modified package
file, storing the result.
The implementation here should try to minimize filesystem
calls. At the moment, it is O(number of packages) and makes
about one stat call per package. This is resonably fast, and
avoids actually importing packages in Spack, which is slow.
"""
if self._all_package_names is None:
self._all_package_names = []

# Get index modification time.
index_mtime = 0
if os.path.exists(self.index_file):
sinfo = os.stat(self.index_file)
index_mtime = sinfo.st_mtime

for pkg_name in os.listdir(self.packages_path):
# Skip non-directories in the package root.
pkg_dir = join_path(self.packages_path, pkg_name)
if not os.path.isdir(pkg_dir):
continue

# Skip directories without a package.py in them.
pkg_file = join_path(self.packages_path, pkg_name, package_file_name)
if not os.path.isfile(pkg_file):
continue

# Warn about invalid names that look like packages.
if not valid_module_name(pkg_name):
tty.warn("Skipping package at %s. '%s' is not a valid Spack module name."
% (pkg_dir, pkg_name))
continue

# construct the file name from the directory
pkg_file = join_path(
self.packages_path, pkg_name, package_file_name)

# Use stat here to avoid lots of calls to the filesystem.
try:
sinfo = os.stat(pkg_file)
except OSError as e:
if e.errno == errno.ENOENT:
# No package.py file here.
continue
elif e.errno == errno.EACCES:
tty.warn("Can't read package file %s." % pkg_file)
continue
raise e

# if it's not a file, skip it.
if stat.S_ISDIR(sinfo.st_mode):
continue

# All checks passed. Add it to the list.
self._all_package_names.append(pkg_name)

# record the package if it is newer than the index.
if sinfo.st_mtime > index_mtime:
self._needs_update.append(pkg_name)

self._all_package_names.sort()

return self._all_package_names


def all_package_names(self):
"""Returns a sorted list of all package names in the Repo."""
self._fast_package_check()
return self._all_package_names


def all_packages(self):
"""Iterator over all packages in the repository.
Use this with care, because loading packages is slow.
"""
for name in self.all_package_names():
yield self.get(name)

Expand Down
14 changes: 14 additions & 0 deletions lib/spack/spack/virtual.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,20 @@ def merge(self, other):
spdict[provided_spec] += opdict[provided_spec]


def remove_provider(self, pkg_name):
"""Remove a provider from the ProviderIndex."""
for pkg in self.providers:
pkg_dict = self.providers[pkg]
for provided, pset in pkg_dict.items():
for provider in pset:
if provider.fullname == pkg_name:
pset.remove(provider)
if not pset:
del pkg_dict[provided]
if not pkg_dict:
del self.providers[pkg]


def copy(self):
"""Deep copy of this ProviderIndex."""
clone = ProviderIndex()
Expand Down

0 comments on commit faa0a0e

Please sign in to comment.