From 2201f4e6fc9558d3922cb32a913c1076f9eddfcf Mon Sep 17 00:00:00 2001 From: Romain Cledat Date: Fri, 30 Aug 2024 00:03:10 -0700 Subject: [PATCH] Modify getting Metaflow version Favors reading the INFO file if present to be able to have the most accurate version of Metaflow when executing remotely (especially in the presence of extensions). Also limit reading the INFO file to once per process (as opposed to possibly twice). Finally, gets the version of the source of Metaflow (and not the current directory) --- metaflow/__init__.py | 20 ++-- metaflow/client/core.py | 2 +- metaflow/extension_support/__init__.py | 28 +++-- metaflow/info_file.py | 25 +++++ metaflow/metaflow_version.py | 136 +++++++++++++---------- metaflow/package.py | 3 +- metaflow/plugins/pypi/conda_decorator.py | 2 +- 7 files changed, 133 insertions(+), 83 deletions(-) create mode 100644 metaflow/info_file.py diff --git a/metaflow/__init__.py b/metaflow/__init__.py index c676a5c7a9b..537d6223635 100644 --- a/metaflow/__init__.py +++ b/metaflow/__init__.py @@ -42,14 +42,9 @@ class and related decorators. Metaflow GitHub page. """ -import importlib +import os import sys -import types -from os import path - -CURRENT_DIRECTORY = path.dirname(path.abspath(__file__)) -INFO_FILE = path.join(path.dirname(CURRENT_DIRECTORY), "INFO") from metaflow.extension_support import ( alias_submodules, @@ -61,6 +56,8 @@ class and related decorators. _ext_debug, ) +from metaflow.metaflow_version import call_git_describe as _call_git_describe +from metaflow.metaflow_version import format_git_describe as _format_git_describe # We load the module overrides *first* explicitly. Non overrides can be loaded # in toplevel as well but these can be loaded first if needed. Note that those @@ -164,8 +161,13 @@ class and related decorators. 
alias_submodules(extension_module, tl_package, None, extra_indent=True) ) version_info = getattr(extension_module, "__mf_extensions__", "") - if extension_module.__version__: - version_info = "%s(%s)" % (version_info, extension_module.__version__) + ext_version = _format_git_describe( + _call_git_describe(cwd=os.path.dirname(extension_module.__file__)) + ) + if ext_version is None: + ext_version = extension_module.__version__ + if ext_version: + version_info = "%s(%s)" % (version_info, ext_version) __version_addl__.append(version_info) if __version_addl__: @@ -191,6 +193,8 @@ class and related decorators. "extension_module", "tl_package", "version_info", + "_call_git_describe", + "_format_git_describe", ]: try: del globals()[_n] diff --git a/metaflow/client/core.py b/metaflow/client/core.py index 0feb2db898c..333c4e0ff0e 100644 --- a/metaflow/client/core.py +++ b/metaflow/client/core.py @@ -34,7 +34,7 @@ from metaflow.unbounded_foreach import CONTROL_TASK_TAG from metaflow.util import cached_property, is_stringish, resolve_identity, to_unicode -from .. import INFO_FILE +from ..info_file import INFO_FILE from .filecache import FileCache try: diff --git a/metaflow/extension_support/__init__.py b/metaflow/extension_support/__init__.py index be32dc961ab..ddc7dcc1974 100644 --- a/metaflow/extension_support/__init__.py +++ b/metaflow/extension_support/__init__.py @@ -1,7 +1,6 @@ from __future__ import print_function import importlib -import json import os import re import sys @@ -12,6 +11,9 @@ from importlib.abc import MetaPathFinder, Loader from itertools import chain +from metaflow.info_file import read_info_file + + # # This file provides the support for Metaflow's extension mechanism which allows # a Metaflow developer to extend metaflow by providing a package `metaflow_extensions`. 
@@ -304,20 +306,16 @@ def _get_extension_packages(): # If we have an INFO file with the appropriate information (if running from a saved # code package for example), we use that directly # Pre-compute on _extension_points - from metaflow import INFO_FILE - - try: - with open(INFO_FILE, encoding="utf-8") as contents: - all_pkg, ext_to_pkg = json.load(contents).get("ext_info", (None, None)) - if all_pkg is not None and ext_to_pkg is not None: - _ext_debug("Loading pre-computed information from INFO file") - # We need to properly convert stuff in ext_to_pkg - for k, v in ext_to_pkg.items(): - v = [MFExtPackage(*d) for d in v] - ext_to_pkg[k] = v - return all_pkg, ext_to_pkg - except IOError: - pass + info_content = read_info_file() + if info_content: + all_pkg, ext_to_pkg = info_content.get("ext_info", (None, None)) + if all_pkg is not None and ext_to_pkg is not None: + _ext_debug("Loading pre-computed information from INFO file") + # We need to properly convert stuff in ext_to_pkg + for k, v in ext_to_pkg.items(): + v = [MFExtPackage(*d) for d in v] + ext_to_pkg[k] = v + return all_pkg, ext_to_pkg # Check if we even have extensions try: diff --git a/metaflow/info_file.py b/metaflow/info_file.py new file mode 100644 index 00000000000..6d56a6152ba --- /dev/null +++ b/metaflow/info_file.py @@ -0,0 +1,25 @@ +import json + +from os import path + +CURRENT_DIRECTORY = path.dirname(path.abspath(__file__)) +INFO_FILE = path.join(path.dirname(CURRENT_DIRECTORY), "INFO") + +_info_file_content = None +_info_file_present = None + + +def read_info_file(): + global _info_file_content + global _info_file_present + if _info_file_present is None: + _info_file_present = path.exists(INFO_FILE) + if _info_file_present: + try: + with open(INFO_FILE, "r", encoding="utf-8") as contents: + _info_file_content = json.load(contents) + except IOError: + pass + if _info_file_present: + return _info_file_content + return None diff --git a/metaflow/metaflow_version.py b/metaflow/metaflow_version.py 
index 9a36dc79ae0..249929f66ba 100644 --- a/metaflow/metaflow_version.py +++ b/metaflow/metaflow_version.py @@ -7,11 +7,10 @@ # This file is adapted from https://github.com/aebrahim/python-git-version -from subprocess import check_output, CalledProcessError -from os import path, name, devnull, environ, listdir -import json +from subprocess import check_output, CalledProcessError, DEVNULL +from os import path, name, environ, listdir -from metaflow import CURRENT_DIRECTORY, INFO_FILE +from metaflow.info_file import CURRENT_DIRECTORY, read_info_file __all__ = ("get_version",) @@ -57,87 +56,110 @@ def find_git_on_windows(): GIT_COMMAND = find_git_on_windows() -def call_git_describe(abbrev=7): +def call_git_describe(abbrev=7, cwd=None): """return the string output of git describe""" try: - - # first, make sure we are actually in a Metaflow repo, - # not some other repo - with open(devnull, "w") as fnull: + if cwd is None: + cwd = CURRENT_DIRECTORY + # first, make sure we are actually in a Metaflow repo, + # not some other repo arguments = [GIT_COMMAND, "rev-parse", "--show-toplevel"] reponame = ( - check_output(arguments, cwd=CURRENT_DIRECTORY, stderr=fnull) - .decode("ascii") - .strip() + check_output(arguments, cwd=cwd, stderr=DEVNULL).decode("ascii").strip() ) + print("For %s got reponame: %s" % (cwd, reponame)) if path.basename(reponame) != "metaflow": return None - - with open(devnull, "w") as fnull: - arguments = [GIT_COMMAND, "describe", "--tags", "--abbrev=%d" % abbrev] - return ( - check_output(arguments, cwd=CURRENT_DIRECTORY, stderr=fnull) - .decode("ascii") - .strip() - ) - - except (OSError, CalledProcessError): + # Else we assume that we are in a proper repo + + arguments = [ + GIT_COMMAND, + "describe", + "--tags", + "--dirty", + "--long", + "--abbrev=%d" % abbrev, + ] + return check_output(arguments, cwd=cwd, stderr=DEVNULL).decode("ascii").strip() + + except (OSError, CalledProcessError) as e: return None -def format_git_describe(git_str, pep440=False): 
+def format_git_describe(git_str, public=False): """format the result of calling 'git describe' as a python version""" if git_str is None: return None - if "-" not in git_str: # currently at a tag - return git_str + splits = git_str.split("-") + if len(splits) == 4: + # Formatted as TAG-POST-HASH-dirty + tag, post, h = splits[:3] + dirty = "-dirty" else: - # formatted as version-N-githash - # want to convert to version.postN-githash - git_str = git_str.replace("-", ".post", 1) - if pep440: # does not allow git hash afterwards - return git_str.split("-")[0] - else: - return git_str.replace("-g", "+git") + # Formatted as TAG-POST-HASH + tag, post, h = splits + dirty = "" + if post == "0": + if public: + return tag + return tag + dirty + + if public: + return "%s.post%s" % (tag, post) + + return "%s.post%s-git%s%s" % (tag, post, h[1:], dirty) def read_info_version(): """Read version information from INFO file""" - try: - with open(INFO_FILE, "r") as contents: - return json.load(contents).get("metaflow_version") - except IOError: - return None + info_file = read_info_file() + if info_file: + return info_file.get("metaflow_version") + return None -def get_version(pep440=False): +def get_version(public=False): """Tracks the version number. - pep440: bool - When True, this function returns a version string suitable for - a release as defined by PEP 440. When False, the githash (if - available) will be appended to the version string. - - If the script is located within an active git repository, - git-describe is used to get the version information. + public: bool + When True, this function returns a *public* version specification which + doesn't include any local information (dirtiness or hash). See + https://packaging.python.org/en/latest/specifications/version-specifiers/#version-scheme - Otherwise, the version logged by package installer is returned. + We first check the INFO file to see if we recorded a version of Metaflow. 
If there + is none, we check if we are in a GIT repository and if so, form the version + from that. - If even that information isn't available (likely when executing on a - remote cloud instance), the version information is returned from INFO file - in the current directory. + Otherwise, we return the version of Metaflow that was installed. """ - version = format_git_describe(call_git_describe(), pep440=pep440) - version_addl = None - if version is None: # not a git repository - import metaflow - + # To get the version we do the following: + # - First check if we have an INFO file with it. If so, use that as it is + # the most reliable way to get the version. In particular, when running remotely, + # metaflow is installed in a directory and if any extension is using distutils, + # querying the version directly would fail to produce the correct result + # - Check if we are in the GIT repository and if so, use the git describe + # - Otherwise, we look at the version information that is + # populated by metaflow and the extensions. + version = ( + read_info_version() + ) # Version info is cached in INFO file; includes extension info + if version: + return version + + import metaflow + + version_addl = metaflow.__version_addl__ + + version = format_git_describe( + call_git_describe(cwd=path.dirname(metaflow.__file__)), public=public + ) + + if version is None: version = metaflow.__version__ - version_addl = metaflow.__version_addl__ - if version is None: # not a proper python package - return read_info_version() + if version_addl: return "+".join([version, version_addl]) + return version diff --git a/metaflow/package.py b/metaflow/package.py index a3431f03e68..30435dce47f 100644 --- a/metaflow/package.py +++ b/metaflow/package.py @@ -10,7 +10,8 @@ from .metaflow_config import DEFAULT_PACKAGE_SUFFIXES from .exception import MetaflowException from .util import to_unicode -from . import R, INFO_FILE +from . 
import R +from .info_file import INFO_FILE DEFAULT_SUFFIXES_LIST = DEFAULT_PACKAGE_SUFFIXES.split(",") METAFLOW_SUFFIXES_LIST = [".py", ".html", ".css", ".js"] diff --git a/metaflow/plugins/pypi/conda_decorator.py b/metaflow/plugins/pypi/conda_decorator.py index 3d7dabe06b8..b60d6e59914 100644 --- a/metaflow/plugins/pypi/conda_decorator.py +++ b/metaflow/plugins/pypi/conda_decorator.py @@ -12,7 +12,7 @@ from metaflow.metaflow_environment import InvalidEnvironmentException from metaflow.util import get_metaflow_root -from ... import INFO_FILE +from ...info_file import INFO_FILE class CondaStepDecorator(StepDecorator):