Skip to content

Commit

Permalink
Modify getting Metaflow version
Browse files Browse the repository at this point in the history
Favors reading the INFO file if present to be able to have the
most accurate version of Metaflow when executing remotely (especially in
the presence of extensions).

Also limit reading the INFO file to once per process (as opposed to possibly
twice).

Finally, gets the version of the source of Metaflow (and not the current
directory)
  • Loading branch information
romain-intel committed Sep 4, 2024
1 parent 05521ff commit 2201f4e
Show file tree
Hide file tree
Showing 7 changed files with 133 additions and 83 deletions.
20 changes: 12 additions & 8 deletions metaflow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,9 @@ class and related decorators.
Metaflow GitHub page.
"""

import importlib
import os
import sys
import types

from os import path

CURRENT_DIRECTORY = path.dirname(path.abspath(__file__))
INFO_FILE = path.join(path.dirname(CURRENT_DIRECTORY), "INFO")

from metaflow.extension_support import (
alias_submodules,
Expand All @@ -61,6 +56,8 @@ class and related decorators.
_ext_debug,
)

from metaflow.metaflow_version import call_git_describe as _call_git_describe
from metaflow.metaflow_version import format_git_describe as _format_git_describe

# We load the module overrides *first* explicitly. Non overrides can be loaded
# in toplevel as well but these can be loaded first if needed. Note that those
Expand Down Expand Up @@ -164,8 +161,13 @@ class and related decorators.
alias_submodules(extension_module, tl_package, None, extra_indent=True)
)
version_info = getattr(extension_module, "__mf_extensions__", "<unk>")
if extension_module.__version__:
version_info = "%s(%s)" % (version_info, extension_module.__version__)
ext_version = _format_git_describe(
_call_git_describe(cwd=os.path.dirname(extension_module.__file__))
)
if ext_version is None:
ext_version = extension_module.__version__
if ext_version:
version_info = "%s(%s)" % (version_info, ext_version)
__version_addl__.append(version_info)

if __version_addl__:
Expand All @@ -191,6 +193,8 @@ class and related decorators.
"extension_module",
"tl_package",
"version_info",
"_call_git_describe",
"_format_git_describe",
]:
try:
del globals()[_n]
Expand Down
2 changes: 1 addition & 1 deletion metaflow/client/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from metaflow.unbounded_foreach import CONTROL_TASK_TAG
from metaflow.util import cached_property, is_stringish, resolve_identity, to_unicode

from .. import INFO_FILE
from ..info_file import INFO_FILE
from .filecache import FileCache

try:
Expand Down
28 changes: 13 additions & 15 deletions metaflow/extension_support/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import print_function

import importlib
import json
import os
import re
import sys
Expand All @@ -12,6 +11,9 @@
from importlib.abc import MetaPathFinder, Loader
from itertools import chain

from metaflow.info_file import read_info_file


#
# This file provides the support for Metaflow's extension mechanism which allows
# a Metaflow developer to extend metaflow by providing a package `metaflow_extensions`.
Expand Down Expand Up @@ -304,20 +306,16 @@ def _get_extension_packages():
# If we have an INFO file with the appropriate information (if running from a saved
# code package for example), we use that directly
# Pre-compute on _extension_points
from metaflow import INFO_FILE

try:
with open(INFO_FILE, encoding="utf-8") as contents:
all_pkg, ext_to_pkg = json.load(contents).get("ext_info", (None, None))
if all_pkg is not None and ext_to_pkg is not None:
_ext_debug("Loading pre-computed information from INFO file")
# We need to properly convert stuff in ext_to_pkg
for k, v in ext_to_pkg.items():
v = [MFExtPackage(*d) for d in v]
ext_to_pkg[k] = v
return all_pkg, ext_to_pkg
except IOError:
pass
info_content = read_info_file()
if info_content:
all_pkg, ext_to_pkg = info_content.get("ext_info", (None, None))
if all_pkg is not None and ext_to_pkg is not None:
_ext_debug("Loading pre-computed information from INFO file")
# We need to properly convert stuff in ext_to_pkg
for k, v in ext_to_pkg.items():
v = [MFExtPackage(*d) for d in v]
ext_to_pkg[k] = v
return all_pkg, ext_to_pkg

# Check if we even have extensions
try:
Expand Down
25 changes: 25 additions & 0 deletions metaflow/info_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import json

from os import path

# Directory that contains this module.
CURRENT_DIRECTORY = path.dirname(path.abspath(__file__))
# The INFO file is looked up in the parent of the directory containing this module.
INFO_FILE = path.join(path.dirname(CURRENT_DIRECTORY), "INFO")

# Process-wide cache so the INFO file is read at most once.
# `_info_file_present` is None until the first read attempt, then True/False.
_info_file_content = None
_info_file_present = None


def read_info_file():
    """Return the parsed JSON content of the INFO file, or None.

    The file at `INFO_FILE` is read and parsed on the first call only; the
    outcome (content or absence) is cached in module globals and returned on
    every later call without touching the filesystem again.

    Returns
    -------
    object or None
        Whatever `json.load` produced for the INFO file, or None if the file
        is missing, unreadable, or does not contain valid UTF-8 JSON.
    """
    global _info_file_content
    global _info_file_present
    if _info_file_present is None:
        try:
            with open(INFO_FILE, "r", encoding="utf-8") as contents:
                _info_file_content = json.load(contents)
            _info_file_present = True
        except (IOError, ValueError):
            # IOError: missing/unreadable file. ValueError: malformed JSON or
            # bad encoding (JSONDecodeError and UnicodeDecodeError are both
            # ValueError subclasses). Treat every failure as "no INFO file" so
            # the first call and all later calls agree.
            _info_file_content = None
            _info_file_present = False
    if _info_file_present:
        return _info_file_content
    return None
136 changes: 79 additions & 57 deletions metaflow/metaflow_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@

# This file is adapted from https://github.com/aebrahim/python-git-version

from subprocess import check_output, CalledProcessError
from os import path, name, devnull, environ, listdir
import json
from subprocess import check_output, CalledProcessError, DEVNULL
from os import path, name, environ, listdir

from metaflow import CURRENT_DIRECTORY, INFO_FILE
from metaflow.info_file import CURRENT_DIRECTORY, read_info_file

__all__ = ("get_version",)

Expand Down Expand Up @@ -57,87 +56,110 @@ def find_git_on_windows():
GIT_COMMAND = find_git_on_windows()


def call_git_describe(abbrev=7, cwd=None):
    """Return the output of ``git describe`` for the repository at `cwd`.

    Parameters
    ----------
    abbrev : int, default 7
        Value passed to git's ``--abbrev`` option.
    cwd : str, optional
        Directory in which git is run. Defaults to `CURRENT_DIRECTORY`.

    Returns
    -------
    str or None
        The stripped output of
        ``git describe --tags --dirty --long --abbrev=<abbrev>``, or None if
        the top-level directory of the repository is not named ``metaflow``
        or if git cannot be run / exits with an error.
    """
    try:
        if cwd is None:
            cwd = CURRENT_DIRECTORY
        # First, make sure we are actually in a Metaflow repo, not some other
        # repo that happens to contain this directory.
        # NOTE(review): callers that pass an extension's directory will get
        # None unless that repository's top-level directory is also named
        # "metaflow" -- confirm this is intended.
        arguments = [GIT_COMMAND, "rev-parse", "--show-toplevel"]
        reponame = (
            check_output(arguments, cwd=cwd, stderr=DEVNULL).decode("ascii").strip()
        )
        if path.basename(reponame) != "metaflow":
            return None

        # Else we assume that we are in a proper repo
        arguments = [
            GIT_COMMAND,
            "describe",
            "--tags",
            "--dirty",
            "--long",
            "--abbrev=%d" % abbrev,
        ]
        return check_output(arguments, cwd=cwd, stderr=DEVNULL).decode("ascii").strip()

    except (OSError, CalledProcessError):
        # git missing, not a repository, or no tag to describe: best effort.
        return None


def format_git_describe(git_str, public=False):
    """Format the result of calling ``git describe`` as a Python version.

    The expected input is the ``--long`` form ``<tag>-<post>-g<hash>`` with an
    optional ``-dirty`` suffix. The string is parsed from the right so that a
    tag which itself contains ``-`` is kept intact.

    Parameters
    ----------
    git_str : str or None
        Output of ``git describe`` (None is passed through).
    public : bool, default False
        When True, omit local information (hash and dirtiness).

    Returns
    -------
    str or None
        None if `git_str` is None. ``<tag>`` when `post` is 0, otherwise
        ``<tag>.post<post>``; unless `public` is set, ``-git<hash>`` (for
        post != 0) and ``-dirty`` (if present) are appended. A string that is
        not in the ``--long`` form is returned as-is (minus ``-dirty`` when
        `public` is set).
    """
    if git_str is None:
        return None

    dirty = ""
    if git_str.endswith("-dirty"):
        git_str = git_str[: -len("-dirty")]
        dirty = "-dirty"

    # Split from the right: the last two fields are always <post> and g<hash>,
    # whatever the tag looks like.
    parts = git_str.rsplit("-", 2)
    if len(parts) != 3 or not parts[1].isdigit() or not parts[2].startswith("g"):
        # Not "<tag>-<post>-g<hash>" (e.g. a bare tag): nothing to reformat.
        return git_str if public else git_str + dirty

    tag, post, h = parts
    if post == "0":
        # Exactly at a tag.
        return tag if public else tag + dirty

    if public:
        return "%s.post%s" % (tag, post)

    # h[1:] drops the leading "g" that git prepends to the abbreviated hash.
    return "%s.post%s-git%s%s" % (tag, post, h[1:], dirty)


def read_info_version():
"""Read version information from INFO file"""
try:
with open(INFO_FILE, "r") as contents:
return json.load(contents).get("metaflow_version")
except IOError:
return None
info_file = read_info_file()
if info_file:
return info_file.get("metaflow_version")
return None


def get_version(pep440=False):
def get_version(public=False):
"""Tracks the version number.
pep440: bool
When True, this function returns a version string suitable for
a release as defined by PEP 440. When False, the githash (if
available) will be appended to the version string.
If the script is located within an active git repository,
git-describe is used to get the version information.
public: bool
When True, this function returns a *public* version specification which
doesn't include any local information (dirtiness or hash). See
https://packaging.python.org/en/latest/specifications/version-specifiers/#version-scheme
Otherwise, the version logged by package installer is returned.
We first check the INFO file to see if we recorded a version of Metaflow. If there
is none, we check if we are in a GIT repository and if so, form the version
from that.
If even that information isn't available (likely when executing on a
remote cloud instance), the version information is returned from INFO file
in the current directory.
Otherwise, we return the version of Metaflow that was installed.
"""

version = format_git_describe(call_git_describe(), pep440=pep440)
version_addl = None
if version is None: # not a git repository
import metaflow

# To get the version we do the following:
# - First check if we have an INFO file with it. If so, use that as it is
# the most reliable way to get the version. In particular, when running remotely,
# metaflow is installed in a directory and if any extension using distutils,
# querying the version directly would fail to produce the correct result
# - Check if we are in the GIT repository and if so, use the git describe
# - If we don't have an INFO file, we look at the version information that is
# populated by metaflow and the extensions.
version = (
read_info_version()
) # Version info is cached in INFO file; includes extension info
if version:
return version

import metaflow

version_addl = metaflow.__version_addl__

version = format_git_describe(
call_git_describe(cwd=path.dirname(metaflow.__file__)), public=public
)

if version is None:
version = metaflow.__version__
version_addl = metaflow.__version_addl__
if version is None: # not a proper python package
return read_info_version()

if version_addl:
return "+".join([version, version_addl])

return version
3 changes: 2 additions & 1 deletion metaflow/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
from .metaflow_config import DEFAULT_PACKAGE_SUFFIXES
from .exception import MetaflowException
from .util import to_unicode
from . import R, INFO_FILE
from . import R
from .info_file import INFO_FILE

DEFAULT_SUFFIXES_LIST = DEFAULT_PACKAGE_SUFFIXES.split(",")
METAFLOW_SUFFIXES_LIST = [".py", ".html", ".css", ".js"]
Expand Down
2 changes: 1 addition & 1 deletion metaflow/plugins/pypi/conda_decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from metaflow.metaflow_environment import InvalidEnvironmentException
from metaflow.util import get_metaflow_root

from ... import INFO_FILE
from ...info_file import INFO_FILE


class CondaStepDecorator(StepDecorator):
Expand Down

0 comments on commit 2201f4e

Please sign in to comment.