Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace GitPython with pygit2 #2120

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 30 additions & 12 deletions conda_smithy/feedstock_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,28 @@
def get_repo(path, search_parent_directories=True):
mgorny marked this conversation as resolved.
Show resolved Hide resolved
repo = None
try:
import git
import pygit2

repo = git.Repo(
path, search_parent_directories=search_parent_directories
)
if search_parent_directories:
path = pygit2.discover_repository(path)
if path is not None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if path is none? Should we raise?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The original behavior was to return repo, i.e. None then, and I've preserved that. In other words, if anything fails (pygit2 is not installed, there is no repo, opening repo fails for some reason), the function returns None, as it used to.

try:
no_search = pygit2.enums.RepositoryOpenFlag.NO_SEARCH
except AttributeError: # pygit2 < 1.14
no_search = pygit2.GIT_REPOSITORY_OPEN_NO_SEARCH

repo = pygit2.Repository(path, no_search)
except ImportError:
pass
except git.InvalidGitRepositoryError:
except pygit2.GitError:
pass

return repo


def get_repo_root(path):
try:
return get_repo(path).working_tree_dir
return get_repo(path).workdir.rstrip(os.path.sep)
except AttributeError:
return None

Expand All @@ -32,8 +38,13 @@ def set_exe_file(filename, set_exe=True):

repo = get_repo(filename)
if repo:
mode = "+x" if set_exe else "-x"
repo.git.execute(["git", "update-index", f"--chmod={mode}", filename])
index_entry = repo.index[os.path.relpath(filename, repo.workdir)]
if set_exe:
index_entry.mode |= all_execute_permissions
else:
index_entry.mode &= ~all_execute_permissions
repo.index.add(index_entry)
repo.index.write()

mode = os.stat(filename).st_mode
if set_exe:
Expand All @@ -54,7 +65,8 @@ def write_file(filename):

repo = get_repo(filename)
if repo:
repo.index.add([filename])
repo.index.add(os.path.relpath(filename, repo.workdir))
repo.index.write()


def touch_file(filename):
Expand All @@ -68,7 +80,8 @@ def remove_file_or_dir(filename):

repo = get_repo(filename)
if repo:
repo.index.remove([filename], r=True)
repo.index.remove_all(["filename/**"])
repo.index.write()
shutil.rmtree(filename)


Expand All @@ -77,7 +90,11 @@ def remove_file(filename):

repo = get_repo(filename)
if repo:
repo.index.remove([filename])
try:
repo.index.remove(os.path.relpath(filename, repo.workdir))
repo.index.write()
except OSError: # this is specifically "file not in index"
pass

os.remove(filename)

Expand Down Expand Up @@ -106,4 +123,5 @@ def copy_file(src, dst):

repo = get_repo(dst)
if repo:
repo.index.add([dst])
repo.index.add(os.path.relpath(dst, repo.workdir))
repo.index.write()
48 changes: 34 additions & 14 deletions conda_smithy/feedstock_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@
import json
import os
import secrets
import subprocess
import tempfile
import time
from contextlib import contextmanager, redirect_stderr, redirect_stdout

import git
import pygit2
import requests
import scrypt

Expand Down Expand Up @@ -175,7 +176,6 @@ def feedstock_token_exists(user, project, token_repo, provider=None):
.replace("$GH_TOKEN", github_token)
.replace("${GH_TOKEN}", github_token)
)
git.Repo.clone_from(_token_repo, tmpdir, depth=1)
token_file = os.path.join(
tmpdir,
"tokens",
Expand Down Expand Up @@ -250,7 +250,10 @@ def is_valid_feedstock_token(
.replace("$GH_TOKEN", github_token)
.replace("${GH_TOKEN}", github_token)
)
git.Repo.clone_from(_token_repo, tmpdir, depth=1)
subprocess.run(
["git", "clone", "-q", "--depth", "1", _token_repo, tmpdir],
check=True,
)
token_file = os.path.join(
tmpdir,
"tokens",
Expand Down Expand Up @@ -351,7 +354,12 @@ def register_feedstock_token(
.replace("$GH_TOKEN", github_token)
.replace("${GH_TOKEN}", github_token)
)
repo = git.Repo.clone_from(_token_repo, tmpdir, depth=1)
# cloning via subprocesses to take advantage of git SSH support
subprocess.run(
["git", "clone", "-q", "--depth", "1", _token_repo, tmpdir],
check=True,
)
repo = pygit2.Repository(tmpdir)
mgorny marked this conversation as resolved.
Show resolved Hide resolved
token_file = os.path.join(
tmpdir,
"tokens",
Expand Down Expand Up @@ -413,17 +421,29 @@ def register_feedstock_token(
json.dump(token_data, fp)

# push
repo.index.add(token_file)
repo.index.commit(
"[ci skip] [skip ci] [cf admin skip] ***NO_CI*** "
"added token for {}/{} on provider{}".format(
user,
project,
"" if provider is None else " " + provider,
)
repo.index.add(os.path.relpath(token_file, tmpdir))
repo.index.write()
subprocess.run(
[
"git",
"commit",
"-q",
"-m",
"[ci skip] [skip ci] [cf admin skip] ***NO_CI*** "
"added token for {}/{} on provider{}".format(
user,
project,
"" if provider is None else " " + provider,
mgorny marked this conversation as resolved.
Show resolved Hide resolved
),
],
check=True,
cwd=tmpdir,
)

subprocess.run(
["git", "pull", "-q", "--rebase"], check=True, cwd=tmpdir
)
repo.remote().pull(rebase=True)
repo.remote().push()
subprocess.run(["git", "push", "-q"], check=True, cwd=tmpdir)
except Exception as e:
if "DEBUG_FEEDSTOCK_TOKENS" in os.environ:
raise e
Expand Down
54 changes: 27 additions & 27 deletions conda_smithy/feedstocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
import multiprocessing
import os

import git
from git import GitCommandError, Repo
import pygit2
from github import Github

from conda_smithy import github as smithy_github
Expand Down Expand Up @@ -50,11 +49,11 @@ def cloned_feedstocks(feedstocks_directory):

def fetch_feedstock(repo_dir):
"""Git fetch --all a single git repository."""
repo = Repo(repo_dir)
repo = pygit2.Repository(repo_dir)
for remote in repo.remotes:
try:
remote.fetch()
except GitCommandError:
except pygit2.GitError:
print(f"Failed to fetch {remote.name} from {remote.url}.")


Expand Down Expand Up @@ -87,12 +86,14 @@ def clone_feedstock(feedstock_gh_repo, feedstocks_dir):
clone_directory = os.path.join(feedstocks_dir, repo.name)
if not os.path.exists(clone_directory):
print(f"Cloning {repo.name}")
clone = Repo.clone_from(repo.clone_url, clone_directory)
clone.delete_remote("origin")
clone = Repo(clone_directory)
if "upstream" in [remote.name for remote in clone.remotes]:
clone.delete_remote("upstream")
clone.create_remote("upstream", url=repo.clone_url)
clone = pygit2.clone_repository(repo.clone_url, clone_directory)
clone.remotes.delete("origin")
Comment on lines +89 to +90
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So wait, now we can clone? I thought we were using git subprocesses for this.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, libgit2 can clone directly. However, as noted in the PR discussion this (as implemented here) works for public HTTPS repos only. Using libgit2 to clone via SSH would require fixed libgit2 on conda and explicit credential handling which would be a lot of code and not 100% equivalent to external git anyway.

else:
clone = pygit2.Repository(clone_directory)

if "upstream" in clone.remotes.names():
clone.remotes.delete("upstream")
clone.remotes.create("upstream", repo.clone_url)


def clone_all(gh_org, feedstocks_dir):
Expand Down Expand Up @@ -193,8 +194,8 @@ def feedstocks_repos(
random.shuffle(feedstocks)

for feedstock in feedstocks:
repo = git.Repo(feedstock.directory)
upstream = repo.remotes.upstream
repo = pygit2.Repository(feedstock.directory)
upstream = repo.remotes["upstream"]

if pull_up_to_date:
print("Fetching ", feedstock.package)
Expand Down Expand Up @@ -231,23 +232,23 @@ def feedstocks_yaml(
for repo, feedstock in feedstocks_repos(
organization, feedstocks_directory, **feedstocks_repo_kwargs
):
upstream = repo.remotes.upstream
try:
refs = upstream.refs
except AssertionError:
# In early versions of gitpython and empty list of refs resulted in an
# assertion error (https://github.com/gitpython-developers/GitPython/pull/499).
refs = []
upstream = repo.remotes["upstream"]
refs = [
ref
for ref in repo.references.iterator()
if ref.name.startswith("refs/remotes/upstream/")
]

if not refs:
upstream.fetch()
refs = upstream.refs
refs = [
ref
for ref in repo.references.iterator()
if ref.name.startswith("refs/remotes/upstream/")
]

for ref in refs:
remote_branch = (
ref.remote_head
) # .replace('{}/'.format(gh_me.login), '')
if remote_branch.endswith("HEAD"):
if ref.name == "refs/remotes/upstream/HEAD":
continue

try:
Expand All @@ -259,9 +260,8 @@ def feedstocks_yaml(
) as fh:
content = "".join(fh.readlines())
else:
blob = ref.commit.tree["recipe"]["meta.yaml"]
stream = blob.data_stream
content = stream.read().decode("utf-8")
blob = repo[ref.resolve().target].tree["recipe/meta.yaml"]
content = blob.data.decode("utf-8")
yaml = yaml_meta(content)
except:
# Add a helpful comment so we know what we are working with and reraise.
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies:
- jinja2
- requests
- pycryptodome
- gitpython
- pygit2
- pygithub >=2,<3
- ruamel.yaml
- conda-forge-pinning
Expand Down
Loading
Loading