From b5d6614f3b09ba4eda6095727be1bdf2dd05fd14 Mon Sep 17 00:00:00 2001 From: Alexander Mazuruk Date: Thu, 10 Jun 2021 11:13:47 +0200 Subject: [PATCH] Add providing location for fetch_via_{vcs,git} This allows calling fetch_via_{vcs,git} multiple times for the same location to switch it to another revision. Example: fetch_via_git("git+https://github.com/nexB/fetchcode.git", location="/tmp/repo") will check out the tip of the default branch; fetch_via_git("git+https://github.com/nexB/fetchcode.git@ccb7b6199681910ccf047f1a18aa89ece45d665c", location="/tmp/repo") will then reset it to ccb7b6199681910ccf047f1a18aa89ece45d665c. Additionally remove some trailing whitespace and fix the indentation of a docstring. Signed-off-by: Alexander Mazuruk --- src/fetchcode/__init__.py | 4 ++-- src/fetchcode/vcs/__init__.py | 19 ++++++++++--------- src/fetchcode/vcs/git.py | 19 +++++++++++++------ 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/src/fetchcode/__init__.py b/src/fetchcode/__init__.py index c573991f..b44a04aa 100644 --- a/src/fetchcode/__init__.py +++ b/src/fetchcode/__init__.py @@ -41,7 +41,7 @@ def __init__(self, location, content_type, size, url): def fetch_http(url, location): """ Return a `Response` object built from fetching the content at a HTTP/HTTPS based `url` URL string - saving the content in a file at `location` + saving the content in a file at `location` """ r = requests.get(url) with open(location, 'wb') as f: @@ -59,7 +59,7 @@ def fetch_http(url, location): def fetch_ftp(url, location): """ Return a `Response` object built from fetching the content at a FTP based `url` URL string - saving the content in a file at `location` + saving the content in a file at `location` """ url_parts = urlparse(url) diff --git a/src/fetchcode/vcs/__init__.py b/src/fetchcode/vcs/__init__.py index c839aa3b..b8152134 100644 --- a/src/fetchcode/vcs/__init__.py +++ b/src/fetchcode/vcs/__init__.py @@ -29,9 +29,9 @@ class VCSResponse: """ Represent the response from fetching a VCS 
URL with: -- `dest_dir`: destination of directory -- `vcs_type`: VCS Type of URL (git,bzr,hg,svn) -- `domain` : Source of git VCS (GitHub, Gitlab, Bitbucket) + - `dest_dir`: destination of directory + - `vcs_type`: VCS Type of URL (git,bzr,hg,svn) + - `domain` : Source of git VCS (GitHub, Gitlab, Bitbucket) """ def __init__(self, dest_dir, vcs_type, domain): @@ -40,16 +40,17 @@ def __init__(self, dest_dir, vcs_type, domain): self.domain = domain -def fetch_via_vcs(url): +def fetch_via_vcs(url, location=None): """ Take `url` as input and store the content of it at location specified at `location` string - Return a VCSResponse object + Return a VCSResponse object """ parsed_url = urlparse(url) scheme = parsed_url.scheme domain = parsed_url.netloc - temp = tempfile.mkdtemp() - os.rmdir(temp) + if location is None: + location = tempfile.mkdtemp() + os.rmdir(location) if scheme not in vcs.all_schemes: raise Exception("Not a supported/known scheme.") @@ -58,6 +59,6 @@ def fetch_via_vcs(url): vcs_type = vcs_name backend = vcs.get_backend_for_scheme(scheme) - backend.obtain(dest=temp, url=misc.hide_url(url)) + backend.obtain(dest=location, url=misc.hide_url(url)) - return VCSResponse(dest_dir=temp, vcs_type=vcs_type, domain=domain) + return VCSResponse(dest_dir=location, vcs_type=vcs_type, domain=domain) \ No newline at end of file diff --git a/src/fetchcode/vcs/git.py b/src/fetchcode/vcs/git.py index 9d9c6cca..72bc9708 100644 --- a/src/fetchcode/vcs/git.py +++ b/src/fetchcode/vcs/git.py @@ -19,21 +19,28 @@ from urllib.parse import urlparse from fetchcode.vcs.pip._internal.vcs.git import Git +from fetchcode.vcs.pip._internal.vcs.versioncontrol import RevOptions from fetchcode.vcs.pip._internal.utils import misc from fetchcode.vcs.pip._internal.vcs import vcs from fetchcode.vcs import VCSResponse -def fetch_via_git(url): +def fetch_via_git(url, location=None): + """ + Take `url` as input and store the content of it at location specified at `location` string + If location 
string is not set, a tempfile.mkdtemp() will be created to store content in. + tempfile.mkdtemp must be cleaned by user manually. + Return a VCSResponse object + """ parsed_url = urlparse(url) scheme = parsed_url.scheme domain = parsed_url.netloc - temp = tempfile.mkdtemp() - os.rmdir(temp) + if location is None: + location = tempfile.mkdtemp() + os.rmdir(location) if scheme not in Git.schemes: raise Exception("Not a Git based scheme.") backend = vcs.get_backend(name="git") - backend.obtain(dest=temp, url=misc.hide_url(url)) - - return VCSResponse(dest_dir=temp, vcs_type="git", domain=domain) + backend.obtain(dest=location, url=misc.hide_url(url)) + return VCSResponse(dest_dir=location, vcs_type="git", domain=domain) \ No newline at end of file