Skip to content

Commit 85d15d2

Browse files
committed
Switch data_source dependency to GitPython
While neither pygit2 nor GitPython has a great API or documentation, GitPython has the advantage of being pure Python and supporting the same proxy features as git. Signed-off-by: Pierre Tardy <pierre.tardy@renault.com>
1 parent dfb1ab5 commit 85d15d2

File tree

4 files changed

+103
-195
lines changed

4 files changed

+103
-195
lines changed

etc/nix/flake.nix

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@
7474
name = "vulnerablecode-${version}";
7575
src = vulnerablecode-src;
7676
dontConfigure = true; # do not use ./configure
77-
propagatedBuildInputs = [ pythonEnv postgresql ];
77+
propagatedBuildInputs = [ pythonEnv postgresql gitMinimal];
7878

7979
postPatch = ''
8080
# Make sure the pycodestyle binary in $PATH is used.

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ psycopg2==2.8.4
3333
ptyprocess==0.6.0
3434
py==1.8.0
3535
pycparser==2.20
36-
pygit2==1.5.0
36+
gitpython==3.1.14
3737
Pygments==2.7.4
3838
pyparsing==2.4.5
3939
pytest==5.3.2

vulnerabilities/data_source.py

Lines changed: 42 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
from typing import Tuple
4040
import xml.etree.ElementTree as ET
4141

42-
import pygit2
42+
from git import Repo, DiffIndex
4343
from packageurl import PackageURL
4444

4545
from vulnerabilities.oval_parser import OvalParser
@@ -319,35 +319,41 @@ def _collect_file_changes(
319319
file_ext: Optional[str],
320320
) -> Tuple[Set[str], Set[str]]:
321321

322-
previous_commit = None
323322
added_files, updated_files = set(), set()
324323

325-
for commit in self._repo.walk(self._repo.head.target, pygit2.GIT_SORT_TIME):
326-
commit_time = commit.commit_time + commit.commit_time_offset # convert to UTC
327-
328-
if commit_time < self.cutoff_timestamp:
324+
# find the most ancient commit we need to diff with
325+
cutoff_commit = None
326+
for commit in self._repo.iter_commits(self._repo.head):
327+
if commit.committed_date < self.cutoff_timestamp:
329328
break
329+
cutoff_commit = commit
330330

331-
if previous_commit is None:
332-
previous_commit = commit
333-
continue
331+
if cutoff_commit is None:
332+
return added_files, updated_files
334333

335-
for d in commit.tree.diff_to_tree(previous_commit.tree).deltas:
336-
if not _include_file(d.new_file.path, subdir, recursive, file_ext) or d.is_binary:
337-
continue
334+
def _is_binary(d: DiffIndex):
335+
if not d.b_blob:
336+
return False
337+
try:
338+
d.b_blob.data_stream.read().decode()
339+
except UnicodeDecodeError:
340+
return True
341+
return False
342+
343+
for d in cutoff_commit.diff(self._repo.head.commit):
344+
if not _include_file(d.b_path, subdir, recursive, file_ext) or _is_binary(d):
345+
continue
338346

339-
abspath = os.path.join(self.config.working_directory, d.new_file.path)
340-
# TODO
341-
# Just filtering on the two status values for "added" and "modified" is too
342-
# simplistic. This does not cover file renames, copies &
343-
# deletions.
344-
if d.status == pygit2.GIT_DELTA_ADDED:
347+
abspath = os.path.join(self.config.working_directory, d.b_path)
348+
if d.new_file:
349+
added_files.add(abspath)
350+
elif d.a_blob and d.b_blob:
351+
if d.a_path != d.b_path:
352+
# consider moved files as added
345353
added_files.add(abspath)
346-
elif d.status == pygit2.GIT_DELTA_MODIFIED:
354+
elif d.a_blob != d.b_blob:
347355
updated_files.add(abspath)
348356

349-
previous_commit = commit
350-
351357
# Any file that has been added and then updated inside the window of the git history we
352358
# looked at, should be considered "added", not "updated", since it does not exist in the
353359
# database yet.
@@ -364,29 +370,26 @@ def _ensure_working_directory(self) -> None:
364370
os.mkdir(self.config.working_directory)
365371

366372
def _ensure_repository(self) -> None:
367-
repodir = pygit2.discover_repository(self.config.working_directory)
368-
if repodir is None:
373+
if not os.path.exists(os.path.join(self.config.working_directory, ".git")):
369374
self._clone_repository()
370375
return
371-
372-
self._repo = pygit2.Repository(repodir)
376+
self._repo = Repo(self.config.working_directory)
373377

374378
if self.config.branch is None:
375-
self.config.branch = self._repo.head.shorthand
376-
branch = self._repo.branches[self.config.branch]
377-
378-
if not branch.is_checked_out():
379-
self._repo.checkout(branch)
379+
self.config.branch = str(self._repo.active_branch)
380+
branch = self.config.branch
381+
self._repo.head.reference = self._repo.heads[branch]
382+
self._repo.head.reset(index=True, working_tree=True)
380383

381384
remote = self._find_or_add_remote()
382385
self._update_from_remote(remote, branch)
383386

384387
def _clone_repository(self) -> None:
385388
kwargs = {}
386389
if self.config.branch:
387-
kwargs["checkout_branch"] = self.config.branch
390+
kwargs["branch"] = self.config.branch
388391

389-
self._repo = pygit2.clone_repository(
392+
self._repo = Repo.clone_from(
390393
self.config.repository_url, self.config.working_directory, **kwargs
391394
)
392395

@@ -398,20 +401,19 @@ def _find_or_add_remote(self):
398401
break
399402

400403
if remote is None:
401-
remote = self._repo.remotes.create(
402-
"added_by_vulnerablecode", self.config.repository_url
404+
remote = self._repo.create_remote(
405+
"added_by_vulnerablecode", url=self.config.repository_url
403406
)
404407

405408
return remote
406409

407410
def _update_from_remote(self, remote, branch) -> None:
408-
progress = remote.fetch()
409-
if progress.received_objects == 0:
411+
fetch_info = remote.fetch()
412+
if len(fetch_info) == 0:
410413
return
411-
412-
remote_branch = self._repo.branches[f"{remote.name}/{self.config.branch}"]
413-
branch.set_target(remote_branch.target)
414-
self._repo.checkout(branch, strategy=pygit2.GIT_CHECKOUT_FORCE)
414+
branch = self._repo.branches[branch]
415+
branch.set_reference(remote.refs[branch.name])
416+
self._repo.head.reset(index=True, working_tree=True)
415417

416418

417419
def _include_file(

0 commit comments

Comments
 (0)