Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
1178bfe
Upgrade scancode-toolkit to latest beta release #411
tdruez May 2, 2022
f9c6e77
Add a test class to regen test data #411
tdruez May 9, 2022
5dd16d4
Upgrade container_inspector to latest 31.0.0 version #411
tdruez May 9, 2022
2db2a8e
Handle new scan format in scancode pipes #411
JonoYang May 5, 2022
a42b374
Handle package_uids for DiscoveredPackages #411
JonoYang May 6, 2022
1c2bdc3
Update deprecated code #411
JonoYang May 6, 2022
2add897
Regenerate asgiref 3.3.0 test data #411
JonoYang May 10, 2022
ede8858
Add asgiref-3.3.0_scancode_scan.json #411
JonoYang May 10, 2022
4a4fa99
Add asgiref-3.3.0_walk_test_fixtures.json #411
JonoYang May 10, 2022
74d78b8
Signed-off-by: Jono Yang <jyang@nexb.com>
JonoYang May 10, 2022
496f826
Update make_results_summary() #411
JonoYang May 10, 2022
ef74863
Exclude system_environment from diff #411
JonoYang May 10, 2022
ad4b056
Upgrade scancode-toolkit and extractcode to latest version #411
tdruez May 11, 2022
bcca2d7
Update package_getter #434 #438
JonoYang May 12, 2022
7917727
Allow packages to be created without versions #438
JonoYang May 12, 2022
b6b1927
Update expected test results
JonoYang May 12, 2022
f46ea17
Report DiscoveredPackage correctly in summary #411
JonoYang May 12, 2022
7e0d39a
Add test for docker pipeline for alpine #411
JonoYang May 12, 2022
abd3a5c
Add docker pipeline test for rpm images #411
JonoYang May 13, 2022
a50f79b
Track package_uids in make_results_summary #435
JonoYang May 13, 2022
9823790
Add truncated ubuntu docker image for testing #435
JonoYang May 13, 2022
92f6e98
Bump scancode and commoncode versions #435
JonoYang May 18, 2022
0491ad5
Update docker pipeline #435
JonoYang May 18, 2022
5ef693b
Fix code validity #411
tdruez May 18, 2022
9757841
Simplify the filtering of key_files_packages using a QuerySet #411
tdruez May 18, 2022
2b7ba71
Remove copied code from docker.py #411 #435
JonoYang May 18, 2022
009d4e3
Update alpine test image and results #411 #435
JonoYang May 18, 2022
d867c52
Properly create multiple package instances #411
JonoYang May 19, 2022
a73e7ea
Sort packages in JSON output by type and name #411
JonoYang May 19, 2022
befe574
Get file info and packages in initial scan #438
JonoYang May 25, 2022
093a52e
Revert changes to docker pipes and pipeline #438
JonoYang Jun 7, 2022
c58771a
Use generic package_getter for all distros #438
JonoYang Jun 8, 2022
4a8713d
Use get_path() with strip_root to get paths #438
JonoYang Jun 8, 2022
a0705c3
Remove distro specific pipes #438
JonoYang Jun 9, 2022
0c26b7c
Use list comprehension for key_file_packages #438
JonoYang Jun 9, 2022
c8424b3
Add package_uid field to DiscoveredPackage #411
JonoYang Jun 9, 2022
552bdb8
Add test docker image for Ubuntu #438
JonoYang Jun 9, 2022
155fe97
Update formatting #411 #438
JonoYang Jun 9, 2022
18cc997
Use smaller rpm docker image for testing #438
JonoYang Jun 9, 2022
23dc0e1
Replace ubuntu docker test image #438
JonoYang Jun 9, 2022
b3f4656
Use purl data in update_or_create_packages #438
JonoYang Jun 9, 2022
1592cd3
Bump scancode version to v31.0.0rc1 #438 #411
JonoYang Jun 13, 2022
9fcec67
Consider all PURL fields when ordering Packages #411 #438
JonoYang Jun 13, 2022
20ffbe0
Create Packages before Resources #411 #438
JonoYang Jun 13, 2022
784dbbc
Add test for load_inventory pipeline #411
JonoYang Jun 14, 2022
d57afa6
Code cleanups and formatting #411
tdruez Jun 14, 2022
413cf0d
Upgrade dependencies #411
tdruez Jun 14, 2022
c8cb574
Refactor create_inventory_from_scan to remove duplicated code #411
tdruez Jun 14, 2022
5aed5e8
Add changelog entry #411
tdruez Jun 14, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ local
policies.yml
*.rdb
*.aof
.vscode

# This is only created when packaging for external redistribution
/thirdparty/
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ v31.0.0 (next)
- WARNING: Drop support for Python 3.6 and 3.7. Add support for Python 3.10.
Upgrade Django to version 4.x series.

- Upgrade ScanCode-toolkit to version v31.
See https://github.com/nexB/scancode-toolkit/blob/develop/CHANGELOG.rst for an
overview of the changes in v31 compared to v30.

- Implement run status auto-refresh using the htmx JavaScript library.
The statuses of queued and running pipelines are now automatically refreshed
in the project list and project details views every 10 seconds.
Expand Down
18 changes: 18 additions & 0 deletions scanpipe/migrations/0016_discoveredpackage_package_uid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.0.4 on 2022-06-09 18:26

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``package_uid`` field to the ``discoveredpackage`` model."""

    # Must be applied after the previous scanpipe migration.
    dependencies = [
        ("scanpipe", "0015_alter_codebaseresource_project_and_more"),
    ]

    operations = [
        migrations.AddField(
            model_name="discoveredpackage",
            name="package_uid",
            field=models.CharField(
                blank=True,
                help_text="Unique identifier for this package.",
                max_length=1024,
            ),
        ),
    ]
7 changes: 6 additions & 1 deletion scanpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1726,6 +1726,11 @@ class DiscoveredPackage(
blank=True,
help_text=_("A list of dependencies for this package."),
)
package_uid = models.CharField(
max_length=1024,
blank=True,
help_text=_("Unique identifier for this package."),
)

# `AbstractPackage` model overrides:
keywords = models.JSONField(default=list, blank=True)
Expand Down Expand Up @@ -1769,7 +1774,7 @@ def create_from_data(cls, project, package_data):
If one of the values of the required fields is not available, a "ProjectError"
is created instead of a new DiscoveredPackage instance.
"""
required_fields = ["type", "name", "version"]
required_fields = ["type", "name"]
missing_values = [
field_name
for field_name in required_fields
Expand Down
4 changes: 2 additions & 2 deletions scanpipe/pipelines/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode.io for support and download.

from scanpipe.pipelines import root_filesystems
from scanpipe.pipelines.root_filesystems import RootFS
from scanpipe.pipes import docker
from scanpipe.pipes import rootfs


class Docker(root_filesystems.RootFS):
class Docker(RootFS):
"""
A pipeline to analyze Docker images.
"""
Expand Down
9 changes: 4 additions & 5 deletions scanpipe/pipelines/load_inventory.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,14 @@ def get_scan_json_input(self):
Locates a JSON scan input from a project's input/ directory.
"""
inputs = list(self.project.inputs(pattern="*.json"))

if len(inputs) != 1:
raise Exception("Only 1 JSON input file supported")

self.input_location = str(inputs[0].absolute())

def build_inventory_from_scan(self):
"""
Processes a given JSON scan input to populate codebase resources and packages.
Processes a JSON Scan results file to populate codebase resources and packages.
"""
project = self.project
scanned_codebase = scancode.get_virtual_codebase(project, self.input_location)
scancode.create_codebase_resources(project, scanned_codebase)
scancode.create_discovered_packages(project, scanned_codebase)
scancode.create_inventory_from_scan(self.project, self.input_location)
1 change: 0 additions & 1 deletion scanpipe/pipelines/scan_codebase.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@

from scanpipe import pipes
from scanpipe.pipelines import Pipeline
from scanpipe.pipes import output
from scanpipe.pipes import rootfs
from scanpipe.pipes import scancode
from scanpipe.pipes.input import copy_inputs
Expand Down
20 changes: 7 additions & 13 deletions scanpipe/pipelines/scan_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,9 @@ def steps(cls):
"--license-text",
"--package",
"--url",
] + [
"--classify",
"--consolidate",
"--is-license-text",
"--license-clarity-score",
"--summary",
"--summary-key-files",
]

def get_package_archive_input(self):
Expand Down Expand Up @@ -102,33 +98,31 @@ def run_scancode(self):
"""
Scans extracted codebase/ content.
"""
self.scan_output = self.project.get_output_file_path("scancode", "json")
scan_output_path = self.project.get_output_file_path("scancode", "json")
self.scan_output_location = str(scan_output_path.absolute())

with self.save_errors(scancode.ScancodeError):
scancode.run_scancode(
location=str(self.project.codebase_path),
output_file=str(self.scan_output),
output_file=self.scan_output_location,
options=self.scancode_options,
raise_on_error=True,
)

if not self.scan_output.exists():
if not scan_output_path.exists():
raise FileNotFoundError("ScanCode output not available.")

def build_inventory_from_scan(self):
"""
Processes the JSON scan results to determine resources and packages.
Processes a JSON Scan results file to populate codebase resources and packages.
"""
project = self.project
scanned_codebase = scancode.get_virtual_codebase(project, str(self.scan_output))
scancode.create_codebase_resources(project, scanned_codebase)
scancode.create_discovered_packages(project, scanned_codebase)
scancode.create_inventory_from_scan(self.project, self.scan_output_location)

def make_summary_from_scan_results(self):
"""
Builds a summary in JSON format from the generated scan results.
"""
summary = scancode.make_results_summary(self.project, str(self.scan_output))
summary = scancode.make_results_summary(self.project, self.scan_output_location)
output_file = self.project.get_output_file_path("summary", "json")

with output_file.open("w") as summary_file:
Expand Down
13 changes: 9 additions & 4 deletions scanpipe/pipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@

from django.db.models import Count

from packageurl import normalize_qualifiers

from scanpipe.models import CodebaseResource
from scanpipe.models import DiscoveredPackage
from scanpipe.pipes import scancode
Expand Down Expand Up @@ -73,12 +71,19 @@ def update_or_create_package(project, package_data, codebase_resource=None):
"""
Gets, updates or creates a DiscoveredPackage then returns it.
Uses the `project` and `package_data` mapping to lookup and creates the
DiscoveredPackage using its Package URL as a unique key.
DiscoveredPackage using its Package URL and package_uid as a unique key.
"""
purl_data = DiscoveredPackage.extract_purl_data(package_data)
package_uid = package_data.get("package_uid")
purl_data_and_package_uid = {
**purl_data,
"package_uid": package_uid,
}

try:
package = DiscoveredPackage.objects.get(project=project, **purl_data)
package = DiscoveredPackage.objects.get(
project=project, **purl_data_and_package_uid
)
except DiscoveredPackage.DoesNotExist:
package = None

Expand Down
32 changes: 0 additions & 32 deletions scanpipe/pipes/alpine.py

This file was deleted.

2 changes: 2 additions & 0 deletions scanpipe/pipes/codebase.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ def get_tree(resource, fields, codebase=None):
return resource_dict


# TODO: Walking the ProjectCodebase is broken as we do not have a consistent way
# to get the root of a codebase.
class ProjectCodebase:
"""
Represents the codebase of a project stored in the database.
Expand Down
35 changes: 0 additions & 35 deletions scanpipe/pipes/debian.py

This file was deleted.

22 changes: 10 additions & 12 deletions scanpipe/pipes/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@

import logging
import posixpath
from functools import partial
from pathlib import Path

from container_inspector.image import Image
Expand Down Expand Up @@ -152,23 +151,21 @@ def scan_image_for_system_packages(project, image, detect_licenses=True):
raise rootfs.DistroNotFound(f"Distro not found.")

distro_id = image.distro.identifier
if distro_id not in rootfs.PACKAGE_GETTER_BY_DISTRO:
if distro_id not in rootfs.SUPPORTED_DISTROS:
raise rootfs.DistroNotSupported(f'Distro "{distro_id}" is not supported.')

package_getter = partial(
rootfs.PACKAGE_GETTER_BY_DISTRO[distro_id],
distro=distro_id,
detect_licenses=detect_licenses,
)

installed_packages = image.get_installed_packages(package_getter)
installed_packages = image.get_installed_packages(rootfs.package_getter)

for i, (purl, package, layer) in enumerate(installed_packages):
logger.info(f"Creating package #{i}: {purl}")
created_package = pipes.update_or_create_package(project, package.to_dict())

installed_files = []
if hasattr(package, "resources"):
installed_files = package.resources

# We have no files for this installed package, we cannot go further.
if not package.installed_files:
if not installed_files:
logger.info(f" No installed_files for: {purl}")
continue

Expand All @@ -177,8 +174,9 @@ def scan_image_for_system_packages(project, image, detect_licenses=True):

codebase_resources = project.codebaseresources.all()

for install_file in package.installed_files:
install_file_path = pipes.normalize_path(install_file.path)
for install_file in installed_files:
install_file_path = install_file.get_path(strip_root=True)
install_file_path = pipes.normalize_path(install_file_path)
layer_rootfs_path = posixpath.join(
layer.layer_id,
install_file_path.strip("/"),
Expand Down
13 changes: 9 additions & 4 deletions scanpipe/pipes/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,12 @@ def get_headers(self, project):
def get_packages(self, project):
from scanpipe.api.serializers import DiscoveredPackageSerializer

packages = project.discoveredpackages.all()
packages = project.discoveredpackages.all().order_by(
"type",
"namespace",
"name",
"version",
)

for obj in packages.iterator():
yield self.encode(DiscoveredPackageSerializer(obj).data)
Expand Down Expand Up @@ -280,9 +285,9 @@ def _add_xlsx_worksheet(workbook, worksheet_name, rows, fields):
# https://github.com/nexB/scancode-toolkit/pull/2381
# https://github.com/nexB/scancode-toolkit/issues/2350
mappings_key_by_fieldname = {
"copyrights": "value",
"holders": "value",
"authors": "value",
"copyrights": "copyright",
"holders": "holder",
"authors": "author",
"emails": "email",
"urls": "url",
}
Expand Down
Loading