Skip to content

Commit 565ff26

Browse files
committed
feat(policies): add support for license source filtering in copyleft inspection
1 parent 0a561dc commit 565ff26

File tree

6 files changed

+313
-12
lines changed

6 files changed

+313
-12
lines changed

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

88
## [Unreleased]
9+
10+
### Added
11+
- Added `--license-sources` (`-ls`) option to copyleft inspection
12+
- Filter which license sources to check (component_declared, license_file, file_header, file_spdx_tag, scancode)
13+
- Supports both `-ls source1 source2` and `-ls source1 -ls source2` syntax
14+
15+
### Changed
16+
- Copyleft inspection now defaults to component-level licenses only (component_declared, license_file)
17+
- Reduces noise from file-level license detections (file_header, file_spdx_tag, scancode)
18+
- Use `-ls` to override and check specific sources
19+
920
### Fixed
1021
- Fixed terminal cursor disappearing after aborting scan with Ctrl+C
1122

src/scanoss/cli.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
from .components import Components
5656
from .constants import (
5757
DEFAULT_API_TIMEOUT,
58+
DEFAULT_COPYLEFT_LICENSE_SOURCES,
5859
DEFAULT_HFH_DEPTH,
5960
DEFAULT_HFH_MIN_ACCEPTED_SCORE,
6061
DEFAULT_HFH_RANK_THRESHOLD,
@@ -64,6 +65,7 @@
6465
DEFAULT_TIMEOUT,
6566
MIN_TIMEOUT,
6667
PYTHON_MAJOR_VERSION,
68+
VALID_LICENSE_SOURCES,
6769
)
6870
from .csvoutput import CsvOutput
6971
from .cyclonedx import CycloneDx
@@ -699,6 +701,19 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
699701
p.add_argument('--exclude', help='Licenses to exclude from analysis (comma-separated list)')
700702
p.add_argument('--explicit', help='Use only these specific licenses for analysis (comma-separated list)')
701703

704+
# License source filtering
705+
# NOTE: On Python <3.13 the help output repeats the choices list for each option form (-ls and --license-sources), which is noisy; Python 3.13+ shows it once (clean)
706+
# See: https://github.com/python/cpython/commit/c4a2e8a2c5188c3288d57b80852e92c83f46f6f3
707+
for p in [p_inspect_raw_copyleft, p_inspect_legacy_copyleft]:
708+
p.add_argument(
709+
'-ls', '--license-sources',
710+
action='extend',
711+
nargs='+',
712+
choices=VALID_LICENSE_SOURCES,
713+
help=f'Specify which license sources to check for copyleft violations. Each license object in scan results '
714+
f'has a source field indicating its origin. Default: {", ".join(DEFAULT_COPYLEFT_LICENSE_SOURCES)}',
715+
)
716+
702717
# Common options for (legacy) copyleft and undeclared component inspection
703718
for p in [p_inspect_raw_copyleft, p_inspect_raw_undeclared, p_inspect_legacy_copyleft, p_inspect_legacy_undeclared]:
704719
p.add_argument('-i', '--input', nargs='?', help='Path to scan results file to analyse')
@@ -1752,6 +1767,7 @@ def inspect_copyleft(parser, args):
17521767
include=args.include, # Additional licenses to check
17531768
exclude=args.exclude, # Licenses to ignore
17541769
explicit=args.explicit, # Explicit license list
1770+
license_sources=args.license_sources, # License sources to check (list)
17551771
)
17561772
# Execute inspection and exit with appropriate status code
17571773
status, _ = i_copyleft.run()

src/scanoss/constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,6 @@
1717
DEFAULT_HFH_DEPTH = 1
1818
DEFAULT_HFH_RECURSIVE_THRESHOLD = 0.8
1919
DEFAULT_HFH_MIN_ACCEPTED_SCORE = 0.15
20+
21+
VALID_LICENSE_SOURCES = ['component_declared', 'license_file', 'file_header', 'file_spdx_tag', 'scancode']
22+
DEFAULT_COPYLEFT_LICENSE_SOURCES = ['component_declared', 'license_file']

src/scanoss/inspection/policy_check/scanoss/copyleft.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
from dataclasses import dataclass
2727
from typing import Dict, List
2828

29+
from scanoss.constants import DEFAULT_COPYLEFT_LICENSE_SOURCES
30+
2931
from ...policy_check.policy_check import PolicyCheck, PolicyOutput, PolicyStatus
3032
from ...utils.markdown_utils import generate_jira_table, generate_table
3133
from ...utils.scan_result_processor import ScanResultProcessor
@@ -63,6 +65,7 @@ def __init__( # noqa: PLR0913
6365
include: str = None,
6466
exclude: str = None,
6567
explicit: str = None,
68+
license_sources: list = None,
6669
):
6770
"""
6871
Initialise the Copyleft class.
@@ -77,6 +80,7 @@ def __init__( # noqa: PLR0913
7780
:param include: Licenses to include in the analysis
7881
:param exclude: Licenses to exclude from the analysis
7982
:param explicit: Explicitly defined licenses
83+
:param license_sources: List of license sources to check (falls back to DEFAULT_COPYLEFT_LICENSE_SOURCES when None or empty)
8084
"""
8185
super().__init__(
8286
debug, trace, quiet, format_type, status, name='Copyleft Policy', output=output
@@ -85,14 +89,16 @@ def __init__( # noqa: PLR0913
8589
self.filepath = filepath
8690
self.output = output
8791
self.status = status
92+
self.license_sources = license_sources or DEFAULT_COPYLEFT_LICENSE_SOURCES
8893
self.results_processor = ScanResultProcessor(
8994
self.debug,
9095
self.trace,
9196
self.quiet,
9297
self.filepath,
9398
include,
9499
exclude,
95-
explicit)
100+
explicit,
101+
self.license_sources)
96102

97103
def _json(self, components: list[Component]) -> PolicyOutput:
98104
"""

src/scanoss/inspection/utils/scan_result_processor.py

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,13 @@ def __init__( # noqa: PLR0913
7171
include: str = None,
7272
exclude: str = None,
7373
explicit: str = None,
74+
license_sources: list = None,
7475
):
7576
super().__init__(debug, trace, quiet)
7677
self.result_file_path = result_file_path
7778
self.license_util = LicenseUtil()
7879
self.license_util.init(include, exclude, explicit)
80+
self.license_sources = license_sources
7981
self.results = self._load_input_file()
8082

8183
def get_results(self) -> Dict[str, Any]:
@@ -162,9 +164,11 @@ def _append_license_to_component(self,
162164
self.print_debug(f'WARNING: Results missing licenses. Skipping: {new_component}')
163165
return
164166

165-
licenses_order_by_source_priority = self._get_licenses_order_by_source_priority(new_component['licenses'])
167+
# Select licenses based on configuration (filtering or priority mode)
168+
selected_licenses = self._select_licenses(new_component['licenses'])
169+
166170
# Process licenses for this component
167-
for license_item in licenses_order_by_source_priority:
171+
for license_item in selected_licenses:
168172
if license_item.get('name'):
169173
spdxid = license_item['name']
170174
source = license_item.get('source')
@@ -309,19 +313,26 @@ def convert_components_to_list(self, components: dict):
309313
component['licenses'] = []
310314
return results_list
311315

312-
def _get_licenses_order_by_source_priority(self,licenses_data):
316+
def _select_licenses(self, licenses_data):
313317
"""
314-
Select licenses based on source priority:
315-
1. component_declared (highest priority)
316-
2. license_file
317-
3. file_header
318-
4. scancode (lowest priority)
318+
Select licenses based on configuration.
319+
320+
Two modes:
321+
- Filtering mode: If license_sources is specified, keep only licenses from those sources (licenses whose source is 'unknown' or missing are always kept)
322+
- Priority mode: Otherwise, use original priority-based selection
319323
320-
If any high-priority source is found, return only licenses from that source.
321-
If none found, return all licenses.
324+
Args:
325+
licenses_data: List of license dictionaries
322326
323-
Returns: list with ordered licenses by source.
327+
Returns:
328+
Filtered list of licenses based on configuration
324329
"""
330+
# Filtering mode, when license_sources is explicitly provided
331+
if self.license_sources:
332+
sources_to_include = set(self.license_sources) | {'unknown'}
333+
return [lic for lic in licenses_data
334+
if lic.get('source') in sources_to_include or lic.get('source') is None]
335+
325336
# Define priority order (highest to lowest)
326337
priority_sources = ['component_declared', 'license_file', 'file_header', 'scancode']
327338

0 commit comments

Comments
 (0)