Skip to content

Commit 50be9ca

Browse files
committed
feat(licenses): integrate OSADL copyleft data for improved detection
1 parent af24cce commit 50be9ca

File tree

6 files changed

+305
-71
lines changed

6 files changed

+305
-71
lines changed

CHANGELOG.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1313
- Supports both `-ls source1 source2` and `-ls source1 -ls source2` syntax
1414

1515
### Changed
16+
- **Switched to OSADL authoritative copyleft license data**
17+
- Copyleft detection now uses [OSADL (Open Source Automation Development Lab)](https://www.osadl.org/) checklist data
18+
- Adds missing `-or-later` license variants (GPL-2.0-or-later, GPL-3.0-or-later, LGPL-2.1-or-later, etc.)
19+
- Expands copyleft coverage from 21 to 32 licenses
20+
- Custom include/exclude/explicit filters still use legacy behavior for backward compatibility
21+
- Dataset attribution added to README (CC-BY-4.0 license)
22+
1623
- Copyleft inspection now defaults to component-level licenses only (component_declared, license_file)
1724
- Reduces noise from file-level license detections (file_header, scancode)
1825
- Use `-ls` to override and check specific sources
1926

2027
### Fixed
21-
- Fixed terminal cursor disappearing after aborting scan with Ctrl+C
28+
- Fixed the terminal cursor disappearing after aborting a scan with Ctrl+C
2229

2330
## [1.40.1] - 2025-10-29
2431
### Changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,3 +135,8 @@ Details of major changes to the library can be found in [CHANGELOG.md](CHANGELOG
135135

136136
## Background
137137
Details about the Winnowing algorithm used for scanning can be found [here](WINNOWING.md).
138+
139+
## Dataset License Notice
140+
This application is licensed under the MIT License. However, it includes the OSADL copyleft license dataset ([osadl-copyleft.json](src/scanoss/data/osadl-copyleft.json)), which is licensed under the [Creative Commons Attribution 4.0 International license (CC-BY-4.0)](https://creativecommons.org/licenses/by/4.0/) by the [Open Source Automation Development Lab (OSADL) eG](https://www.osadl.org/).
141+
142+
**Attribution:** A project by the Open Source Automation Development Lab (OSADL) eG. Original source: [https://www.osadl.org/fileadmin/checklists/copyleft.json](https://www.osadl.org/fileadmin/checklists/copyleft.json)

src/scanoss/inspection/utils/license_utils.py

Lines changed: 50 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -23,94 +23,82 @@
2323
"""
2424

2525
from ...scanossbase import ScanossBase
26-
27-
DEFAULT_COPYLEFT_LICENSES = {
28-
'agpl-3.0-only',
29-
'artistic-1.0',
30-
'artistic-2.0',
31-
'cc-by-sa-4.0',
32-
'cddl-1.0',
33-
'cddl-1.1',
34-
'cecill-2.1',
35-
'epl-1.0',
36-
'epl-2.0',
37-
'gfdl-1.1-only',
38-
'gfdl-1.2-only',
39-
'gfdl-1.3-only',
40-
'gpl-1.0-only',
41-
'gpl-2.0-only',
42-
'gpl-3.0-only',
43-
'lgpl-2.1-only',
44-
'lgpl-3.0-only',
45-
'mpl-1.1',
46-
'mpl-2.0',
47-
'sleepycat',
48-
'watcom-1.0',
49-
}
26+
from scanoss.osadl_copyleft import OsadlCopyleft
5027

5128

5229
class LicenseUtil(ScanossBase):
5330
"""
5431
A utility class for handling software licenses, particularly copyleft licenses.
5532
56-
This class provides functionality to initialize, manage, and query a set of
57-
copyleft licenses. It also offers a method to generate URLs for license information.
33+
Uses OSADL (Open Source Automation Development Lab) authoritative copyleft data
34+
with optional include/exclude/explicit filters.
5835
"""
5936

6037
BASE_SPDX_ORG_URL = 'https://spdx.org/licenses'
6138

6239
def __init__(self, debug: bool = False, trace: bool = True, quiet: bool = False):
    """
    Create the license utility with an OSADL lookup and empty filters.

    :param debug: Enable debug output (also forwarded to the OSADL checker)
    :param trace: Trace flag passed through to the base class
    :param quiet: Quiet flag passed through to the base class
    """
    super().__init__(debug, trace, quiet)
    # OSADL-backed lookup consulted when no custom filter decides the answer
    self.osadl = OsadlCopyleft(debug=debug)
    # Filters start empty; init() fills them from user-supplied options
    self.include_licenses, self.exclude_licenses, self.explicit_licenses = set(), set(), set()
6645

6746
def init(self, include: str = None, exclude: str = None, explicit: str = None):
    """
    Initialize copyleft license filters.

    Every call rebuilds all three filters from scratch, so a filter set by an
    earlier call never leaks into a later one.

    An explicit list only takes effect when it contains at least one license
    after parsing; a blank value (e.g. ``' '`` or ``','``) is treated as "not
    provided" and the include/exclude lists are honoured instead.

    :param include: Comma-separated licenses to mark as copyleft (in addition to OSADL)
    :param exclude: Comma-separated licenses to mark as NOT copyleft (override OSADL)
    :param explicit: Comma-separated licenses to use exclusively (ignore OSADL)
    """

    def _parse(raw):
        # Split on commas, trim and lower-case each entry, drop empty entries
        if not raw:
            return set()
        return {tok.strip().lower() for tok in raw.split(',') if tok.strip()}

    # Start from a clean slate on every call
    self.explicit_licenses = _parse(explicit)
    self.include_licenses = set()
    self.exclude_licenses = set()

    # Explicit mode: only the explicit list is consulted, nothing else applies
    if self.explicit_licenses:
        self.print_debug(f'Explicit copyleft licenses: {self.explicit_licenses}')
        return

    # Licenses forced to copyleft in addition to OSADL
    self.include_licenses = _parse(include)
    if include:
        self.print_debug(f'Include licenses: {self.include_licenses}')

    # Licenses forced to NOT copyleft, overriding OSADL
    self.exclude_licenses = _parse(exclude)
    if exclude:
        self.print_debug(f'Exclude licenses: {self.exclude_licenses}')
10569

10670
def is_copyleft(self, spdxid: str) -> bool:
    """
    Check if a license is copyleft.

    The identifier is trimmed and lower-cased before any comparison, matching
    how the filter lists are normalized when they are parsed.

    Logic:
    1. If explicit list provided → check if license in explicit list
    2. If license in include list → return True
    3. If license in exclude list → return False
    4. Otherwise → use OSADL authoritative data

    :param spdxid: SPDX license identifier
    :return: True if copyleft, False otherwise
    """
    if not spdxid:
        return False

    spdxid_clean = spdxid.strip()
    if not spdxid_clean:
        # Whitespace-only identifier: nothing to look up
        return False
    spdxid_lc = spdxid_clean.lower()

    # Explicit mode: use only the explicit list
    if self.explicit_licenses:
        return spdxid_lc in self.explicit_licenses

    # NOTE(review): include is checked before exclude, so a license present in
    # both lists is reported as copyleft. The earlier set-based implementation
    # applied excludes last - confirm this precedence is intended.
    if spdxid_lc in self.include_licenses:
        return True

    # Exclude filter: if license in exclude list, force copyleft=False
    if spdxid_lc in self.exclude_licenses:
        return False

    # No filters matched, use OSADL authoritative data
    return self.osadl.is_copyleft(spdxid_clean)
114102

115103
def get_spdx_url(self, spdxid: str) -> str:
116104
"""

src/scanoss/osadl_copyleft.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
"""
2+
SPDX-License-Identifier: MIT
3+
4+
Copyright (c) 2025, SCANOSS
5+
6+
Permission is hereby granted, free of charge, to any person obtaining a copy
7+
of this software and associated documentation files (the "Software"), to deal
8+
in the Software without restriction, including without limitation the rights
9+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
copies of the Software, and to permit persons to whom the Software is
11+
furnished to do so, subject to the following conditions:
12+
13+
The above copyright notice and this permission notice shall be included in
14+
all copies or substantial portions of the Software.
15+
16+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22+
THE SOFTWARE.
23+
"""
24+
25+
import json
26+
import sys
27+
28+
import importlib_resources
29+
30+
31+
class OsadlCopyleft:
    """
    OSADL Copyleft license checker class.

    Provides copyleft license lookup based on OSADL (Open Source Automation
    Development Lab) checklist data.

    The embedded JSON is loaded into class-level storage the first time an
    instance is created and is then shared by all instances. If loading fails,
    the next instance created tries again.

    Data source: https://www.osadl.org/fileadmin/checklists/copyleft.json
    License: CC-BY-4.0
    """

    # Lower-cased license ID -> status value taken from the "copyleft" object of the JSON
    _shared_copyleft_data = {}
    # True once the embedded data has been loaded successfully
    _data_loaded = False

    def __init__(self, debug: bool = False):
        """
        Initialize the OsadlCopyleft class and make sure the shared data is loaded.

        :param debug: Print debug messages to STDERR when True
        """
        self.debug = debug
        self._load_copyleft_data()

    @staticmethod
    def print_stderr(*args, **kwargs):
        """
        Print the given message to STDERR
        """
        print(*args, file=sys.stderr, **kwargs)

    def print_debug(self, *args, **kwargs):
        """
        Print debug message if enabled
        """
        if self.debug:
            self.print_stderr(*args, **kwargs)

    def _load_copyleft_data(self) -> bool:
        """
        Load the embedded OSADL copyleft JSON file into class-level shared data.

        Does nothing when the data has already been loaded. Problems are reported
        on STDERR and signalled through the return value rather than raised.

        :return: True if the data is available, False otherwise
        """
        if OsadlCopyleft._data_loaded:
            return True

        try:
            # Resource is addressed relative to this module's import name
            f_name = importlib_resources.files(__name__) / 'data/osadl-copyleft.json'
            with importlib_resources.as_file(f_name) as f:
                with open(f, 'r', encoding='utf-8') as file:
                    data = json.load(file)
        except Exception as e:
            # Best effort: a missing or corrupt data file must not abort the caller
            self.print_stderr(f'ERROR: Problem loading OSADL copyleft data: {e}')
            return False

        # Valid JSON is not necessarily an object holding a "copyleft" object
        copyleft = data.get('copyleft') if isinstance(data, dict) else None
        if not copyleft or not isinstance(copyleft, dict):
            self.print_stderr('ERROR: No copyleft data found in OSADL JSON')
            return False

        # Normalize license IDs (lowercase) for consistent lookup, then publish
        # into the class-level shared dictionary in a single step
        normalized = {lic_id.lower(): status for lic_id, status in copyleft.items()}
        OsadlCopyleft._shared_copyleft_data.update(normalized)

        OsadlCopyleft._data_loaded = True
        self.print_debug(f'Loaded {len(OsadlCopyleft._shared_copyleft_data)} OSADL copyleft entries')
        return True

    def is_copyleft(self, spdx_id: str) -> bool:
        """
        Check if a license is copyleft according to OSADL data.

        Returns True when the stored status starts with "yes" (case-insensitive),
        which covers both "Yes" and "Yes (restricted)". Unknown licenses and
        status values that are not strings are reported as not copyleft.

        :param spdx_id: SPDX license identifier (surrounding whitespace is ignored)
        :return: True if copyleft, False otherwise
        """
        if not spdx_id:
            return False

        # Normalize lookup the same way the keys were normalized on load
        spdx_id_lc = spdx_id.strip().lower()
        status = OsadlCopyleft._shared_copyleft_data.get(spdx_id_lc)

        if not status:
            self.print_debug(f'No OSADL copyleft data for license: {spdx_id}')
            return False

        if not isinstance(status, str):
            # Unexpected value type in the data: treat as "not copyleft" instead of crashing
            self.print_debug(f'Unexpected OSADL copyleft status for license {spdx_id}: {status!r}')
            return False

        # Consider both "Yes" and "Yes (restricted)" as copyleft (case-insensitive)
        return status.lower().startswith('yes')
127+
128+
129+
#
130+
# End of OsadlCopyleft Class
131+
#

0 commit comments

Comments
 (0)