Skip to content

Commit 7455238

Browse files
committed
feat(licenses): integrate OSADL copyleft data
1 parent 565ff26 commit 7455238

File tree

7 files changed

+443
-80
lines changed

7 files changed

+443
-80
lines changed

CHANGELOG.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1313
- Supports both `-ls source1 source2` and `-ls source1 -ls source2` syntax
1414

1515
### Changed
16+
- **Switched to OSADL authoritative copyleft license data**
17+
- Copyleft detection now uses [OSADL (Open Source Automation Development Lab)](https://www.osadl.org/) checklist data
18+
- Adds missing `-or-later` license variants (GPL-2.0-or-later, GPL-3.0-or-later, LGPL-2.1-or-later, etc.)
19+
- Expands copyleft coverage from 21 to 32 licenses
20+
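- Custom include/exclude/explicit filters remain supported for backward compatibility: `include` marks additional licenses as copyleft on top of the OSADL data, `exclude` marks licenses as not copyleft (overriding OSADL), and `explicit` replaces the OSADL data entirely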
21+
- Dataset attribution added to README (CC-BY-4.0 license)
22+
1623
- Copyleft inspection now defaults to component-level licenses only (component_declared, license_file)
1724
- Reduces noise from file-level license detections (file_header, scancode)
1825
- Use `-ls` to override and check specific sources
1926

2027
### Fixed
21-
- Fixed terminal cursor disappearing after aborting scan with Ctrl+C
28+
- Fixed the terminal cursor disappearing after aborting a scan with Ctrl+C
2229

2330
## [1.40.1] - 2025-10-29
2431
### Changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,3 +135,8 @@ Details of major changes to the library can be found in [CHANGELOG.md](CHANGELOG
135135

136136
## Background
137137
Details about the Winnowing algorithm used for scanning can be found [here](WINNOWING.md).
138+
139+
## Dataset License Notice
140+
This application is licensed under the MIT License. However, it includes the OSADL copyleft license dataset ([osadl-copyleft.json](src/scanoss/data/osadl-copyleft.json)), which is provided by the [Open Source Automation Development Lab (OSADL) eG](https://www.osadl.org/) under the [Creative Commons Attribution 4.0 International license (CC-BY-4.0)](https://creativecommons.org/licenses/by/4.0/).
141+
142+
**Attribution:** A project by the Open Source Automation Development Lab (OSADL) eG. Original source: [https://www.osadl.org/fileadmin/checklists/copyleft.json](https://www.osadl.org/fileadmin/checklists/copyleft.json)
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
{
2+
"title": "OSADL Open Source License Obligations Checklist (https:\/\/www.osadl.org\/Checklists)",
3+
"license": "Creative Commons Attribution 4.0 International license (CC-BY-4.0)",
4+
"attribution": "A project by the Open Source Automation Development Lab (OSADL) eG. For further information about the project see the description at www.osadl.org\/checklists.",
5+
"copyright": "(C) 2017 - 2024 Open Source Automation Development Lab (OSADL) eG and contributors, info@osadl.org",
6+
"disclaimer": "The checklists and particularly the copyleft data have been assembled with maximum diligence and care; however, the authors do not warrant nor can be held liable in any way for its correctness, usefulness, merchantibility or fitness for a particular purpose as far as permissible by applicable law. Anyone who uses the information does this on his or her sole responsibility. For any individual legal advice, it is recommended to contact a lawyer.",
7+
"timeformat": "%Y-%m-%dT%H:%M:%S%z",
8+
"timestamp": "2025-10-30T11:23:00+0000",
9+
"copyleft":
10+
{
11+
"0BSD": "No",
12+
"AFL-2.0": "No",
13+
"AFL-2.1": "No",
14+
"AFL-3.0": "No",
15+
"AGPL-3.0-only": "Yes",
16+
"AGPL-3.0-or-later": "Yes",
17+
"Apache-1.0": "No",
18+
"Apache-1.1": "No",
19+
"Apache-2.0": "No",
20+
"APSL-2.0": "Yes (restricted)",
21+
"Artistic-1.0": "No",
22+
"Artistic-1.0-Perl": "No",
23+
"Artistic-2.0": "No",
24+
"Bitstream-Vera": "No",
25+
"blessing": "No",
26+
"BlueOak-1.0.0": "No",
27+
"BSD-1-Clause": "No",
28+
"BSD-2-Clause": "No",
29+
"BSD-2-Clause-Patent": "No",
30+
"BSD-3-Clause": "No",
31+
"BSD-3-Clause-Open-MPI": "No",
32+
"BSD-4-Clause": "No",
33+
"BSD-4-Clause-UC": "No",
34+
"BSD-4.3TAHOE": "No",
35+
"BSD-Source-Code": "No",
36+
"BSL-1.0": "No",
37+
"bzip2-1.0.5": "No",
38+
"bzip2-1.0.6": "No",
39+
"CC-BY-2.5": "No",
40+
"CC-BY-3.0": "No",
41+
"CDDL-1.0": "Yes (restricted)",
42+
"CDDL-1.1": "Yes (restricted)",
43+
"CPL-1.0": "Yes",
44+
"curl": "No",
45+
"ECL-1.0": "No",
46+
"ECL-2.0": "No",
47+
"EFL-2.0": "No",
48+
"EPL-1.0": "Yes",
49+
"EPL-2.0": "Yes (restricted)",
50+
"EUPL-1.1": "Yes",
51+
"EUPL-1.2": "Yes",
52+
"FSFAP": "No",
53+
"FSFUL": "No",
54+
"FSFULLR": "No",
55+
"FSFULLRWD": "No",
56+
"FTL": "No",
57+
"GPL-1.0-only": "Yes",
58+
"GPL-1.0-or-later": "Yes",
59+
"GPL-2.0-only": "Yes",
60+
"GPL-2.0-only WITH Classpath-exception-2.0": "Yes (restricted)",
61+
"GPL-2.0-or-later": "Yes",
62+
"GPL-3.0-only": "Yes",
63+
"GPL-3.0-or-later": "Yes",
64+
"HPND": "No",
65+
"IBM-pibs": "No",
66+
"ICU": "No",
67+
"IJG": "No",
68+
"ImageMagick": "No",
69+
"Info-ZIP": "No",
70+
"IPL-1.0": "Yes",
71+
"ISC": "No",
72+
"JasPer-2.0": "No",
73+
"LGPL-2.0-only": "Yes (restricted)",
74+
"LGPL-2.0-or-later": "Yes (restricted)",
75+
"LGPL-2.1-only": "Yes (restricted)",
76+
"LGPL-2.1-or-later": "Yes (restricted)",
77+
"LGPL-3.0-only": "Yes (restricted)",
78+
"LGPL-3.0-or-later": "Yes (restricted)",
79+
"Libpng": "No",
80+
"libpng-2.0": "No",
81+
"libtiff": "No",
82+
"LicenseRef-scancode-bsla-no-advert": "No",
83+
"LicenseRef-scancode-info-zip-2003-05": "No",
84+
"LicenseRef-scancode-ppp": "No",
85+
"Minpack": "No",
86+
"MirOS": "No",
87+
"MIT": "No",
88+
"MIT-0": "No",
89+
"MIT-CMU": "No",
90+
"MPL-1.1": "Yes (restricted)",
91+
"MPL-2.0": "Yes (restricted)",
92+
"MPL-2.0-no-copyleft-exception": "Yes (restricted)",
93+
"MS-PL": "Questionable",
94+
"MS-RL": "Yes (restricted)",
95+
"NBPL-1.0": "No",
96+
"NCSA": "No",
97+
"NTP": "No",
98+
"OFL-1.1": "Yes (restricted)",
99+
"OGC-1.0": "No",
100+
"OLDAP-2.8": "No",
101+
"OpenSSL": "Questionable",
102+
"OSL-3.0": "Yes",
103+
"PHP-3.01": "No",
104+
"PostgreSQL": "No",
105+
"PSF-2.0": "No",
106+
"Python-2.0": "No",
107+
"Qhull": "No",
108+
"RSA-MD": "No",
109+
"Saxpath": "No",
110+
"SGI-B-2.0": "No",
111+
"Sleepycat": "Yes",
112+
"SMLNJ": "No",
113+
"Spencer-86": "No",
114+
"SSH-OpenSSH": "No",
115+
"SSH-short": "No",
116+
"SunPro": "No",
117+
"Ubuntu-font-1.0": "Yes (restricted)",
118+
"Unicode-3.0": "No",
119+
"Unicode-DFS-2015": "No",
120+
"Unicode-DFS-2016": "No",
121+
"Unlicense": "No",
122+
"UPL-1.0": "No",
123+
"W3C": "No",
124+
"W3C-19980720": "No",
125+
"W3C-20150513": "No",
126+
"WTFPL": "No",
127+
"X11": "No",
128+
"XFree86-1.1": "No",
129+
"Zlib": "No",
130+
"zlib-acknowledgement": "No",
131+
"ZPL-2.0": "No"
132+
}
133+
}

src/scanoss/inspection/utils/license_utils.py

Lines changed: 51 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -22,96 +22,84 @@
2222
THE SOFTWARE.
2323
"""
2424

25-
from ...scanossbase import ScanossBase
25+
from scanoss.osadl import Osadl
2626

27-
DEFAULT_COPYLEFT_LICENSES = {
28-
'agpl-3.0-only',
29-
'artistic-1.0',
30-
'artistic-2.0',
31-
'cc-by-sa-4.0',
32-
'cddl-1.0',
33-
'cddl-1.1',
34-
'cecill-2.1',
35-
'epl-1.0',
36-
'epl-2.0',
37-
'gfdl-1.1-only',
38-
'gfdl-1.2-only',
39-
'gfdl-1.3-only',
40-
'gpl-1.0-only',
41-
'gpl-2.0-only',
42-
'gpl-3.0-only',
43-
'lgpl-2.1-only',
44-
'lgpl-3.0-only',
45-
'mpl-1.1',
46-
'mpl-2.0',
47-
'sleepycat',
48-
'watcom-1.0',
49-
}
27+
from ...scanossbase import ScanossBase
5028

5129

5230
class LicenseUtil(ScanossBase):
5331
"""
5432
A utility class for handling software licenses, particularly copyleft licenses.
5533
56-
This class provides functionality to initialize, manage, and query a set of
57-
copyleft licenses. It also offers a method to generate URLs for license information.
34+
Uses OSADL (Open Source Automation Development Lab) authoritative copyleft data
35+
with optional include/exclude/explicit filters.
5836
"""
5937

6038
BASE_SPDX_ORG_URL = 'https://spdx.org/licenses'
61-
BASE_OSADL_URL = 'https://www.osadl.org/fileadmin/checklists/unreflicenses'
6239

6340
def __init__(self, debug: bool = False, trace: bool = True, quiet: bool = False):
6441
super().__init__(debug, trace, quiet)
65-
self.default_copyleft_licenses = set(DEFAULT_COPYLEFT_LICENSES)
66-
self.copyleft_licenses = set()
42+
self.osadl = Osadl(debug=debug)
43+
self.include_licenses = set()
44+
self.exclude_licenses = set()
45+
self.explicit_licenses = set()
6746

6847
def init(self, include: str = None, exclude: str = None, explicit: str = None):
6948
"""
70-
Initialize the set of copyleft licenses based on user input.
71-
72-
This method allows for customization of the copyleft license set by:
73-
- Setting an explicit list of licenses
74-
- Including additional licenses to the default set
75-
- Excluding specific licenses from the default set
49+
Initialize copyleft license filters.
7650
77-
:param include: Comma-separated string of licenses to include
78-
:param exclude: Comma-separated string of licenses to exclude
79-
:param explicit: Comma-separated string of licenses to use exclusively
51+
:param include: Comma-separated licenses to mark as copyleft (in addition to OSADL)
52+
:param exclude: Comma-separated licenses to mark as NOT copyleft (override OSADL)
53+
:param explicit: Comma-separated licenses to use exclusively (ignore OSADL)
8054
"""
81-
if self.debug:
82-
self.print_stderr(f'Include Copyleft licenses: ${include}')
83-
self.print_stderr(f'Exclude Copyleft licenses: ${exclude}')
84-
self.print_stderr(f'Explicit Copyleft licenses: ${explicit}')
55+
# Parse explicit list (if provided, ignore OSADL completely)
8556
if explicit:
86-
explicit = explicit.strip()
87-
if explicit:
88-
exp = [item.strip().lower() for item in explicit.split(',')]
89-
self.copyleft_licenses = set(exp)
90-
self.print_debug(f'Copyleft licenses: ${self.copyleft_licenses}')
57+
self.explicit_licenses = {lic.strip().lower() for lic in explicit.split(',') if lic.strip()}
58+
self.print_debug(f'Explicit copyleft licenses: {self.explicit_licenses}')
9159
return
92-
# If no explicit licenses were set, set default ones
93-
self.copyleft_licenses = self.default_copyleft_licenses.copy()
94-
if include:
95-
include = include.strip()
60+
61+
# Parse include list (mark these as copyleft in addition to OSADL)
9662
if include:
97-
inc = [item.strip().lower() for item in include.split(',')]
98-
self.copyleft_licenses.update(inc)
99-
if exclude:
100-
exclude = exclude.strip()
63+
self.include_licenses = {lic.strip().lower() for lic in include.split(',') if lic.strip()}
64+
self.print_debug(f'Include licenses: {self.include_licenses}')
65+
66+
# Parse exclude list (mark these as NOT copyleft, overriding OSADL)
10167
if exclude:
102-
inc = [item.strip().lower() for item in exclude.split(',')]
103-
for lic in inc:
104-
self.copyleft_licenses.discard(lic)
105-
self.print_debug(f'Copyleft licenses: ${self.copyleft_licenses}')
68+
self.exclude_licenses = {lic.strip().lower() for lic in exclude.split(',') if lic.strip()}
69+
self.print_debug(f'Exclude licenses: {self.exclude_licenses}')
10670

10771
def is_copyleft(self, spdxid: str) -> bool:
10872
"""
109-
Check if a given license is considered copyleft.
73+
Check if a license is copyleft.
74+
75+
Logic:
76+
1. If explicit list provided → check if license in explicit list
77+
2. If license in include list → return True
78+
3. If license in exclude list → return False
79+
4. Otherwise → use OSADL authoritative data
11080
111-
:param spdxid: The SPDX identifier of the license to check
112-
:return: True if the license is copyleft, False otherwise
81+
:param spdxid: SPDX license identifier
82+
:return: True if copyleft, False otherwise
11383
"""
114-
return spdxid.lower() in self.copyleft_licenses
84+
if not spdxid:
85+
return False
86+
87+
spdxid_lc = spdxid.lower()
88+
89+
# Explicit mode: use only the explicit list
90+
if self.explicit_licenses:
91+
return spdxid_lc in self.explicit_licenses
92+
93+
# Include filter: if license in include list, force copyleft=True
94+
if spdxid_lc in self.include_licenses:
95+
return True
96+
97+
# Exclude filter: if license in exclude list, force copyleft=False
98+
if spdxid_lc in self.exclude_licenses:
99+
return False
100+
101+
# No filters matched, use OSADL authoritative data
102+
return self.osadl.is_copyleft(spdxid)
115103

116104
def get_spdx_url(self, spdxid: str) -> str:
117105
"""
@@ -122,14 +110,6 @@ def get_spdx_url(self, spdxid: str) -> str:
122110
"""
123111
return f'{self.BASE_SPDX_ORG_URL}/{spdxid}.html'
124112

125-
def get_osadl_url(self, spdxid: str) -> str:
126-
"""
127-
Generate the URL for the OSADL (Open Source Automation Development Lab) page of a license.
128-
129-
:param spdxid: The SPDX identifier of the license
130-
:return: The URL of the OSADL page for the given license
131-
"""
132-
return f'{self.BASE_OSADL_URL}/{spdxid}.txt'
133113

134114

135115
#

0 commit comments

Comments
 (0)