Skip to content

Commit 5f458b9

Browse files
Address feedback on #3462
Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent 35d8f6b commit 5f458b9

File tree

10 files changed

+55
-38
lines changed

10 files changed

+55
-38
lines changed

docs/source/reference/available_package_parsers.rst

+14-8
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,6 @@ parsers in scancode-toolkit during documentation builds.
2323
- Datasource ID
2424
- Primary Language
2525
- Documentation URL
26-
* - JAR Java Archive
27-
- ``*.jar``
28-
- None
29-
- ``java_jar``
30-
- None
31-
- https://en.wikipedia.org/wiki/JAR_(file_format)
3226
* - AboutCode ABOUT file
3327
- ``*.ABOUT``
3428
- ``about``
@@ -306,13 +300,13 @@ parsers in scancode-toolkit during documentation builds.
306300
- ``debian_source_metadata_tarball``
307301
- None
308302
- https://manpages.debian.org/unstable/dpkg-dev/deb.5.en.html
309-
* - None
303+
* - macOS disk image file
310304
- ``*.dmg``
311305
``*.sparseimage``
312306
- ``dmg``
313307
- ``apple_dmg``
314308
- None
315-
- None
309+
- https://en.wikipedia.org/wiki/Apple_Disk_Image
316310
* - Java EAR application.xml
317311
- ``*/META-INF/application.xml``
318312
- ``ear``
@@ -437,6 +431,12 @@ parsers in scancode-toolkit during documentation builds.
437431
- ``ant_ivy_xml``
438432
- Java
439433
- https://ant.apache.org/ivy/history/latest-milestone/ivyfile.html
434+
* - JAR Java Archive
435+
- ``*.jar``
436+
- ``jar``
437+
- ``java_jar``
438+
- None
439+
- https://en.wikipedia.org/wiki/JAR_(file_format)
440440
* - Java JAR MANIFEST.MF
441441
- ``*/META-INF/MANIFEST.MF``
442442
- ``jar``
@@ -555,6 +555,12 @@ parsers in scancode-toolkit during documentation builds.
555555
- ``opam_file``
556556
- Ocaml
557557
- https://opam.ocaml.org/doc/Manual.html#Common-file-format
558+
* - Java OSGi MANIFEST.MF
559+
- None
560+
- ``osgi``
561+
- ``java_osgi_manifest``
562+
- Java
563+
- https://docs.oracle.com/javase/tutorial/deployment/jar/manifestindex.html
558564
* - Dart pubspec lockfile
559565
- ``*pubspec.lock``
560566
- ``pubspec``

etc/scripts/licenses/buildrules.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ def cli(licenses_file):
223223
elif rule.is_license_intro:
224224
base_name = "license-intro"
225225
elif rule.is_license_clue:
226-
base_name = "license-clue"
226+
base_name = f"license-clue_{rule.license_expression}"
227227
else:
228228
base_name = rule.license_expression
229229

src/licensedcode/detection.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -106,11 +106,10 @@ class DetectionCategory(Enum):
106106
EXTRA_WORDS = 'extra-words'
107107
UNKNOWN_MATCH = 'unknown-match'
108108
LICENSE_CLUES = 'license-clues'
109-
LOW_QUALITY_MATCHES = 'license-clues'
109+
LOW_QUALITY_MATCH_FRAGMENTS = 'low-quality-matches'
110110
IMPERFECT_COVERAGE = 'imperfect-match-coverage'
111111
FALSE_POSITVE = 'possible-false-positive'
112112
UNDETECTED_LICENSE = 'undetected-license'
113-
MATCH_FRAGMENTS = 'match-fragments'
114113
LOW_RELEVANCE = 'low-relevance'
115114

116115

@@ -124,6 +123,7 @@ class DetectionRule(Enum):
124123
"""
125124
UNKNOWN_MATCH = 'unknown-match'
126125
LICENSE_CLUES = 'license-clues'
126+
LOW_QUALITY_MATCH_FRAGMENTS = 'low-quality-matches'
127127
FALSE_POSITIVE = 'possible-false-positive'
128128
NOT_LICENSE_CLUES = 'not-license-clues-as-more-detections-present'
129129
UNKNOWN_REFERENCE_TO_LOCAL_FILE = 'unknown-reference-to-local-file'
@@ -1374,12 +1374,12 @@ def get_detected_license_expression(
13741374
detection_log.append(DetectionRule.LICENSE_CLUES.value)
13751375
return detection_log, combined_expression
13761376

1377-
elif analysis == DetectionCategory.LOW_QUALITY_MATCHES.value:
1377+
elif analysis == DetectionCategory.LOW_QUALITY_MATCH_FRAGMENTS.value:
13781378
if TRACE_ANALYSIS:
13791379
logger_debug(f'analysis {DetectionCategory.LICENSE_CLUES.value}')
13801380
# TODO: we are temporarily returning these as license clues, and not
13811381
# in detections but ideally we should return synthetic unknowns for these
1382-
detection_log.append(DetectionRule.LOW_QUALITY_MATCHES.value)
1382+
detection_log.append(DetectionRule.LOW_QUALITY_MATCH_FRAGMENTS.value)
13831383
return detection_log, combined_expression
13841384

13851385
else:
@@ -1501,7 +1501,7 @@ def get_ambiguous_license_detections_by_type(unique_license_detections):
15011501

15021502
for detection in unique_license_detections:
15031503
if not detection.license_expression:
1504-
ambi_license_detections[DetectionCategory.MATCH_FRAGMENTS.value] = detection
1504+
ambi_license_detections[DetectionCategory.LOW_QUALITY_MATCH_FRAGMENTS.value] = detection
15051505

15061506
elif is_undetected_license_matches(license_matches=detection.matches):
15071507
ambi_license_detections[DetectionCategory.UNDETECTED_LICENSE.value] = detection
@@ -1567,7 +1567,7 @@ def analyze_detection(license_matches, package_license=False):
15671567
return DetectionCategory.UNKNOWN_MATCH.value
15681568

15691569
elif not package_license and is_low_quality_matches(license_matches=license_matches):
1570-
return DetectionCategory.LOW_QUALITY_MATCHES.value
1570+
return DetectionCategory.LOW_QUALITY_MATCH_FRAGMENTS.value
15711571

15721572
# Case where at least one of the matches have `match_coverage`
15731573
# below IMPERFECT_MATCH_COVERAGE_THR

src/licensedcode/models.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1193,7 +1193,7 @@ def load_rules(
11931193
rules_data_dir=rules_data_dir,
11941194
with_checks=True,
11951195
is_builtin=True,
1196-
ignore_deprecated=True,
1196+
with_depreacted=False,
11971197
):
11981198
"""
11991199
Return an iterable of rules loaded from rule files in ``rules_data_dir``.
@@ -1217,7 +1217,7 @@ def load_rules(
12171217

12181218
try:
12191219
rule = Rule.from_file(rule_file=rule_file)
1220-
if rule.is_deprecated and ignore_deprecated:
1220+
if not with_depreacted and rule.is_deprecated:
12211221
continue
12221222
else:
12231223
yield rule

src/packagedcode/misc.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,12 @@
1818
# yet the purpose and semantics are rather different here
1919

2020
# TODO: parse me!!!
21-
# TODO: add missing URLs and descriptions
22-
2321

2422
class JavaJarHandler(models.NonAssemblableDatafileHandler):
2523
datasource_id = 'java_jar'
2624
# NOTE: there are a few rare cases where a .zip can be a JAR.
2725
path_patterns = ('*.jar',)
26+
default_package_type = 'jar'
2827
filetypes = ('zip archive', 'java archive',)
2928
description = 'JAR Java Archive'
3029
documentation_url = 'https://en.wikipedia.org/wiki/JAR_(file_format)'
@@ -270,8 +269,9 @@ class AppleDmgHandler(models.NonAssemblableDatafileHandler):
270269
datasource_id = 'apple_dmg'
271270
default_package_type = 'dmg'
272271
path_patterns = ('*.dmg', '*.sparseimage',)
273-
description = ''
274-
documentation_url = ''
272+
description = 'macOS disk image file'
273+
# See also https://en.wikipedia.org/wiki/Sparse_image
274+
documentation_url = 'https://en.wikipedia.org/wiki/Apple_Disk_Image'
275275

276276

277277
class IsoImageHandler(models.NonAssemblableDatafileHandler):

src/packagedcode/readme.py

+3
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737

3838
'licence': 'extracted_license_statement',
3939
'license': 'extracted_license_statement',
40+
# NOTE: these files sometimes also have a "License File" field
4041
}
4142

4243

@@ -50,6 +51,8 @@ class ReadmeHandler(models.NonAssemblableDatafileHandler):
5051
'*/README.google',
5152
'*/README.thirdparty',
5253
)
54+
description = ''
55+
documentation_url = ''
5356

5457
@classmethod
5558
def parse(cls, location):

src/summarycode/todo.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,6 @@ def get_package_identifier(package_data, file_path):
213213

214214

215215
def get_unknown_purl(package_type):
216-
if not package_type:
217-
package_type = "unknown"
218216
purl = PackageURL(type=package_type, name="unknown")
219217
return purl.to_string()
220218

@@ -337,7 +335,7 @@ class ReviewComments(Enum):
337335
"been matched to rules having unknown as their license key, and these "
338336
"needs to be reviewed."
339337
)
340-
MATCH_FRAGMENTS = (
338+
LOW_QUALITY_MATCH_FRAGMENTS = (
341339
"Fragments of license text were detected which are not proper license detections "
342340
"and likely has misleading license expression, but this has some clues about licenses, "
343341
"which needs review."
@@ -391,8 +389,8 @@ def get_review_comments(detection_log):
391389
if LicenseDetectionCategory.UNKNOWN_MATCH.value in detection_log:
392390
review_comments[LicenseDetectionCategory.UNKNOWN_MATCH.value] = ReviewComments.UNKNOWN_MATCH.value
393391

394-
if LicenseDetectionCategory.MATCH_FRAGMENTS.value in detection_log:
395-
review_comments[LicenseDetectionCategory.MATCH_FRAGMENTS.value] = ReviewComments.MATCH_FRAGMENTS.value
392+
if LicenseDetectionCategory.LOW_QUALITY_MATCH_FRAGMENTS.value in detection_log:
393+
review_comments[LicenseDetectionCategory.LOW_QUALITY_MATCH_FRAGMENTS.value] = ReviewComments.LOW_QUALITY_MATCH_FRAGMENTS.value
396394

397395
if LicenseDetectionCategory.LOW_RELEVANCE.value in detection_log:
398396
review_comments[LicenseDetectionCategory.LOW_RELEVANCE.value] = ReviewComments.LOW_RELEVANCE.value

tests/packagedcode/data/plugin/help.txt

+11-11
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,4 @@
11
--------------------------------------------
2-
Package type: None
3-
datasource_id: java_jar
4-
documentation URL: https://en.wikipedia.org/wiki/JAR_(file_format)
5-
primary language: None
6-
description: JAR Java Archive
7-
path_patterns: '*.jar'
8-
--------------------------------------------
92
Package type: about
103
datasource_id: about_file
114
documentation URL: https://aboutcode-toolkit.readthedocs.io/en/latest/specification.html
@@ -323,9 +316,9 @@ Package type: deb
323316
--------------------------------------------
324317
Package type: dmg
325318
datasource_id: apple_dmg
326-
documentation URL:
319+
documentation URL: https://en.wikipedia.org/wiki/Apple_Disk_Image
327320
primary language: None
328-
description:
321+
description: macOS disk image file
329322
path_patterns: '*.dmg', '*.sparseimage'
330323
--------------------------------------------
331324
Package type: ear
@@ -468,6 +461,13 @@ Package type: ivy
468461
description: Ant IVY dependency file
469462
path_patterns: '*/ivy.xml'
470463
--------------------------------------------
464+
Package type: jar
465+
datasource_id: java_jar
466+
documentation URL: https://en.wikipedia.org/wiki/JAR_(file_format)
467+
primary language: None
468+
description: JAR Java Archive
469+
path_patterns: '*.jar'
470+
--------------------------------------------
471471
Package type: jar
472472
datasource_id: java_jar_manifest
473473
documentation URL: https://docs.oracle.com/javase/tutorial/deployment/jar/manifestindex.html
@@ -715,9 +715,9 @@ Package type: pypi
715715
--------------------------------------------
716716
Package type: readme
717717
datasource_id: readme
718-
documentation URL: None
718+
documentation URL:
719719
primary language: None
720-
description: None
720+
description:
721721
path_patterns: '*/README.android', '*/README.chromium', '*/README.facebook', '*/README.google', '*/README.thirdparty'
722722
--------------------------------------------
723723
Package type: rpm

tests/packagedcode/test_package_models.py

+10
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,16 @@ def test_package_data_datasource_id_are_unique(self):
153153
), f'Duplicated datasource_id: {pdh!r} with {seen[pdhid]!r}'
154154
seen[pdh.datasource_id] = pdh
155155

156+
def test_package_data_handlers_have_package_type(self):
157+
"""
158+
Check that every DataFileHandler declares a non-empty
159+
default_package_type.
160+
"""
161+
for pdh in ALL_DATAFILE_HANDLERS:
162+
pdh_type = pdh.default_package_type
163+
assert pdh_type
164+
165+
156166
def test_package_data_file_patterns_are_tuples(self):
157167
"""
158168
Check that all file patterns are tuples, as if they are

tests/summarycode/data/todo/todo_present/README.multi-orig-tarball-package-expected-diag.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
}
2525
],
2626
"detection_log": [
27-
"license-clues"
27+
"low-quality-matches"
2828
],
2929
"identifier": "borceux-3c39742c-edef-82b7-0cdd-fc4d9ff8b044"
3030
}

0 commit comments

Comments
 (0)