Skip to content

Commit cb4ac9e

Browse files
Modify LicenseMatch data in results
LicenseMatch data is now based on a license expression instead of a license key.
Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent d70acde commit cb4ac9e

File tree

5 files changed

+568
-186
lines changed

5 files changed

+568
-186
lines changed

src/licensedcode/detection.py

Lines changed: 50 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,7 @@ def to_dict(
362362
data_matches = []
363363

364364
for match in matches:
365-
data_matches.extend(
365+
data_matches.append(
366366
licenses_data_from_match(
367367
match=match,
368368
include_text=include_text,
@@ -400,60 +400,67 @@ def licenses_data_from_match(
400400
SCANCODE_LICENSE_TEXT_URL = SCANCODE_BASE_URL + '/{}.LICENSE'
401401
SCANCODE_LICENSE_DATA_URL = SCANCODE_BASE_URL + '/{}.yml'
402402

403-
detected_licenses = []
403+
result = {}
404+
405+
# Detection Level Information
406+
result['score'] = match.score()
407+
result['start_line'] = match.start_line
408+
result['end_line'] = match.end_line
409+
result['matched_length'] = match.len()
410+
result['match_coverage'] = match.coverage()
411+
result['matcher'] = match.matcher
412+
413+
# LicenseDB Level Information (Rule that was matched)
414+
result['license_expression'] = match.rule.license_expression
415+
result['licensedb_identifier'] = match.rule.identifier
416+
result['referenced_filenames'] = match.rule.referenced_filenames
417+
result['is_license_text'] = match.rule.is_license_text
418+
result['is_license_notice'] = match.rule.is_license_notice
419+
result['is_license_reference'] = match.rule.is_license_reference
420+
result['is_license_tag'] = match.rule.is_license_tag
421+
result['is_license_intro'] = match.rule.is_license_intro
422+
result['rule_length'] = match.rule.length
423+
result['rule_relevance'] = match.rule.relevance
424+
if include_text:
425+
result['matched_text'] = matched_text
426+
427+
# License Level Information (Individual licenses that this rule refers to)
428+
result['licenses'] = detected_licenses = []
404429
for license_key in match.rule.license_keys():
430+
detected_license = {}
431+
detected_licenses.append(detected_license)
432+
405433
lic = licenses.get(license_key)
406-
result = {}
407-
detected_licenses.append(result)
408-
result['key'] = lic.key
409-
result['score'] = match.score()
410-
result['name'] = lic.name
411-
result['short_name'] = lic.short_name
412-
result['category'] = lic.category
413-
result['is_exception'] = lic.is_exception
414-
result['is_unknown'] = lic.is_unknown
415-
result['owner'] = lic.owner
416-
result['homepage_url'] = lic.homepage_url
417-
result['text_url'] = lic.text_urls[0] if lic.text_urls else ''
418-
result['reference_url'] = license_url_template.format(lic.key)
419-
result['scancode_text_url'] = SCANCODE_LICENSE_TEXT_URL.format(lic.key)
420-
result['scancode_data_url'] = SCANCODE_LICENSE_DATA_URL.format(lic.key)
434+
435+
detected_license['key'] = lic.key
436+
detected_license['name'] = lic.name
437+
detected_license['short_name'] = lic.short_name
438+
detected_license['category'] = lic.category
439+
detected_license['is_exception'] = lic.is_exception
440+
detected_license['is_unknown'] = lic.is_unknown
441+
detected_license['owner'] = lic.owner
442+
detected_license['homepage_url'] = lic.homepage_url
443+
detected_license['text_url'] = lic.text_urls[0] if lic.text_urls else ''
444+
detected_license['reference_url'] = license_url_template.format(lic.key)
445+
detected_license['scancode_text_url'] = SCANCODE_LICENSE_TEXT_URL.format(lic.key)
446+
detected_license['scancode_data_url'] = SCANCODE_LICENSE_DATA_URL.format(lic.key)
421447

422448
spdx_key = lic.spdx_license_key
423-
result['spdx_license_key'] = spdx_key
449+
detected_license['spdx_license_key'] = spdx_key
424450

425451
if spdx_key:
426452
is_license_ref = spdx_key.lower().startswith('licenseref-')
427453
if is_license_ref:
428454
spdx_url = SCANCODE_LICENSE_TEXT_URL.format(lic.key)
429455
else:
456+
# TODO: Is this replacing spdx_key???
430457
spdx_key = lic.spdx_license_key.rstrip('+')
431458
spdx_url = SPDX_LICENSE_URL.format(spdx_key)
432459
else:
433460
spdx_url = ''
434-
result['spdx_url'] = spdx_url
435-
result['start_line'] = match.start_line
436-
result['end_line'] = match.end_line
437-
matched_rule = result['matched_rule'] = {}
438-
matched_rule['identifier'] = match.rule.identifier
439-
matched_rule['license_expression'] = match.rule.license_expression
440-
matched_rule['licenses'] = match.rule.license_keys()
441-
matched_rule['referenced_filenames'] = match.rule.referenced_filenames
442-
matched_rule['is_license_text'] = match.rule.is_license_text
443-
matched_rule['is_license_notice'] = match.rule.is_license_notice
444-
matched_rule['is_license_reference'] = match.rule.is_license_reference
445-
matched_rule['is_license_tag'] = match.rule.is_license_tag
446-
matched_rule['is_license_intro'] = match.rule.is_license_intro
447-
matched_rule['has_unknown'] = match.rule.has_unknown
448-
matched_rule['matcher'] = match.matcher
449-
matched_rule['rule_length'] = match.rule.length
450-
matched_rule['matched_length'] = match.len()
451-
matched_rule['match_coverage'] = match.coverage()
452-
matched_rule['rule_relevance'] = match.rule.relevance
453-
# FIXME: for sanity this should always be included?????
454-
if include_text:
455-
result['matched_text'] = matched_text
456-
return detected_licenses
461+
detected_license['spdx_url'] = spdx_url
462+
463+
return result
457464

458465

459466
def is_correct_detection(license_matches):
@@ -622,7 +629,7 @@ def is_license_reference_local_file(license_match):
622629
Return True if `license_match` LicenseMatch dict has a non-empty `referenced_filename`,
623630
i.e. contains a license reference to a local file.
624631
"""
625-
return bool(license_match['matched_rule']['referenced_filenames'])
632+
return bool(license_match['referenced_filenames'])
626633

627634

628635
def filter_license_references(license_matches):
@@ -666,7 +673,7 @@ def get_detected_license_expression(matches, analysis, post_scan=False):
666673

667674
if isinstance(matches[0], dict):
668675
combined_expression = combine_expressions(
669-
expressions=[match['matched_rule']['license_expression'] for match in matches_for_expression]
676+
expressions=[match['license_expression'] for match in matches_for_expression]
670677
)
671678
else:
672679
combined_expression = combine_expressions(

src/licensedcode/plugin_license.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ def get_referenced_filenames(license_matches):
287287
"""
288288
unique_filenames = []
289289
for license_match in license_matches:
290-
for filename in license_match['matched_rule']['referenced_filenames']:
290+
for filename in license_match['referenced_filenames']:
291291
if filename not in unique_filenames:
292292
unique_filenames.append(filename)
293293

0 commit comments

Comments
 (0)