Skip to content

Commit cc945a0

Browse files
Apply LicenseDetection everywhere
Modify code to align to the LicenseDetection model everywhere. Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent d57f390 commit cc945a0

File tree

10 files changed

+121
-50
lines changed

10 files changed

+121
-50
lines changed

src/formattedcode/output_debian.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from formattedcode import FileOptionType
1717
from plugincode.output import output_impl
1818
from plugincode.output import OutputPlugin
19+
from licensedcode.detection import get_matches_from_detections
1920

2021
from scancode import notice
2122

@@ -179,8 +180,8 @@ def get_texts(detected_licenses):
179180

180181
# set of (start line, end line, matched_rule identifier)
181182
seen = set()
182-
for lic in detected_licenses:
183-
key = lic['start_line'], lic['end_line'], lic['matched_rule']['identifier']
183+
for lic in get_matches_from_detections(detected_licenses):
184+
key = lic['start_line'], lic['end_line'], lic['licensedb_identifier']
184185
if key not in seen:
185186
yield lic['matched_text']
186187
seen.add(key)

src/formattedcode/output_html.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from commoncode.cliutils import OUTPUT_GROUP
3030
from plugincode.output import output_impl
3131
from plugincode.output import OutputPlugin
32+
from licensedcode.detection import get_matches_from_detections
3233

3334
"""
3435
Output plugins to write scan results using templates such as HTML.
@@ -146,6 +147,7 @@ def generate_output(results, version, template):
146147
# support adding new scans at all
147148

148149
from licensedcode.cache import get_licenses_db
150+
licenses_db = get_licenses_db()
149151

150152
converted = {}
151153
converted_infos = {}
@@ -170,22 +172,22 @@ def generate_output(results, version, template):
170172
'value': entry['copyright'],
171173
})
172174
if LICENSES in scanned_file:
173-
for entry in scanned_file[LICENSES]:
175+
for match in get_matches_from_detections(scanned_file[LICENSES]):
174176
# make copy
175-
entry = dict(entry)
176-
entry_key = entry['key']
177+
match = dict(match)
178+
license_expression = match['license_expression']
177179
results.append({
178-
'start': entry['start_line'],
179-
'end': entry['end_line'],
180+
'start': match['start_line'],
181+
'end': match['end_line'],
180182
'what': 'license',
181-
'value': entry_key,
183+
'value': license_expression,
182184
})
183185

184186
# FIXME: we should NOT rely on license objects: only use what is in the JSON instead
185-
if entry_key not in licenses:
186-
licenses[entry_key] = entry
187+
if license_expression not in licenses:
188+
licenses[license_expression] = match
187189
# we were modifying the scan data in place ....
188-
entry['object'] = get_licenses_db().get(entry_key)
190+
match['object'] = licenses_db.get(license_expression)
189191
if results:
190192
converted[path] = sorted(results, key=itemgetter('start'))
191193

src/licensedcode/detection.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -759,6 +759,19 @@ def get_matches_from_detections(license_detections):
759759
return license_matches
760760

761761

762+
def get_license_keys_from_detections(license_detections):
    """
    Return a list of unique license key strings from a list of LicenseDetections.

    The license matches of ``license_detections`` are obtained with
    get_matches_from_detections(). Each match is read as a mapping with a
    ``licenses`` list of mappings that each carry a ``key``. Matches without
    a ``licenses`` list and entries without a ``key`` are skipped. Keys are
    returned in first-seen order.
    """
    # a dict is used as an insertion-ordered set to keep the output stable
    license_keys = {}

    for match in get_matches_from_detections(license_detections):
        # tolerate a missing or None 'licenses' value instead of raising
        for entry in match.get('licenses') or []:
            key = entry.get('key')
            if key:
                license_keys[key] = None

    return list(license_keys)
773+
774+
762775
def analyze_detection(license_matches):
763776
"""
764777
Analyse a list of LicenseMatch objects, and determine if the license detection
@@ -919,5 +932,8 @@ def detect_licenses(location, min_score, deadline, **kwargs):
919932
**kwargs,
920933
)
921934

935+
if not matches:
936+
return
937+
922938
for group_of_matches in group_matches(matches):
923939
yield LicenseDetection.from_matches(matches=group_of_matches)

src/licensedcode/plugin_license.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,6 @@
3131

3232
TRACE = os.environ.get('SCANCODE_DEBUG_PLUGIN_LICENSE', False)
3333

34-
def logger_debug(*args): pass
35-
3634

3735
def logger_debug(*args):
3836
pass

src/licensedcode/plugin_license_policy.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,39 @@
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
99

10+
from inspect import trace
1011
from os.path import exists
1112
from os.path import isdir
1213

1314
import attr
15+
import os
16+
import logging
1417
import saneyaml
1518

1619
from plugincode.post_scan import PostScanPlugin
1720
from plugincode.post_scan import post_scan_impl
1821
from commoncode.cliutils import PluggableCommandLineOption
1922
from commoncode.cliutils import POST_SCAN_GROUP
23+
from licensedcode.detection import get_license_keys_from_detections
24+
25+
26+
# Set the SCANCODE_DEBUG_LICENSE_POLICY environment variable to enable debug tracing
TRACE = os.environ.get('SCANCODE_DEBUG_LICENSE_POLICY', False)


def logger_debug(*args):
    """
    Do nothing: this is the default no-op tracer used when TRACE is not enabled.
    """
    pass


logger = logging.getLogger(__name__)

if TRACE:
    import sys

    # only configure logging on stdout at DEBUG level when tracing is requested,
    # so that importing this module does not change logging for regular runs
    logging.basicConfig(stream=sys.stdout)
    logger.setLevel(logging.DEBUG)

    def logger_debug(*args):
        """
        Log ``args`` as a single space-joined DEBUG message, using the repr of
        any argument that is not a (non-empty) string.
        """
        return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args))
2043

2144

2245
@post_scan_impl
@@ -63,7 +86,7 @@ def process_codebase(self, codebase, license_policy, **kwargs):
6386
continue
6487

6588
try:
66-
resource_license_keys = set([entry.get('key') for entry in resource.licenses])
89+
resource_license_keys = get_license_keys_from_detections(resource.licenses)
6790

6891
except AttributeError:
6992
# add license_policy regardless if there is license info or not

src/licensedcode/plugin_license_text.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from plugincode.post_scan import post_scan_impl
1414
from commoncode.cliutils import PluggableCommandLineOption
1515
from commoncode.cliutils import POST_SCAN_GROUP
16+
from licensedcode.detection import get_matches_from_detections
1617

1718
# Set to True to enable debug tracing
1819
TRACE = False
@@ -72,12 +73,13 @@ def process_codebase(self, codebase, is_license_text, **kwargs):
7273
continue
7374
# keep unique texts/line ranges since we repeat this for each matched licenses
7475
license_texts = set()
75-
for lic in resource.licenses:
76+
matches = get_matches_from_detections(resource.licenses)
77+
for match in matches:
7678
license_texts.add(
77-
(lic.get('matched_text'),
78-
lic.get('start_line', 0),
79-
lic.get('end_line', 0),
80-
lic.get('matched_rule', {}).get('match_coverage', 0))
79+
(match.get('matched_text'),
80+
match.get('start_line', 0),
81+
match.get('end_line', 0),
82+
match.get('match_coverage', 0))
8183
)
8284

8385
# use coverage to weight an estimate of the actual matched length

src/summarycode/score.py

Lines changed: 33 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from plugincode.post_scan import post_scan_impl
1717

1818
from packagedcode.utils import combine_expressions
19+
from licensedcode.detection import get_matches_from_detections
1920

2021
# Tracing flags
2122
TRACE = False
@@ -124,11 +125,12 @@ def compute_license_score(codebase):
124125
"""
125126

126127
scoring_elements = ScoringElements()
127-
declared_licenses = get_field_values_from_codebase_resources(
128+
license_detections = get_field_values_from_codebase_resources(
128129
codebase=codebase,
129130
field_name='licenses',
130131
key_files_only=True,
131132
)
133+
declared_licenses = get_matches_from_detections(license_detections)
132134
declared_license_expressions = get_field_values_from_codebase_resources(
133135
codebase=codebase, field_name='license_expressions', key_files_only=True
134136
)
@@ -140,9 +142,10 @@ def compute_license_score(codebase):
140142
codebase=codebase, field_name='copyrights', key_files_only=True
141143
)
142144

143-
other_licenses = get_field_values_from_codebase_resources(
145+
other_license_detections = get_field_values_from_codebase_resources(
144146
codebase=codebase, field_name='licenses', key_files_only=False
145147
)
148+
other_licenses = get_matches_from_detections(other_license_detections)
146149

147150
scoring_elements.declared_license = bool(declared_licenses)
148151
if scoring_elements.declared_license:
@@ -244,20 +247,19 @@ class LicenseFilter(object):
244247

245248
def is_good_license(detected_license):
246249
"""
247-
Return True if a `detected license` mapping is consider to a high quality
250+
Return True if a `detected license` mapping is considered to be a high quality
248251
conclusive match.
249252
"""
250253
score = detected_license['score']
251-
rule = detected_license['matched_rule']
252-
coverage = rule.get('match_coverage') or 0
253-
relevance = rule.get('rule_relevance') or 0
254+
coverage = detected_license.get('match_coverage') or 0
255+
relevance = detected_license.get('rule_relevance') or 0
254256
match_types = dict(
255257
[
256-
('is_license_text', rule['is_license_text']),
257-
('is_license_notice', rule['is_license_notice']),
258-
('is_license_reference', rule['is_license_reference']),
259-
('is_license_tag', rule['is_license_tag']),
260-
('is_license_intro', rule['is_license_intro']),
258+
('is_license_text', detected_license['is_license_text']),
259+
('is_license_notice', detected_license['is_license_notice']),
260+
('is_license_reference', detected_license['is_license_reference']),
261+
('is_license_tag', detected_license['is_license_tag']),
262+
('is_license_intro', detected_license['is_license_intro']),
261263
]
262264
)
263265
matched = False
@@ -320,15 +322,24 @@ def get_field_values_from_codebase_resources(codebase, field_name, key_files_onl
320322
return values
321323

322324

323-
def get_license_categories(license_infos):
325+
def get_categories_from_match(license_match):
    """
    Return a list of license category strings from a single LicenseMatch mapping.

    The categories are read from the ``category`` of each entry in the
    ``licenses`` list of ``license_match``, in order and with duplicates kept.
    An entry without a ``category`` contributes None. Return an empty list
    when ``license_match`` has no ``licenses`` or when this is None.
    """
    # `or []` guards against a missing or None 'licenses' value
    licenses = license_match.get('licenses') or []
    return [license_info.get('category') for license_info in licenses]
331+
332+
333+
def get_license_categories(declared_licenses):
324334
"""
325335
Return a list of unique license category strings from the `declared_licenses` license matches.
326336
"""
327337
license_categories = []
328-
for license_info in license_infos:
329-
category = license_info.get('category', '')
330-
if category not in license_categories:
331-
license_categories.append(category)
338+
339+
for match in declared_licenses:
340+
for category in get_categories_from_match(match):
341+
if category not in license_categories:
342+
license_categories.append(category)
332343
return license_categories
333344

334345

@@ -339,11 +350,10 @@ def check_for_license_texts(declared_licenses):
339350
If so, return True. Otherwise, return False.
340351
"""
341352
for declared_license in declared_licenses:
342-
matched_rule = declared_license.get('matched_rule', {})
343353
if any(
344354
[
345-
matched_rule.get('is_license_text', False),
346-
matched_rule.get('is_license_notice', False),
355+
declared_license.get('is_license_text', False),
356+
declared_license.get('is_license_notice', False),
347357
]
348358
):
349359
return True
@@ -379,9 +389,10 @@ def check_for_conflicting_licenses(other_licenses):
379389
380390
If so, return True. Otherwise, return False.
381391
"""
382-
for license_info in other_licenses:
383-
if license_info.get('category', '') in CONFLICTING_LICENSE_CATEGORIES:
384-
return True
392+
for license_match in other_licenses:
393+
for category in get_categories_from_match(license_match):
394+
if category in CONFLICTING_LICENSE_CATEGORIES:
395+
return True
385396
return False
386397

387398

tests/licensedcode/test_plugin_license_detection.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -144,10 +144,10 @@ def test_license_match_referenced_filename():
144144

145145
def test_get_referenced_filenames():
146146
license_matches = [
147-
{'matched_rule': {'referenced_filenames' : ['LICENSE.txt', 'COPYING']}},
148-
{'matched_rule': {'referenced_filenames' : ['COPYING', 'LICENSE.txt']}},
149-
{'matched_rule': {'referenced_filenames' : ['copying']}},
150-
{'matched_rule': {'referenced_filenames' : []}},
147+
{'referenced_filenames' : ['LICENSE.txt', 'COPYING']},
148+
{'referenced_filenames' : ['COPYING', 'LICENSE.txt']},
149+
{'referenced_filenames' : ['copying']},
150+
{'referenced_filenames' : []},
151151
]
152152
expected = ['LICENSE.txt', 'COPYING', 'copying']
153153
assert get_referenced_filenames(license_matches) == expected

tests/licensedcode/test_plugin_license_policy.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,31 @@
1515
from licensedcode.plugin_license_policy import load_license_policy
1616
from scancode.cli_test_utils import load_json_result
1717
from scancode.cli_test_utils import run_scan_click
18-
18+
from scancode.cli_test_utils import check_json_scan
19+
from scancode_config import REGEN_TEST_FIXTURES
1920

2021
class TestLicensePolicy(FileDrivenTesting):
2122

2223
test_data_dir = join(dirname(__file__), 'data')
2324

25+
def test_end_to_end_scan_with_license_policy(self):
26+
test_dir = self.extract_test_tar('plugin_license_policy/policy-codebase.tgz')
27+
policy_file = self.get_test_loc('plugin_license_policy/process_codebase_info_license_valid_policy_file.yml')
28+
result_file = self.get_temp_file('json')
29+
args = [
30+
'--info',
31+
'--license',
32+
'--license-policy',
33+
policy_file,
34+
test_dir,
35+
'--json-pp',
36+
result_file
37+
]
38+
run_scan_click(args)
39+
test_loc = self.get_test_loc('plugin_license_policy/policy-codebase.expected.json')
40+
check_json_scan(test_loc, result_file, regen=REGEN_TEST_FIXTURES)
41+
42+
2443
def test_process_codebase_info_license_duplicate_key_policy_file(self):
2544
test_dir = self.extract_test_tar('plugin_license_policy/policy-codebase.tgz')
2645
policy_file = self.get_test_loc('plugin_license_policy/process_codebase_info_license_duplicate_key_policy_file.yml')

tests/scancode/test_api.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -129,8 +129,7 @@ def test_get_license_with_expression(self):
129129
test_file = self.get_test_loc('api/license/apache-1.0.txt')
130130
results = api.get_licenses(test_file)
131131
expected = [
132-
'apache-1.0',
133-
'gpl-2.0 WITH linux-syscall-exception-gpl OR linux-openib'
132+
'apache-1.0 AND (gpl-2.0 WITH linux-syscall-exception-gpl OR linux-openib)'
134133
]
135134
assert results['license_expressions'] == expected
136135

@@ -144,5 +143,5 @@ def test_get_license_returns_correct_lines(self):
144143
test_file = self.get_test_loc('api/license/correct_lines2')
145144
results = api.get_licenses(test_file)
146145
assert results['license_expressions'] == ['mit']
147-
assert results['licenses'][0]['start_line'] == 2
148-
assert results['licenses'][0]['end_line'] == 4
146+
assert results['licenses'][0]['matches'][0]['start_line'] == 2
147+
assert results['licenses'][0]['matches'][0]['end_line'] == 4

0 commit comments

Comments
 (0)