Skip to content

Commit a94d414

Browse files
authored
Merge pull request #2974 from nexB/2972-summary-consider-copyrights
Consider only copyrights in summary #2972
2 parents 2c49c4c + 91a2094 commit a94d414

File tree

820 files changed

+1321
-929
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

820 files changed

+1321
-929
lines changed

src/summarycode/copyright_tallies.py

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -485,49 +485,50 @@ def filter_junk(texts):
485485

486486

487487
COMMON_NAMES = {
488-
'3dfxinteractiveinc.': '3dfx Interactive, Inc.',
488+
'3dfxinteractiveinc.': '3dfx Interactive',
489489

490490
'cern': 'CERN - European Organization for Nuclear Research',
491491

492-
'ciscosystemsinc': 'Cisco Systems, Inc.',
493-
'ciscosystems': 'Cisco Systems, Inc.',
494-
'cisco': 'Cisco Systems, Inc.',
492+
'ciscosystemsinc': 'Cisco Systems',
493+
'ciscosystems': 'Cisco Systems',
494+
'cisco': 'Cisco Systems',
495495

496-
'daisy': 'Daisy Ltd.',
496+
'daisy': 'Daisy',
497+
'daisyltd': 'Daisy',
497498

498-
'fsf': 'Free Software Foundation, Inc.',
499-
'freesoftwarefoundation': 'Free Software Foundation, Inc.',
500-
'freesoftwarefoundationinc': 'Free Software Foundation, Inc.',
501-
'thefreesoftwarefoundation': 'Free Software Foundation, Inc.',
502-
'thefreesoftwarefoundationinc': 'Free Software Foundation, Inc.',
499+
'fsf': 'Free Software Foundation',
500+
'freesoftwarefoundation': 'Free Software Foundation',
501+
'freesoftwarefoundationinc': 'Free Software Foundation',
502+
'thefreesoftwarefoundation': 'Free Software Foundation',
503+
'thefreesoftwarefoundationinc': 'Free Software Foundation',
503504

504-
'hp': 'Hewlett-Packard, Inc.',
505-
'hewlettpackard': 'Hewlett-Packard, Inc.',
506-
'hewlettpackardco': 'Hewlett-Packard, Inc.',
507-
'hpcompany': 'Hewlett-Packard, Inc.',
508-
'hpdevelopmentcompanylp': 'Hewlett-Packard, Inc.',
509-
'hpdevelopmentcompany': 'Hewlett-Packard, Inc.',
510-
'hewlettpackardcompany': 'Hewlett-Packard, Inc.',
505+
'hp': 'Hewlett-Packard',
506+
'hewlettpackard': 'Hewlett-Packard',
507+
'hewlettpackardco': 'Hewlett-Packard',
508+
'hpcompany': 'Hewlett-Packard',
509+
'hpdevelopmentcompanylp': 'Hewlett-Packard',
510+
'hpdevelopmentcompany': 'Hewlett-Packard',
511+
'hewlettpackardcompany': 'Hewlett-Packard',
511512

512-
'theandroidopensourceproject': 'The Android Open Source Project, Inc.',
513-
'androidopensourceproject': 'The Android Open Source Project, Inc.',
513+
'theandroidopensourceproject': 'Android Open Source Project',
514+
'androidopensourceproject': 'Android Open Source Project',
514515

515-
'ibm': 'IBM Corporation',
516+
'ibm': 'IBM',
516517

517-
'redhat': 'Red Hat, Inc.',
518-
'redhatinc': 'Red Hat, Inc.',
518+
'redhat': 'Red Hat',
519+
'redhatinc': 'Red Hat',
519520

520-
'softwareinthepublicinterest': 'Software in the Public Interest, Inc.',
521-
'spiinc': 'Software in the Public Interest, Inc.',
521+
'softwareinthepublicinterest': 'Software in the Public Interest',
522+
'spiinc': 'Software in the Public Interest',
522523

523-
'suse': 'SuSE, Inc.',
524-
'suseinc': 'SuSE, Inc.',
524+
'suse': 'SuSE',
525+
'suseinc': 'SuSE',
525526

526-
'sunmicrosystems': 'Sun Microsystems, Inc.',
527-
'sunmicrosystemsinc': 'Sun Microsystems, Inc.',
528-
'sunmicro': 'Sun Microsystems, Inc.',
527+
'sunmicrosystems': 'Sun Microsystems',
528+
'sunmicrosystemsinc': 'Sun Microsystems',
529+
'sunmicro': 'Sun Microsystems',
529530

530-
'thaiopensourcesoftwarecenter': 'Thai Open Source Software Center Ltd.',
531+
'thaiopensourcesoftwarecenter': 'Thai Open Source Software Center',
531532

532533
'apachefoundation': 'The Apache Software Foundation',
533534
'apachegroup': 'The Apache Software Foundation',
@@ -540,20 +541,19 @@ def filter_junk(texts):
540541

541542
'regentsoftheuniversityofcalifornia': 'The Regents of the University of California',
542543

543-
# 'mit': 'the Massachusetts Institute of Technology',
544-
545-
'borland': 'Borland Corp.',
544+
'borland': 'Borland',
545+
'borlandcorp': 'Borland',
546546

547547
'microsoft': 'Microsoft',
548548
'microsoftcorp': 'Microsoft',
549549
'microsoftinc': 'Microsoft',
550550
'microsoftcorporation': 'Microsoft',
551551

552-
'google': 'Google Inc.',
553-
'googlellc': 'Google Inc.',
554-
'googleinc': 'Google Inc.',
552+
'google': 'Google',
553+
'googlellc': 'Google',
554+
'googleinc': 'Google',
555555

556-
'intel': 'Intel Corporation',
556+
'intel': 'Intel',
557557
}
558558

559559
# Remove everything except letters and numbers

src/summarycode/summarizer.py

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from cluecode.copyrights import CopyrightDetector
1919
from packagedcode.utils import combine_expressions
2020
from packagedcode import models
21+
from summarycode.copyright_tallies import canonical_holder
2122
from summarycode.score import compute_license_score
2223
from summarycode.score import get_field_values_from_codebase_resources
2324
from summarycode.score import unique
@@ -167,7 +168,7 @@ def get_declared_holders(codebase, holders_tallies):
167168
codebase, 'holders', key_files_only=True
168169
)
169170
entry_by_key_file_holders = {
170-
fingerprints.generate(entry['holder']): entry
171+
fingerprints.generate(canonical_holder(entry['holder'])): entry
171172
for entry in key_file_holders
172173
if entry['holder']
173174
}
@@ -212,25 +213,24 @@ def get_primary_language(programming_language_tallies):
212213

213214
def get_origin_info_from_top_level_packages(top_level_packages, codebase):
214215
"""
215-
Return a 3-tuple containing the strings of declared license expression,
216-
copyright holder, and primary programming language from a
216+
Return a 3-tuple containing the declared license expression string, a list
217+
of copyright holders, and primary programming language string from a
217218
``top_level_packages`` list of detected top-level packages mapping and a
218219
``codebase``.
219220
"""
220221
if not top_level_packages:
221-
return '', '', ''
222+
return '', [], ''
222223

223224
license_expressions = []
224225
programming_languages = []
225226
copyrights = []
226-
parties = []
227-
228-
for package_mapping in top_level_packages:
229-
package = models.Package.from_dict(package_mapping)
230-
# we are only interested in key packages
231-
if not is_key_package(package, codebase):
232-
continue
233227

228+
top_level_packages = [
229+
models.Package.from_dict(package_mapping)
230+
for package_mapping in top_level_packages
231+
]
232+
key_file_packages = [p for p in top_level_packages if is_key_package(p, codebase)]
233+
for package in key_file_packages:
234234
license_expression = package.license_expression
235235
if license_expression:
236236
license_expressions.append(license_expression)
@@ -243,8 +243,6 @@ def get_origin_info_from_top_level_packages(top_level_packages, codebase):
243243
if copyright_statement:
244244
copyrights.append(copyright_statement)
245245

246-
parties.extend(package.parties or [])
247-
248246
# Combine license expressions
249247
unique_license_expressions = unique(license_expressions)
250248
combined_declared_license_expression = combine_expressions(
@@ -263,9 +261,20 @@ def get_origin_info_from_top_level_packages(top_level_packages, codebase):
263261
declared_holders = []
264262
if holders:
265263
declared_holders = holders
266-
elif parties:
267-
declared_holders = [party.name for party in parties or []]
268-
264+
else:
265+
# If the package data does not contain an explicit copyright, check the
266+
# key files where the package data was detected from and see if there
267+
# are any holder detections that can be used.
268+
for package in key_file_packages:
269+
for datafile_path in package.datafile_paths:
270+
key_file_resource = codebase.get_resource(path=datafile_path)
271+
if not key_file_resource:
272+
continue
273+
holders = [h['holder'] for h in key_file_resource.holders]
274+
declared_holders.extend(holders)
275+
# Normalize holder names before collecting them
276+
# This allows us to properly remove declared holders from `other_holders` later
277+
declared_holders = [canonical_holder(h) for h in declared_holders]
269278
declared_holders = unique(declared_holders)
270279

271280
# Programming language

tests/cluecode/data/copyrights/COPYING_gpl-COPYING_gpl.gpl.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,5 @@ holders:
99
- Free Software Foundation, Inc.
1010
- the Free Software Foundation
1111
holders_summary:
12-
- value: Free Software Foundation, Inc.
12+
- value: Free Software Foundation
1313
count: 2

tests/cluecode/data/copyrights/afferogplv1-AfferoGPLv.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,5 @@ holders:
1313
holders_summary:
1414
- value: Affero Inc.
1515
count: 2
16-
- value: Free Software Foundation, Inc.
16+
- value: Free Software Foundation
1717
count: 1

tests/cluecode/data/copyrights/afferogplv3-AfferoGPLv.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,5 @@ copyrights:
77
holders:
88
- Free Software Foundation, Inc.
99
holders_summary:
10-
- value: Free Software Foundation, Inc.
10+
- value: Free Software Foundation
1111
count: 1

tests/cluecode/data/copyrights/android_c-c.c.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ holders:
99
- The Android Open Source Project
1010
- Colin Percival
1111
holders_summary:
12-
- value: Colin Percival
12+
- value: Android Open Source Project
1313
count: 1
14-
- value: The Android Open Source Project, Inc.
14+
- value: Colin Percival
1515
count: 1

tests/cluecode/data/copyrights/apache2_debian_trailing_name_missed-apache.label.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ holders_summary:
7575
count: 2
7676
- value: Board of Trustees of the University of Illinois
7777
count: 1
78-
- value: Cisco Systems, Inc.
78+
- value: Cisco Systems
7979
count: 1
8080
- value: Eric Haines
8181
count: 1

tests/cluecode/data/copyrights/bigelow_holmes-Bigelow&Holmes.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,5 @@ holders:
1111
holders_summary:
1212
- value: Bigelow & Holmes
1313
count: 1
14-
- value: Sun Microsystems, Inc.
14+
- value: Sun Microsystems
1515
count: 1

tests/cluecode/data/copyrights/colin_android-bsdiff_c.c.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ holders:
99
- The Android Open Source Project
1010
- Colin Percival
1111
holders_summary:
12-
- value: Colin Percival
12+
- value: Android Open Source Project
1313
count: 1
14-
- value: The Android Open Source Project, Inc.
14+
- value: Colin Percival
1515
count: 1

tests/cluecode/data/copyrights/complex_notice-NOTICE.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ holders:
5858
holders_summary:
5959
- value: David Schultz
6060
count: 9
61-
- value: Sun Microsystems, Inc.
61+
- value: Sun Microsystems
6262
count: 4
6363
- value: Mike Barcroft
6464
count: 2

0 commit comments

Comments
 (0)