1313import fingerprints
1414from commoncode .cliutils import POST_SCAN_GROUP , PluggableCommandLineOption
1515from plugincode .post_scan import PostScanPlugin , post_scan_impl
16-
16+ from license_expression import Licensing
1717from cluecode .copyrights import CopyrightDetector
18+ from packagedcode .utils import combine_expressions
1819from summarycode .copyright_summary import canonical_holder
1920from summarycode .score import (compute_license_score ,
2021 get_field_values_from_codebase_resources ,
@@ -91,7 +92,7 @@ def process_codebase(self, codebase, summary, **kwargs):
9192 ]
9293
9394 # Determine declared license expression, declared holder, and primary language from Package data
94- declared_license_expression , declared_holder , primary_language = get_origin_info_from_package_data (key_file_package_data , programming_language_summary )
95+ declared_license_expression , declared_holder , primary_language = get_origin_info_from_package_data (key_file_package_data )
9596
9697 if declared_license_expression :
9798 scoring_elements , _ = compute_license_score (codebase )
@@ -340,18 +341,18 @@ def get_declared_holder(codebase, holders_summary):
340341 for entry in holders_summary if entry ['value' ]
341342 }
342343 key_file_holders = get_field_values_from_codebase_resources (codebase , 'holders' , key_files_only = True )
343- key_file_holders = [
344- fingerprints .generate (entry ['holder' ])
345- for entry in key_file_holders
346- ]
347- unique_key_file_holders = unique (key_file_holders )
344+ entry_by_key_file_holders = {
345+ fingerprints .generate (entry ['holder' ]): entry
346+ for entry in key_file_holders if entry ['holder' ]
347+ }
348+ unique_key_file_holders = unique (entry_by_key_file_holders .keys ())
349+ unique_key_file_holders_entries = [entry_by_holders [holder ] for holder in unique_key_file_holders ]
348350
349351 holder_by_counts = defaultdict (list )
350- for holder in unique_key_file_holders :
351- entry = entry_by_holders .get (holder ) or {}
352- count = entry .get ('count' )
352+ for holder_entry in unique_key_file_holders_entries :
353+ count = holder_entry .get ('count' )
353354 if count :
354- holder = entry .get ('value' )
355+ holder = holder_entry .get ('value' )
355356 holder_by_counts [count ].append (holder )
356357
357358 declared_holder = ''
@@ -381,56 +382,64 @@ def get_primary_language(programming_language_summary):
381382 return primary_language
382383
383384
384- def get_origin_info_from_package_data (key_file_package_data , programming_language_summary ):
385+ def get_origin_info_from_package_data (key_file_package_data ):
385386 """
386387 Return a 3-tuple containing the strings of declared license expression,
387388 copyright holder, and primary programming language from a list of detected
388389 package data.
389390 """
390- counts_by_programming_languages = {
391- entry ['value' ]: entry ['count' ]
392- for entry in programming_language_summary
393- }
394- packages_by_primary_languages = {
395- package ['primary_language' ]: package
396- for package in key_file_package_data if package ['primary_language' ]
397- }
398-
399- # We pick the package data to report as the origin information based on the
400- # primary language of the packages
401- # We will use the package whose primary language occurs most often in our codebase
402- highest_count = 0
403- top_package = None
404- for package_primary_language , package in packages_by_primary_languages .items ():
405- count = counts_by_programming_languages .get (package_primary_language ) or 0
406- if count > highest_count :
407- highest_count = count
408- top_package = package
409-
410- if not top_package :
391+ if not key_file_package_data :
411392 return '' , '' , ''
412393
413- package = top_package
394+ if len (key_file_package_data ) > 1 :
395+ license_expressions = []
396+ programming_languages = []
397+ copyrights = []
398+ parties = []
399+ for package_data in key_file_package_data :
400+ license_expression = package_data .get ('license_expression' ) or ''
401+ programming_language = package_data .get ('primary_language' ) or ''
402+ copyright_statement = package_data .get ('copyright' ) or ''
403+ package_parties = package_data .get ('parties' , [])
404+ license_expressions .append (license_expression )
405+ programming_languages .append (programming_language )
406+ copyrights .append (copyright_statement )
407+ parties .extend (package_parties )
408+
409+ # Combine license expressions
410+ unique_license_expressions = unique (license_expressions )
411+ combined_declared_license_expression = combine_expressions (unique_license_expressions )
412+ declared_license_expression = ''
413+ if combined_declared_license_expression :
414+ declared_license_expression = str (Licensing ().parse (combined_declared_license_expression ).simplify ())
415+
416+ # Combine holders
417+ holders = list (get_holders_from_copyright (copyrights ))
418+ declared_holder = ''
419+ if holders :
420+ declared_holder = ', ' .join (holders )
421+ elif parties :
422+ party_members = [party ['name' ] for party in parties ]
423+ declared_holder = ', ' .join (party_members )
424+
425+ # Programming language
426+ unique_programming_languages = unique (programming_languages )
427+ primary_language = ''
428+ if len (unique_programming_languages ) == 1 :
429+ primary_language = unique_programming_languages [0 ]
430+
431+ return declared_license_expression , declared_holder , primary_language
432+
433+ package = key_file_package_data [0 ]
414434 declared_license_expression = package .get ('license_expression' ) or ''
415435 package_primary_language = package .get ('primary_language' ) or ''
416436
417437 # Determine declared holder from Package copyright statement
418438 package_copyright = package .get ('copyright' , '' )
419439 package_holders = []
420440 if package_copyright :
421- numbered_lines = [(0 , package_copyright )]
422-
423- holder_detections = CopyrightDetector ().detect (
424- numbered_lines ,
425- include_copyrights = False ,
426- include_holders = True ,
427- include_authors = False ,
428- )
429-
430- for holder_detection in holder_detections :
431- package_holders .append (holder_detection .holder )
441+ package_holders = list (get_holders_from_copyright (package_copyright ))
432442
433- declared_holder = ''
434443 if package_holders :
435444 declared_holder = ', ' .join (package_holders )
436445 else :
@@ -442,3 +451,29 @@ def get_origin_info_from_package_data(key_file_package_data, programming_languag
442451 declared_holder = ', ' .join (party_members )
443452
444453 return declared_license_expression , declared_holder , package_primary_language
454+
455+
def get_holders_from_copyright(copyright):
    """
    Yield holders detected from a `copyright` string or from a list (or tuple)
    of copyright strings.

    Each string is handed to the copyright detector as one numbered line: a
    single string becomes line 0 and a sequence is numbered by position.
    Yield nothing when `copyright` is None, empty, or contains only empty
    entries.
    """
    if copyright is None:
        return

    if isinstance(copyright, (list, tuple)):
        numbered_lines = list(enumerate(copyright))
    else:
        numbered_lines = [(0, copyright)]

    # Callers may pass '' or a list padded with '' for packages that have no
    # copyright statement. There is nothing to scan then, so return before
    # building a detector.
    if not any(line for _, line in numbered_lines):
        return

    holder_detections = CopyrightDetector().detect(
        numbered_lines,
        include_copyrights=False,
        include_holders=True,
        include_authors=False,
    )

    for holder_detection in holder_detections:
        yield holder_detection.holder
0 commit comments