@@ -104,6 +104,7 @@ class DetectionCategory(Enum):
104104 IMPERFECT_COVERAGE = 'imperfect-match-coverage'
105105 FALSE_POSITVE = 'possible-false-positive'
106106 UNDETECTED_LICENSE = 'undetected-license'
107+ MATCH_FRAGMENTS = 'match_fragments'
107108
108109
109110class DetectionRule (Enum ):
@@ -141,6 +142,9 @@ class FileRegion:
141142 start_line = attr .ib (type = int )
142143 end_line = attr .ib (type = int )
143144
def to_dict(self):
    """
    Return the attrs fields of this object as a plain ``dict``.
    """
    as_mapping = attr.asdict(self, dict_factory=dict)
    return as_mapping
147+
144148
145149@attr .s (slots = True , eq = False , order = False )
146150class LicenseDetection :
@@ -613,6 +617,106 @@ def from_dicts(cls, license_match_mappings):
613617 """
614618 return [LicenseMatchFromResult .from_dict (lmm ) for lmm in license_match_mappings ]
615619
def to_dict(
    self,
    include_text=False,
    license_text_diagnostics=False,
    whole_lines=True,
):
    """
    Return a mapping of "result" scan data for this license match.

    The match-level entries come first (score, line span, matched length,
    match coverage and matcher), followed by the entries describing the
    matched rule. A ``matched_text`` entry is appended last, and only when
    ``include_text`` is true.

    ``license_text_diagnostics`` and ``whole_lines`` are accepted but are
    not used by this implementation.
    """
    # The matched text is only read when the caller asks for it.
    text = self.matched_text if include_text else None

    # Detection Level Information
    match_fields = {
        'score': self.score(),
        'start_line': self.start_line,
        'end_line': self.end_line,
        'matched_length': self.len(),
        'match_coverage': self.coverage(),
        'matcher': self.matcher,
    }

    # LicenseDB Level Information (Rule that was matched)
    rule_fields = {
        'license_expression': self.rule.license_expression,
        'rule_identifier': self.rule.identifier,
        'rule_relevance': self.rule.relevance,
        'rule_url': self.rule.rule_url,
    }

    data = {**match_fields, **rule_fields}
    if include_text:
        data['matched_text'] = text
    return data
652+
653+
def collect_license_detections(codebase, include_license_clues=True):
    """
    Return a list of license detection objects collected from the resources
    and the package data of a ``codebase``.

    For each resource returned by ``codebase.walk()``:

    - When the codebase root has a ``license_detections`` attribute, the
      resource ``license_detections`` mappings are rehydrated, the resource
      ``license_clues`` are grouped into additional detections (only when
      ``include_license_clues`` is true), and these detections are run
      through ``process_detections`` before being collected.
    - When the codebase root has a ``package_data`` attribute, the
      ``license_detections`` and ``other_license_detections`` of each
      package mapping of the resource are rehydrated and collected as-is.

    A package mapping that lacks one of these keys, or has an empty or None
    value for it, contributes nothing for that key.
    """
    has_packages = hasattr(codebase.root, 'package_data')
    has_licenses = hasattr(codebase.root, 'license_detections')

    all_license_detections = []

    for resource in codebase.walk():

        if has_licenses:
            resource_license_detections = []
            license_detections = getattr(resource, 'license_detections', []) or []
            license_clues = getattr(resource, 'license_clues', []) or []

            if license_detections:
                resource_license_detections.extend(
                    detections_from_license_detection_mappings(
                        license_detection_mappings=license_detections,
                        file_path=resource.path,
                    )
                )

            if include_license_clues and license_clues:
                license_matches = LicenseMatchFromResult.from_dicts(
                    license_match_mappings=license_clues,
                )

                # Each group of clue matches becomes its own detection, tied
                # to the region of this resource that it covers.
                for group_of_matches in group_matches(license_matches=license_matches):
                    detection = LicenseDetection.from_matches(matches=group_of_matches)
                    detection.file_region = detection.get_file_region(path=resource.path)
                    resource_license_detections.append(detection)

            # Resource detections only exist when license data is present, so
            # processing them is skipped otherwise.
            all_license_detections.extend(
                process_detections(detections=resource_license_detections)
            )

            if TRACE:
                logger_debug(
                    'before process_detections licenses:',
                    f'resource_license_detections: {resource_license_detections}\n',
                    f'all_license_detections: {all_license_detections}',
                )

        if has_packages:
            package_data = getattr(resource, 'package_data', []) or []

            package_license_detection_mappings = []
            for package in package_data:
                # Use .get() so that a package mapping without one of these
                # keys is skipped rather than raising a KeyError.
                for field_name in ('license_detections', 'other_license_detections'):
                    package_license_detection_mappings.extend(
                        package.get(field_name) or []
                    )

            # Rehydrate once per resource, after all its packages were
            # visited, so that no mapping is collected more than once.
            if package_license_detection_mappings:
                all_license_detections.extend(
                    detections_from_license_detection_mappings(
                        license_detection_mappings=package_license_detection_mappings,
                        file_path=resource.path,
                    )
                )

    return all_license_detections
719+
616720
617721@attr .s
618722class UniqueDetection :
@@ -624,7 +728,7 @@ class UniqueDetection:
624728 detection_count = attr .ib (default = None )
625729 matches = attr .ib (default = attr .Factory (list ))
626730 detection_log = attr .ib (default = attr .Factory (list ))
627- files = attr .ib (factory = list )
731+ file_regions = attr .ib (factory = list )
628732
629733 @classmethod
630734 def get_unique_detections (cls , license_detections ):
@@ -640,17 +744,18 @@ def get_unique_detections(cls, license_detections):
640744 detection .file_region
641745 for detection in all_detections
642746 ]
643-
644747 detection = next (iter (all_detections ))
645- detection_mapping = detection .to_dict ()
748+ if not hasattr (detection , "detection_log" ):
749+ detection .detection_log = []
750+
646751 unique_license_detections .append (
647752 cls (
648- identifier = detection_mapping [ " identifier" ] ,
649- license_expression = detection_mapping [ " license_expression" ] ,
650- detection_log = detection_mapping . get ( " detection_log" , []) or [] ,
651- matches = detection_mapping [ " matches" ] ,
753+ identifier = detection . identifier ,
754+ license_expression = detection . license_expression ,
755+ detection_log = detection . detection_log ,
756+ matches = detection . matches ,
652757 detection_count = len (file_regions ),
653- files = file_regions ,
758+ file_regions = file_regions ,
654759 )
655760 )
656761
@@ -660,7 +765,7 @@ def to_dict(self, license_diagnostics):
660765
661766 def dict_fields (attr , value ):
662767
663- if attr .name == 'files ' :
768+ if attr .name == 'file_regions ' :
664769 return False
665770
666771 if attr .name == 'matches' :
@@ -673,6 +778,15 @@ def dict_fields(attr, value):
673778
674779 return attr .asdict (self , filter = dict_fields )
675780
def get_license_detection_object(self):
    """
    Return a new LicenseDetection built from the license expression,
    detection log, matches and identifier of this unique detection, with
    its ``file_region`` set to None.
    """
    detection_fields = dict(
        license_expression=self.license_expression,
        detection_log=self.detection_log,
        matches=self.matches,
        identifier=self.identifier,
        file_region=None,
    )
    return LicenseDetection(**detection_fields)
789+
676790
677791def get_detections_by_id (license_detections ):
678792 """
@@ -1215,6 +1329,35 @@ def get_license_keys_from_detections(license_detections, licensing=Licensing()):
12151329 return list (license_keys )
12161330
12171331
def get_ambiguous_license_detections_by_type(unique_license_detections):
    """
    Return a mapping of {detection category value: unique license detection}
    for the ambiguous detections of ``unique_license_detections`` which need
    review and would be todo items for the reviewer.

    Each detection is assigned to at most one category, checked in this
    order: undetected license, unknown match, imperfect match coverage and
    extra words. Only one detection is kept per category: a later detection
    of the same category replaces an earlier one.
    """
    ambi_license_detections = {}

    for detection in unique_license_detections:
        if is_undetected_license_matches(license_matches=detection.matches):
            ambi_license_detections[DetectionCategory.UNDETECTED_LICENSE.value] = detection

        # Guard the substring test: a missing (None) license expression would
        # otherwise raise a TypeError here.
        elif detection.license_expression and "unknown" in detection.license_expression:
            # An "unknown" expression without unknown matches is deliberately
            # not checked against the remaining categories.
            if has_unknown_matches(license_matches=detection.matches):
                ambi_license_detections[DetectionCategory.UNKNOWN_MATCH.value] = detection

        elif is_match_coverage_less_than_threshold(
            license_matches=detection.matches,
            threshold=IMPERFECT_MATCH_COVERAGE_THR,
        ):
            ambi_license_detections[DetectionCategory.IMPERFECT_COVERAGE.value] = detection

        elif has_extra_words(license_matches=detection.matches):
            ambi_license_detections[DetectionCategory.EXTRA_WORDS.value] = detection

    return ambi_license_detections
1359+
1360+
12181361def analyze_detection (license_matches , package_license = False ):
12191362 """
12201363 Analyse a list of LicenseMatch objects, and determine if the license detection
0 commit comments