Skip to content

Commit

Permalink
fix(license): stop splitting a long license text (#7336)
Browse files Browse the repository at this point in the history
Signed-off-by: knqyf263 <knqyf263@gmail.com>
Co-authored-by: knqyf263 <knqyf263@gmail.com>
  • Loading branch information
afdesk and knqyf263 authored Sep 5, 2024
1 parent 7a1e8b8 commit 4926da7
Show file tree
Hide file tree
Showing 11 changed files with 192 additions and 72 deletions.
2 changes: 1 addition & 1 deletion pkg/dependency/parser/python/packaging/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ func (p *Parser) Parse(r xio.ReadSeekerAt) ([]ftypes.Package, []ftypes.Dependenc
}

if license == "" && h.Get("License-File") != "" {
license = "file://" + h.Get("License-File")
license = licensing.LicenseFilePrefix + h.Get("License-File")
}

return []ftypes.Package{
Expand Down
4 changes: 2 additions & 2 deletions pkg/fanal/analyzer/language/python/packaging/packaging.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,11 +122,11 @@ func (a packagingAnalyzer) fillAdditionalData(fsys fs.FS, app *types.Application
// Parser adds `file://` prefix to filepath from `License-File` field
// We need to read this file to find licenses
// Otherwise, this is the name of the license
if !strings.HasPrefix(lic, "file://") {
if !strings.HasPrefix(lic, licensing.LicenseFilePrefix) {
licenses = append(licenses, lic)
continue
}
licenseFilePath := path.Base(strings.TrimPrefix(lic, "file://"))
licenseFilePath := path.Base(strings.TrimPrefix(lic, licensing.LicenseFilePrefix))

findings, err := classifyLicense(app.FilePath, licenseFilePath, a.licenseClassifierConfidenceLevel, fsys)
if err != nil {
Expand Down
45 changes: 45 additions & 0 deletions pkg/licensing/normalize.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,12 @@ var mapping = map[string]string{
"PUBLIC DOMAIN": Unlicense,
}

// Prefixes used to tag license strings that are not plain license names.
const (
// LicenseTextPrefix marks a value that carries a whole license text
// instead of a license name; SplitLicenses prepends it when it detects a text.
LicenseTextPrefix = "text://"
// LicenseFilePrefix marks a value that is a path to a license file
// (the Python packaging parser prepends it to the `License-File` header value).
LicenseFilePrefix = "file://"
// CustomLicensePrefix is the leading part of the name reported for
// a license that was detected as a license text.
CustomLicensePrefix = "CUSTOM License"
)

// pythonLicenseExceptions contains licenses that we cannot separate correctly using our logic.
// first word after separator (or/and) => license name
var pythonLicenseExceptions = map[string]string{
Expand All @@ -179,6 +185,39 @@ var pythonLicenseExceptions = map[string]string{

// licenseSplitRegexp matches the separators between license names:
// an optional comma followed by "or"/"and" surrounded by spaces or underscores,
// or a bare comma followed by optional spaces.
var licenseSplitRegexp = regexp.MustCompile("(,?[_ ]+(?:or|and)[_ ]+)|(,[ ]*)")

// licenseTextKeywords lists lowercase fragments that typically show up in a
// full license text but not in a license name.
var licenseTextKeywords = []string{
	"http://",
	"https://",
	"(c)",
	"as-is",
	";",
	"hereby",
	"permission to use",
	"permission is",
	"use in source",
	"use, copy, modify",
	"using",
}

// isLicenseText reports whether str contains at least one entry of
// licenseTextKeywords. The comparison is case-sensitive and every keyword is
// lowercase, so the caller is expected to pass an already lowercased string.
func isLicenseText(str string) bool {
	matched := false
	// Stop scanning as soon as the first keyword is found.
	for i := 0; i < len(licenseTextKeywords) && !matched; i++ {
		matched = strings.Contains(str, licenseTextKeywords[i])
	}
	return matched
}

// TrimLicenseText shortens a license text to a short label: its first three
// words (fewer when the text has fewer) joined by single spaces and followed
// by "...".
//
// Words are separated by any run of whitespace, so leading blanks, repeated
// spaces, tabs and line breaks never leak into the label. The "..." suffix is
// always appended, even when nothing was cut off.
func TrimLicenseText(text string) string {
	words := strings.Fields(text)
	if len(words) > 3 {
		words = words[:3]
	}
	return strings.Join(words, " ") + "..."
}

func Normalize(name string) string {
name = strings.TrimSpace(name)
if l, ok := mapping[strings.ToUpper(name)]; ok {
Expand All @@ -191,6 +230,12 @@ func SplitLicenses(str string) []string {
if str == "" {
return nil
}
if isLicenseText(strings.ToLower(str)) {
return []string{
LicenseTextPrefix + str,
}
}

var licenses []string
for _, maybeLic := range licenseSplitRegexp.Split(str, -1) {
lower := strings.ToLower(maybeLic)
Expand Down
7 changes: 7 additions & 0 deletions pkg/licensing/normalize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,13 @@ func TestSplitLicenses(t *testing.T) {
"Historical Permission Notice and Disclaimer (HPND)",
},
},
{
name: "License text",
license: "* Permission to use this software in any way is granted without",
licenses: []string{
"text://* Permission to use this software in any way is granted without",
},
},
}

for _, tt := range tests {
Expand Down
2 changes: 2 additions & 0 deletions pkg/rpc/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,7 @@ func ConvertFromRPCDetectedLicenses(rpcLicenses []*common.DetectedLicense) []typ
PkgName: l.PkgName,
FilePath: l.FilePath,
Name: l.Name,
Text: l.Text,
Confidence: float64(l.Confidence),
Link: l.Link,
})
Expand Down Expand Up @@ -983,6 +984,7 @@ func ConvertToRPCLicenses(licenses []types.DetectedLicense) []*common.DetectedLi
PkgName: l.PkgName,
FilePath: l.FilePath,
Name: l.Name,
Text: l.Text,
Confidence: float32(l.Confidence),
Link: l.Link,
})
Expand Down
4 changes: 4 additions & 0 deletions pkg/rpc/convert_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,7 @@ func TestConvertFromRPCLicenses(t *testing.T) {
PkgName: "alpine-baselayout",
FilePath: "some-path",
Name: "GPL-2.0",
Text: "text://* Permission to use this software in any way is granted without",
Confidence: 1,
Link: "https://some-link",
},
Expand All @@ -771,6 +772,7 @@ func TestConvertFromRPCLicenses(t *testing.T) {
PkgName: "alpine-baselayout",
FilePath: "some-path",
Name: "GPL-2.0",
Text: "text://* Permission to use this software in any way is granted without",
Confidence: 1,
Link: "https://some-link",
},
Expand Down Expand Up @@ -806,6 +808,7 @@ func TestConvertToRPCLicenses(t *testing.T) {
PkgName: "alpine-baselayout",
FilePath: "some-path",
Name: "GPL-2.0",
Text: "text://* Permission to use this software in any way is granted without",
Confidence: 1,
Link: "https://some-link",
},
Expand All @@ -817,6 +820,7 @@ func TestConvertToRPCLicenses(t *testing.T) {
PkgName: "alpine-baselayout",
FilePath: "some-path",
Name: "GPL-2.0",
Text: "text://* Permission to use this software in any way is granted without",
Confidence: 1,
Link: "https://some-link",
},
Expand Down
48 changes: 29 additions & 19 deletions pkg/scanner/local/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,14 +261,7 @@ func (s Scanner) scanLicenses(target types.ScanTarget, options types.ScanOptions
var osPkgLicenses []types.DetectedLicense
for _, pkg := range target.Packages {
for _, license := range pkg.Licenses {
category, severity := scanner.Scan(license)
osPkgLicenses = append(osPkgLicenses, types.DetectedLicense{
Severity: severity,
Category: category,
PkgName: pkg.Name,
Name: license,
Confidence: 1.0,
})
osPkgLicenses = append(osPkgLicenses, toDetectedLicense(scanner, license, pkg.Name, ""))
}
}
results = append(results, types.Result{
Expand All @@ -282,17 +275,11 @@ func (s Scanner) scanLicenses(target types.ScanTarget, options types.ScanOptions
var langLicenses []types.DetectedLicense
for _, lib := range app.Packages {
for _, license := range lib.Licenses {
category, severity := scanner.Scan(license)
langLicenses = append(langLicenses, types.DetectedLicense{
Severity: severity,
Category: category,
PkgName: lib.Name,
Name: license,
// Lock files use app.FilePath - https://github.com/aquasecurity/trivy/blob/6ccc0a554b07b05fd049f882a1825a0e1e0aabe1/pkg/fanal/types/artifact.go#L245-L246
// Applications use lib.FilePath - https://github.com/aquasecurity/trivy/blob/6ccc0a554b07b05fd049f882a1825a0e1e0aabe1/pkg/fanal/types/artifact.go#L93-L94
FilePath: lo.Ternary(lib.FilePath != "", lib.FilePath, app.FilePath),
Confidence: 1.0,
})
// Lock files use app.FilePath - https://github.com/aquasecurity/trivy/blob/6ccc0a554b07b05fd049f882a1825a0e1e0aabe1/pkg/fanal/types/artifact.go#L245-L246
// Applications use lib.FilePath - https://github.com/aquasecurity/trivy/blob/6ccc0a554b07b05fd049f882a1825a0e1e0aabe1/pkg/fanal/types/artifact.go#L93-L94
filePath := lo.Ternary(lib.FilePath != "", lib.FilePath, app.FilePath)

langLicenses = append(langLicenses, toDetectedLicense(scanner, license, lib.Name, filePath))
}
}

Expand Down Expand Up @@ -390,6 +377,29 @@ func toDetectedMisconfiguration(res ftypes.MisconfResult, defaultSeverity dbType
}
}

// toDetectedLicense builds the DetectedLicense entry for one license string of
// the package pkgName found at filePath.
//
// A value starting with licensing.LicenseTextPrefix is handled as a license
// text: the prefix is stripped, the remainder is stored in Text, category and
// severity are set to unknown, and Name becomes licensing.CustomLicensePrefix
// plus a shortened form of the text. Any other value is handled as a license
// name and classified with scanner.Scan.
func toDetectedLicense(scanner licensing.Scanner, license, pkgName, filePath string) types.DetectedLicense {
	detected := types.DetectedLicense{
		PkgName:    pkgName,
		FilePath:   filePath,
		Name:       license,
		Confidence: 1.0,
	}

	// License name: let the scanner decide category and severity.
	if !strings.HasPrefix(license, licensing.LicenseTextPrefix) {
		detected.Category, detected.Severity = scanner.Scan(license)
		return detected
	}

	// License text: it cannot be classified, so report it as unknown.
	detected.Text = strings.TrimPrefix(license, licensing.LicenseTextPrefix)
	detected.Category = ftypes.CategoryUnknown
	detected.Severity = dbTypes.SeverityUnknown.String()
	detected.Name = licensing.CustomLicensePrefix + ": " + licensing.TrimLicenseText(detected.Text)
	return detected
}

// ShouldScanMisconfigOrRbac returns the result of scanners.AnyEnabled for the
// misconfiguration scanner and the RBAC scanner.
func ShouldScanMisconfigOrRbac(scanners types.Scanners) bool {
return scanners.AnyEnabled(types.MisconfigScanner, types.RBACScanner)
}
Expand Down
38 changes: 38 additions & 0 deletions pkg/scanner/local/scan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,25 @@ var (
},
Licenses: []string{"MIT"},
}
python39min = ftypes.Package{
Name: "python3.9-minimal",
Version: "3.9.1",
FilePath: "/usr/lib/python/site-packages/python3.9-minimal/METADATA",
Layer: ftypes.Layer{
DiffID: "sha256:0ea33a93585cf1917ba522b2304634c3073654062d5282c1346322967790ef33",
},
Licenses: []string{"text://Redistribution and use in source and binary forms, with or without"},
}
menuinstPkg = ftypes.Package{
Name: "menuinst",
Version: "2.0.2",
FilePath: "opt/conda/lib/python3.11/site-packages/menuinst-2.0.2.dist-info/METADATA",
Layer: ftypes.Layer{
DiffID: "sha256:0ea33a93585cf1917ba522b2304634c3073654062d5282c1346322967790ef33",
},
Licenses: []string{"text://(c) 2016 Continuum Analytics, Inc. / http://continuum.io All Rights Reserved"},
}

laravelPkg = ftypes.Package{
Name: "laravel/framework",
Version: "6.0.0",
Expand Down Expand Up @@ -225,6 +244,7 @@ func TestScanner_Scan(t *testing.T) {
},
Packages: []ftypes.Package{
muslPkg,
python39min,
},
Applications: []ftypes.Application{
{
Expand All @@ -239,6 +259,7 @@ func TestScanner_Scan(t *testing.T) {
FilePath: "",
Packages: []ftypes.Package{
urllib3Pkg,
menuinstPkg,
},
},
},
Expand All @@ -257,6 +278,14 @@ func TestScanner_Scan(t *testing.T) {
Name: "MIT",
Confidence: 1,
},
{
Severity: "UNKNOWN",
Category: "unknown",
PkgName: python39min.Name,
Name: "CUSTOM License: Redistribution and use...",
Text: "Redistribution and use in source and binary forms, with or without",
Confidence: 1,
},
},
},
{
Expand Down Expand Up @@ -286,6 +315,15 @@ func TestScanner_Scan(t *testing.T) {
Name: "MIT",
Confidence: 1,
},
{
Severity: "UNKNOWN",
Category: "unknown",
PkgName: menuinstPkg.Name,
FilePath: "opt/conda/lib/python3.11/site-packages/menuinst-2.0.2.dist-info/METADATA",
Name: "CUSTOM License: (c) 2016 Continuum...",
Text: "(c) 2016 Continuum Analytics, Inc. / http://continuum.io All Rights Reserved",
Confidence: 1,
},
},
},
{
Expand Down
3 changes: 3 additions & 0 deletions pkg/types/license.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ type DetectedLicense struct {
// Name holds a detected license name
Name string

// Text holds a long license text if Trivy detects a license name as a license text
Text string

// Confidence is level of the match. The confidence level is between 0.0 and 1.0, with 1.0 indicating an
// exact match and 0.0 indicating a complete mismatch
Confidence float64
Expand Down
Loading

0 comments on commit 4926da7

Please sign in to comment.