Skip to content

Commit 9e99951

Browse files
fix: fall back to license from package if not present in versioned package (#109)
* fix: fall back to license from package if not present in versioned package
* chore: use normalized licenses from versioned package
* fix: retain previous behaviour of using unformatted licenses
* test: add test to verify spdx behaviour is retained
* docs: update readme with license data info
* chore: don't allocate string slice unnecessarily
* style: simplify testing abstraction and pass in raw json string
* chore: call t.Helper in helper methods, and use require.NoError instead of panicking
1 parent e5607ba commit 9e99951

File tree

7 files changed

+210
-66
lines changed

7 files changed

+210
-66
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,10 @@ You can also return raw JSON information about a specific repository:
129129
parlay ecosystems repo https://github.com/open-policy-agent/conftest
130130
```
131131

132+
### License data
133+
134+
parlay enriches components and packages with their license information from ecosyste.ms on a best-effort basis. It prefers the license data of the package version at hand; however, it may not always be possible to retrieve the license for a specific version (see [ecosyste.ms issue here](https://github.com/ecosyste-ms/packages/issues/1027) for more info). In this case, parlay will fall back to enriching with the license data of the package's latest release. In rare cases — where the licensing model of a package changed over time — this may result in license data inaccuracies.
135+
132136

133137
## Enriching with Snyk
134138

internal/utils/spdx.go

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,19 @@ func GetPurlFromSPDXPackage(pkg *spdx_2_3.Package) (*packageurl.PackageURL, erro
3232
return &purl, nil
3333
}
3434

35-
func GetSPDXLicenseExpressionFromEcosystemsLicense(data *packages.VersionWithDependencies) string {
36-
if data == nil || data.Licenses == nil || *data.Licenses == "" {
35+
func GetLicensesFromEcosystemsLicense(pkgVersionData *packages.VersionWithDependencies, pkgData *packages.Package) []string {
36+
if pkgVersionData != nil && pkgVersionData.Licenses != nil && *pkgVersionData.Licenses != "" {
37+
return strings.Split(*pkgVersionData.Licenses, ",")
38+
} else if pkgData != nil && len(pkgData.NormalizedLicenses) > 0 {
39+
return pkgData.NormalizedLicenses
40+
}
41+
return nil
42+
}
43+
44+
func GetLicenseExpressionFromEcosystemsLicense(pkgVersionData *packages.VersionWithDependencies, pkgData *packages.Package) string {
45+
licenses := GetLicensesFromEcosystemsLicense(pkgVersionData, pkgData)
46+
if len(licenses) == 0 {
3747
return ""
3848
}
39-
return fmt.Sprintf("(%s)", strings.Join(strings.Split(*data.Licenses, ","), " OR "))
49+
return fmt.Sprintf("(%s)", strings.Join(licenses, " OR "))
4050
}

internal/utils/spdx_test.go

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,29 +11,52 @@ import (
1111

1212
func TestGetSPDXLicenseExpressionFromEcosystemsLicense(t *testing.T) {
1313
assert := assert.New(t)
14-
licenses := "GPLv2,MIT"
15-
data := packages.VersionWithDependencies{Licenses: &licenses}
16-
expression := utils.GetSPDXLicenseExpressionFromEcosystemsLicense(&data)
14+
versionedLicenses := "GPLv2,MIT"
15+
pkgVersionData := packages.VersionWithDependencies{Licenses: &versionedLicenses}
16+
latestLicenses := []string{"Apache-2.0"}
17+
pkgData := packages.Package{NormalizedLicenses: latestLicenses}
18+
expression := utils.GetLicenseExpressionFromEcosystemsLicense(&pkgVersionData, &pkgData)
1719
assert.Equal("(GPLv2 OR MIT)", expression)
1820
}
1921

2022
func TestGetSPDXLicenseExpressionFromEcosystemsLicense_NoData(t *testing.T) {
2123
assert := assert.New(t)
22-
expression := utils.GetSPDXLicenseExpressionFromEcosystemsLicense(nil)
24+
expression := utils.GetLicenseExpressionFromEcosystemsLicense(nil, nil)
2325
assert.Equal("", expression)
2426
}
2527

28+
func TestGetSPDXLicenseExpressionFromEcosystemsLicense_NoVersionedData(t *testing.T) {
29+
assert := assert.New(t)
30+
pkgVersionData := packages.VersionWithDependencies{}
31+
latestLicenses := []string{"Apache-2.0"}
32+
pkgData := packages.Package{NormalizedLicenses: latestLicenses}
33+
expression := utils.GetLicenseExpressionFromEcosystemsLicense(&pkgVersionData, &pkgData)
34+
assert.Equal("(Apache-2.0)", expression)
35+
}
36+
37+
func TestGetSPDXLicenseExpressionFromEcosystemsLicense_NoLatestData(t *testing.T) {
38+
assert := assert.New(t)
39+
versionedLicenses := "GPLv2,MIT"
40+
pkgVersionData := packages.VersionWithDependencies{Licenses: &versionedLicenses}
41+
pkgData := packages.Package{}
42+
expression := utils.GetLicenseExpressionFromEcosystemsLicense(&pkgVersionData, &pkgData)
43+
assert.Equal("(GPLv2 OR MIT)", expression)
44+
}
45+
2646
func TestGetSPDXLicenseExpressionFromEcosystemsLicense_NoLicenses(t *testing.T) {
2747
assert := assert.New(t)
28-
data := packages.VersionWithDependencies{}
29-
expression := utils.GetSPDXLicenseExpressionFromEcosystemsLicense(&data)
48+
pkgVersionData := packages.VersionWithDependencies{}
49+
pkgData := packages.Package{}
50+
expression := utils.GetLicenseExpressionFromEcosystemsLicense(&pkgVersionData, &pkgData)
3051
assert.Equal("", expression)
3152
}
3253

3354
func TestGetSPDXLicenseExpressionFromEcosystemsLicense_EmptyLicenses(t *testing.T) {
3455
assert := assert.New(t)
35-
licenses := ""
36-
data := packages.VersionWithDependencies{Licenses: &licenses}
37-
expression := utils.GetSPDXLicenseExpressionFromEcosystemsLicense(&data)
56+
versionedLicenses := ""
57+
pkgVersionData := packages.VersionWithDependencies{Licenses: &versionedLicenses}
58+
latestLicenses := []string{}
59+
pkgData := packages.Package{NormalizedLicenses: latestLicenses}
60+
expression := utils.GetLicenseExpressionFromEcosystemsLicense(&pkgVersionData, &pkgData)
3861
assert.Equal("", expression)
3962
}

lib/ecosystems/enrich_cyclonedx.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ import (
3131
)
3232

3333
type cdxPackageEnricher = func(*cdx.Component, *packages.Package)
34-
type cdxPackageVersionEnricher = func(*cdx.Component, *packages.VersionWithDependencies)
34+
type cdxPackageVersionEnricher = func(*cdx.Component, *packages.VersionWithDependencies, *packages.Package)
3535

3636
var cdxPackageEnrichers = []cdxPackageEnricher{
3737
enrichCDXDescription,
@@ -58,8 +58,8 @@ func enrichCDXDescription(comp *cdx.Component, data *packages.Package) {
5858
}
5959
}
6060

61-
func enrichCDXLicense(comp *cdx.Component, data *packages.VersionWithDependencies) {
62-
expression := utils.GetSPDXLicenseExpressionFromEcosystemsLicense(data)
61+
func enrichCDXLicense(comp *cdx.Component, pkgVersionData *packages.VersionWithDependencies, pkgData *packages.Package) {
62+
expression := utils.GetLicenseExpressionFromEcosystemsLicense(pkgVersionData, pkgData)
6363
if expression != "" {
6464
licenses := cdx.LicenseChoice{Expression: expression}
6565
comp.Licenses = &cdx.Licenses{licenses}
@@ -248,7 +248,7 @@ func enrichCDX(bom *cdx.BOM, logger *zerolog.Logger) {
248248
}
249249

250250
for _, enrichFunc := range cdxPackageVersionEnrichers {
251-
enrichFunc(comp, packageVersionResp.JSON200)
251+
enrichFunc(comp, packageVersionResp.JSON200, packageResp.JSON200)
252252
}
253253

254254
}(comps[i])

lib/ecosystems/enrich_cyclonedx_test.go

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -190,15 +190,54 @@ func TestEnrichLicense(t *testing.T) {
190190
Name: "cyclonedx-go",
191191
Version: "v0.3.0",
192192
}
193-
lic := "BSD-3-Clause"
194-
pack := &packages.VersionWithDependencies{
195-
Licenses: &lic,
193+
versionedLicenses := "BSD-3-Clause"
194+
pkgVersionData := &packages.VersionWithDependencies{Licenses: &versionedLicenses}
195+
latestLicenses := []string{"Apache-2.0"}
196+
pkgData := &packages.Package{NormalizedLicenses: latestLicenses}
197+
198+
enrichCDXLicense(component, pkgVersionData, pkgData)
199+
200+
licenses := *component.Licenses
201+
comp := cdx.LicenseChoice(cdx.LicenseChoice{Expression: "(BSD-3-Clause)"})
202+
assert.Equal(t, 1, len(licenses))
203+
assert.Equal(t, comp, licenses[0])
204+
}
205+
206+
func TestEnrichLicenseNoVersionedLicense(t *testing.T) {
207+
component := &cdx.Component{
208+
Type: cdx.ComponentTypeLibrary,
209+
Name: "cyclonedx-go",
210+
Version: "v0.3.0",
211+
}
212+
versionedLicenses := ""
213+
pkgVersionData := &packages.VersionWithDependencies{Licenses: &versionedLicenses}
214+
latestLicenses := []string{"Apache-2.0"}
215+
pkgData := &packages.Package{NormalizedLicenses: latestLicenses}
216+
217+
enrichCDXLicense(component, pkgVersionData, pkgData)
218+
219+
licenses := *component.Licenses
220+
comp := cdx.LicenseChoice(cdx.LicenseChoice{Expression: "(Apache-2.0)"})
221+
assert.Equal(t, 1, len(licenses))
222+
assert.Equal(t, comp, licenses[0])
223+
}
224+
225+
func TestEnrichLicenseNoLatestLicense(t *testing.T) {
226+
component := &cdx.Component{
227+
Type: cdx.ComponentTypeLibrary,
228+
Name: "cyclonedx-go",
229+
Version: "v0.3.0",
196230
}
231+
versionedLicenses := "BSD-3-Clause"
232+
pkgVersionData := &packages.VersionWithDependencies{Licenses: &versionedLicenses}
233+
latestLicenses := []string{""}
234+
pkgData := &packages.Package{NormalizedLicenses: latestLicenses}
197235

198-
enrichCDXLicense(component, pack)
236+
enrichCDXLicense(component, pkgVersionData, pkgData)
199237

200238
licenses := *component.Licenses
201239
comp := cdx.LicenseChoice(cdx.LicenseChoice{Expression: "(BSD-3-Clause)"})
240+
assert.Equal(t, 1, len(licenses))
202241
assert.Equal(t, comp, licenses[0])
203242
}
204243

lib/ecosystems/enrich_spdx.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package ecosystems
1818

1919
import (
2020
"errors"
21+
"strings"
2122

2223
"github.com/package-url/packageurl-go"
2324
"github.com/rs/zerolog"
@@ -64,7 +65,7 @@ func enrichSPDX(bom *spdx.Document, logger *zerolog.Logger) {
6465
continue
6566
}
6667

67-
enrichSPDXLicense(pkg, pkgVersionData)
68+
enrichSPDXLicense(pkg, pkgVersionData, pkgData)
6869
}
6970
}
7071

@@ -96,10 +97,10 @@ func enrichSPDXSupplier(pkg *v2_3.Package, data *packages.Package) {
9697
}
9798
}
9899

99-
func enrichSPDXLicense(pkg *v2_3.Package, data *packages.VersionWithDependencies) {
100-
expression := utils.GetSPDXLicenseExpressionFromEcosystemsLicense(data)
101-
if expression != "" {
102-
pkg.PackageLicenseConcluded = *data.Licenses
100+
func enrichSPDXLicense(pkg *v2_3.Package, pkgVersionData *packages.VersionWithDependencies, pkgData *packages.Package) {
101+
licenses := utils.GetLicensesFromEcosystemsLicense(pkgVersionData, pkgData)
102+
if len(licenses) > 0 {
103+
pkg.PackageLicenseConcluded = strings.Join(licenses, ",")
103104
}
104105
}
105106

lib/ecosystems/enrich_spdx_test.go

Lines changed: 108 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package ecosystems
1818

1919
import (
2020
"bytes"
21+
"encoding/json"
2122
"net/http"
2223
"testing"
2324

@@ -31,34 +32,49 @@ import (
3132
"github.com/snyk/parlay/lib/sbom"
3233
)
3334

34-
func TestEnrichSBOM_SPDX(t *testing.T) {
35+
func parseJson(t *testing.T, jsonStr string) map[string]any {
36+
t.Helper()
37+
var result map[string]any
38+
require.NoError(t, json.Unmarshal([]byte(jsonStr), &result))
39+
return result
40+
}
41+
42+
func setupHttpmock(t *testing.T, packageVersionsResponse, packageResponse *string) {
43+
t.Helper()
3544
httpmock.Activate()
36-
defer httpmock.DeactivateAndReset()
3745

38-
httpmock.RegisterResponder("GET", `=~^https://packages.ecosyste.ms/api/v1/registries/.*/packages/.*/versions`,
39-
func(r *http.Request) (*http.Response, error) {
40-
return httpmock.NewJsonResponse(200, map[string]interface{}{
41-
// This is the license we expect to see for the specific package version
42-
"licenses": "MIT",
43-
})
44-
},
45-
)
46-
httpmock.RegisterResponder("GET", `=~^https://packages.ecosyste.ms/api/v1/registries`,
47-
func(req *http.Request) (*http.Response, error) {
48-
return httpmock.NewJsonResponse(200, map[string]interface{}{
49-
"description": "description",
50-
"normalized_licenses": []string{
51-
// This license should be ignored as it corresponds to the latest version of the package
52-
"BSD-3-Clause",
53-
},
54-
"homepage": "https://github.com/spdx/tools-golang",
55-
"repo_metadata": map[string]interface{}{
56-
"owner_record": map[string]interface{}{
57-
"name": "Acme Corp",
58-
},
59-
},
46+
if packageVersionsResponse != nil {
47+
httpmock.RegisterResponder("GET", `=~^https://packages.ecosyste.ms/api/v1/registries/.*/packages/.*/versions`,
48+
func(r *http.Request) (*http.Response, error) {
49+
return httpmock.NewJsonResponse(200, parseJson(t, *packageVersionsResponse))
50+
},
51+
)
52+
}
53+
54+
if packageResponse != nil {
55+
httpmock.RegisterResponder("GET", `=~^https://packages.ecosyste.ms/api/v1/registries`,
56+
func(req *http.Request) (*http.Response, error) {
57+
return httpmock.NewJsonResponse(200, parseJson(t, *packageResponse))
6058
})
61-
})
59+
}
60+
}
61+
62+
func TestEnrichSBOM_SPDX(t *testing.T) {
63+
packageVersionResponse := `{
64+
"licenses": "MIT"
65+
}`
66+
packageResponse := `{
67+
"description": "description",
68+
"normalized_licenses": ["BSD-3-Clause"],
69+
"homepage": "https://github.com/spdx/tools-golang",
70+
"repo_metadata": {
71+
"owner_record": {
72+
"name": "Acme Corp"
73+
}
74+
}
75+
}`
76+
setupHttpmock(t, &packageVersionResponse, &packageResponse)
77+
defer httpmock.DeactivateAndReset()
6278

6379
doc, err := sbom.DecodeSBOMDocument([]byte(`{"spdxVersion":"SPDX-2.3","SPDXID":"SPDXRef-DOCUMENT"}`))
6480
require.NoError(t, err)
@@ -100,25 +116,76 @@ func TestEnrichSBOM_SPDX(t *testing.T) {
100116
require.NoError(t, doc.Encode(buf))
101117
}
102118

103-
func TestEnrichSBOM_SPDX_NoSupplierName(t *testing.T) {
104-
httpmock.Activate()
119+
func TestEnrichSBOM_MissingVersionedLicense(t *testing.T) {
120+
packageVersionResponse := `{
121+
"licenses": ""
122+
}`
123+
packageResponse := `{
124+
"description": "description",
125+
"normalized_licenses": ["BSD-3-Clause", "Apache-2.0"],
126+
"homepage": "https://github.com/spdx/tools-golang",
127+
"repo_metadata": {
128+
"owner_record": {
129+
"name": "Acme Corp"
130+
}
131+
}
132+
}`
133+
setupHttpmock(t, &packageVersionResponse, &packageResponse)
105134
defer httpmock.DeactivateAndReset()
106135

107-
httpmock.RegisterResponder("GET", `=~^https://packages.ecosyste.ms/api/v1/registries`,
108-
func(req *http.Request) (*http.Response, error) {
109-
return httpmock.NewJsonResponse(200, map[string]interface{}{
110-
"description": "description",
111-
"normalized_licenses": []string{
112-
"BSD-3-Clause",
113-
},
114-
"homepage": "https://github.com/spdx/tools-golang",
115-
"repo_metadata": map[string]interface{}{
116-
"owner_record": map[string]interface{}{
117-
"name": "",
118-
},
136+
doc, err := sbom.DecodeSBOMDocument([]byte(`{"spdxVersion":"SPDX-2.3","SPDXID":"SPDXRef-DOCUMENT"}`))
137+
require.NoError(t, err)
138+
139+
bom, ok := doc.BOM.(*v2_3.Document)
140+
require.True(t, ok)
141+
142+
bom.Packages = []*v2_3.Package{
143+
{
144+
PackageSPDXIdentifier: "pkg:golang/github.com/spdx/tools-golang@v0.5.2",
145+
PackageName: "github.com/spdx/tools-golang",
146+
PackageVersion: "v0.5.2",
147+
PackageExternalReferences: []*v2_3.PackageExternalReference{
148+
{
149+
Category: common.CategoryPackageManager,
150+
RefType: "purl",
151+
Locator: "pkg:golang/github.com/spdx/tools-golang@v0.5.2",
119152
},
120-
})
121-
})
153+
},
154+
},
155+
}
156+
logger := zerolog.Nop()
157+
158+
EnrichSBOM(doc, &logger)
159+
160+
pkgs := bom.Packages
161+
162+
assert.Equal(t, "description", pkgs[0].PackageDescription)
163+
assert.Equal(t, "BSD-3-Clause,Apache-2.0", pkgs[0].PackageLicenseConcluded)
164+
assert.Equal(t, "https://github.com/spdx/tools-golang", pkgs[0].PackageHomePage)
165+
assert.Equal(t, "Organization", pkgs[0].PackageSupplier.SupplierType)
166+
assert.Equal(t, "Acme Corp", pkgs[0].PackageSupplier.Supplier)
167+
168+
httpmock.GetTotalCallCount()
169+
calls := httpmock.GetCallCountInfo()
170+
assert.Equal(t, len(pkgs), calls[`GET =~^https://packages.ecosyste.ms/api/v1/registries`])
171+
172+
buf := bytes.NewBuffer(nil)
173+
require.NoError(t, doc.Encode(buf))
174+
}
175+
176+
func TestEnrichSBOM_SPDX_NoSupplierName(t *testing.T) {
177+
packageResponse := `{
178+
"description": "description",
179+
"normalized_licenses": ["BSD-3-Clause"],
180+
"homepage": "https://github.com/spdx/tools-golang",
181+
"repo_metadata": {
182+
"owner_record": {
183+
"name": ""
184+
}
185+
}
186+
}`
187+
setupHttpmock(t, nil, &packageResponse)
188+
defer httpmock.DeactivateAndReset()
122189

123190
doc, err := sbom.DecodeSBOMDocument([]byte(`{"spdxVersion":"SPDX-2.3","SPDXID":"SPDXRef-DOCUMENT"}`))
124191
require.NoError(t, err)

0 commit comments

Comments
 (0)