Skip to content

Commit c5744d8

Browse files
authored
Merge pull request #2997 from nexB/scio-npm-bug-448
Improve npm package processing
2 parents d2686b0 + c7c31bd commit c5744d8

File tree

14 files changed

+1878
-74
lines changed

14 files changed

+1878
-74
lines changed

src/packagedcode/build.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,8 @@ def parse(cls, location):
177177
@classmethod
178178
def assign_package_to_resources(cls, package, resource, codebase, skip_name=None):
179179
package_uid = package.package_uid
180+
if not package_uid:
181+
return
180182
parent = resource.parent(codebase)
181183
for res in walk_build(resource=parent, codebase=codebase, skip_name=skip_name):
182184
res.for_packages.append(package_uid)

src/packagedcode/debian.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -397,20 +397,21 @@ def assemble(cls, package_data, resource, codebase):
397397
assemblable_paths = (
398398
f'usr/share/doc/{package_name}/copyright',
399399
)
400-
for res in root_resource.walk(codebase):
401-
if not res.path.endswith(assemblable_paths):
402-
continue
403-
404-
for pkgdt in res.package_data:
405-
package.update(
406-
package_data=pkgdt,
407-
datafile_path=res.path,
408-
)
409-
410-
res.for_packages.append(package_uid)
411-
res.save(codebase)
412-
413-
yield res
400+
if package_uid:
401+
for res in root_resource.walk(codebase):
402+
if not res.path.endswith(assemblable_paths):
403+
continue
404+
405+
for pkgdt in res.package_data:
406+
package.update(
407+
package_data=pkgdt,
408+
datafile_path=res.path,
409+
)
410+
411+
res.for_packages.append(package_uid)
412+
res.save(codebase)
413+
414+
yield res
414415

415416
yield package
416417

src/packagedcode/models.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -970,7 +970,7 @@ def assign_package_to_resources(cls, package, resource, codebase):
970970
# NOTE: we do not attach files to the Package level. Instead we
971971
# update `for_packages` of a codebase resource.
972972
package_uid = package.package_uid
973-
if resource:
973+
if resource and package_uid:
974974
resource.for_packages.append(package_uid)
975975
resource.save(codebase)
976976
for res in resource.walk(codebase):
@@ -1030,8 +1030,9 @@ def assemble_from_many(cls, pkgdata_resources, codebase,):
10301030
datafile_path=resource.path,
10311031
)
10321032
package_uid = package.package_uid
1033-
resource.for_packages.append(package_uid)
1034-
resource.save(codebase)
1033+
if package_uid:
1034+
resource.for_packages.append(package_uid)
1035+
resource.save(codebase)
10351036
else:
10361037
# FIXME: What is the package_data is NOT for the same package as package?
10371038
# FIXME: What if the update did not do anything? (it does return True or False)
@@ -1040,8 +1041,9 @@ def assemble_from_many(cls, pkgdata_resources, codebase,):
10401041
package_data=package_data,
10411042
datafile_path=resource.path,
10421043
)
1043-
resource.for_packages.append(package_uid)
1044-
resource.save(codebase)
1044+
if package_uid:
1045+
resource.for_packages.append(package_uid)
1046+
resource.save(codebase)
10451047

10461048
# in all cases yield possible dependencies
10471049
dependent_packages = package_data.dependencies
@@ -1057,9 +1059,10 @@ def assemble_from_many(cls, pkgdata_resources, codebase,):
10571059
yield resource
10581060

10591061
# the whole parent subtree of the base_resource is for this package
1060-
for res in base_resource.walk(codebase):
1061-
res.for_packages.append(package_uid)
1062-
res.save(codebase)
1062+
if package_uid:
1063+
for res in base_resource.walk(codebase):
1064+
res.for_packages.append(package_uid)
1065+
res.save(codebase)
10631066

10641067
if package:
10651068
if not package.license_expression:
@@ -1336,9 +1339,10 @@ def get_packages_files(self, codebase):
13361339
Yield all the Resource of this package found in codebase.
13371340
"""
13381341
package_uid = self.package_uid
1339-
for resource in codebase.walk():
1340-
if package_uid in resource.for_packages:
1341-
yield resource
1342+
if package_uid:
1343+
for resource in codebase.walk():
1344+
if package_uid in resource.for_packages:
1345+
yield resource
13421346

13431347

13441348
@attr.attributes(slots=True)

src/packagedcode/npm.py

Lines changed: 37 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -95,12 +95,12 @@ def assemble(cls, package_data, resource, codebase):
9595
root = package_resource.parent(codebase)
9696
if root:
9797
for npm_res in cls.walk_npm(resource=root, codebase=codebase):
98-
if package_uid not in npm_res.for_packages:
98+
if package_uid and package_uid not in npm_res.for_packages:
9999
npm_res.for_packages.append(package_uid)
100100
npm_res.save(codebase)
101101
yield npm_res
102102
elif codebase.has_single_resource:
103-
if package_uid not in package_resource.for_packages:
103+
if package_uid and package_uid not in package_resource.for_packages:
104104
package_resource.for_packages.append(package_uid)
105105
package_resource.save(codebase)
106106
yield package_resource
@@ -119,7 +119,7 @@ def assemble(cls, package_data, resource, codebase):
119119
if lock_file.name in lockfile_names:
120120
yield from yield_dependencies_from_package_resource(lock_file, package_uid)
121121

122-
if package_uid not in lock_file.for_packages:
122+
if package_uid and package_uid not in lock_file.for_packages:
123123
lock_file.for_packages.append(package_uid)
124124
lock_file.save(codebase)
125125
yield lock_file
@@ -226,8 +226,7 @@ def parse(cls, location):
226226

227227
if not package.download_url:
228228
# Only add a synthetic download URL if there is none from the dist mapping.
229-
dnl_url = npm_download_url(package.namespace, package.name, package.version)
230-
package.download_url = dnl_url.strip()
229+
package.download_url = npm_download_url(package.namespace, package.name, package.version)
231230

232231
# licenses are a tad special with many different data structures
233232
lic = package_data.get('license')
@@ -781,12 +780,15 @@ def npm_homepage_url(namespace, name, registry='https://www.npmjs.com/package'):
781780
782781
>>> expected = 'https://yarnpkg.com/en/package/@ang/angular'
783782
>>> assert npm_homepage_url('@ang', 'angular', 'https://yarnpkg.com/en/package') == expected
783+
784+
>>> assert not npm_homepage_url(None, None)
784785
"""
785-
if namespace:
786-
ns_name = f'{namespace}/{name}'
787-
else:
788-
ns_name = name
789-
return f'{registry}/{ns_name}'
786+
if name:
787+
if namespace:
788+
ns_name = f'{namespace}/{name}'
789+
else:
790+
ns_name = name
791+
return f'{registry}/{ns_name}'
790792

791793

792794
def npm_download_url(namespace, name, version, registry='https://registry.npmjs.org'):
@@ -803,13 +805,15 @@ def npm_download_url(namespace, name, version, registry='https://registry.npmjs.
803805
804806
>>> expected = 'https://registry.npmjs.org/angular/-/angular-1.6.6.tgz'
805807
>>> assert npm_download_url(None, 'angular', '1.6.6') == expected
806-
"""
807-
if namespace:
808-
ns_name = f'{namespace}/{name}'
809808
810-
else:
811-
ns_name = name
812-
return f'{registry}/{ns_name}/-/{name}-{version}.tgz'
809+
>>> assert not npm_download_url(None, None, None)
810+
"""
811+
if name and version:
812+
if namespace:
813+
ns_name = f'{namespace}/{name}'
814+
else:
815+
ns_name = name
816+
return f'{registry}/{ns_name}/-/{name}-{version}.tgz'
813817

814818

815819
def npm_api_url(namespace, name, version=None, registry='https://registry.npmjs.org'):
@@ -827,20 +831,25 @@ def npm_api_url(namespace, name, version=None, registry='https://registry.npmjs.
827831
>>> assert result == 'https://registry.yarnpkg.com/@invisionag%2feslint-config-ivx'
828832
829833
>>> assert npm_api_url(None, 'angular', '1.6.6') == 'https://registry.npmjs.org/angular/1.6.6'
834+
835+
>>> assert not npm_api_url(None, None, None)
830836
"""
831-
version = version or ''
832-
if namespace:
833-
# this is a legacy wart: older registries used to always encode this /
834-
# FIXME: do NOT encode and use plain / instead
835-
ns_name = '%2f'.join([namespace, name])
836-
# there is no version-specific URL for scoped packages
837-
version = ''
838-
else:
839-
ns_name = name
837+
if name:
838+
if namespace:
839+
# this is a legacy wart: older registries used to always encode this /
840+
# FIXME: do NOT encode and use plain / instead
841+
ns_name = '%2f'.join([namespace, name])
842+
# there is no version-specific URL for scoped packages
843+
version = ''
844+
else:
845+
ns_name = name
846+
847+
if version:
848+
version = f'/{version}'
849+
else:
850+
version = ''
840851

841-
if version:
842-
version = f'/{version}'
843-
return f'{registry}/{ns_name}{version}'
852+
return f'{registry}/{ns_name}{version}'
844853

845854

846855
def is_scoped_package(name):

src/packagedcode/pypi.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -212,12 +212,12 @@ def assemble(cls, package_data, resource, codebase):
212212
for py_res in cls.walk_pypi(resource=root, codebase=codebase):
213213
if py_res.is_dir:
214214
continue
215-
if package_uid not in py_res.for_packages:
215+
if package_uid and package_uid not in py_res.for_packages:
216216
py_res.for_packages.append(package_uid)
217217
py_res.save(codebase)
218218
yield py_res
219219
elif codebase.has_single_resource:
220-
if package_uid not in package_resource.for_packages:
220+
if package_uid and package_uid not in package_resource.for_packages:
221221
package_resource.for_packages.append(package_uid)
222222
package_resource.save(codebase)
223223

@@ -320,9 +320,10 @@ def assign_package_to_resources(cls, package, resource, codebase):
320320

321321
package_uid = package.package_uid
322322

323-
# save thyself!
324-
resource.for_packages.append(package_uid)
325-
resource.save(codebase)
323+
if package_uid:
324+
# save thyself!
325+
resource.for_packages.append(package_uid)
326+
resource.save(codebase)
326327

327328
# collect actual paths based on the file references
328329
for file_ref in package_data.file_references:
@@ -342,15 +343,16 @@ def assign_package_to_resources(cls, package, resource, codebase):
342343
# TODO:w e should log these kind of things
343344
continue
344345
else:
345-
ref_resource.for_packages.append(package_uid)
346-
ref_resource.save(codebase)
346+
if package_uid:
347+
ref_resource.for_packages.append(package_uid)
348+
ref_resource.save(codebase)
347349
else:
348350
ref_resource = get_resource_for_path(
349351
path=path_ref,
350352
root=site_packages,
351353
codebase=codebase,
352354
)
353-
if ref_resource:
355+
if ref_resource and package_uid:
354356
ref_resource.for_packages.append(package_uid)
355357
ref_resource.save(codebase)
356358

src/packagedcode/rpm.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -210,10 +210,11 @@ def assemble(cls, package_data, resource, codebase):
210210
if not res:
211211
missing_file_references.append(ref)
212212
else:
213-
# path is found and processed: remove it, so we can check if we
214-
# found all of them
215-
res.for_packages.append(package_uid)
216-
res.save(codebase)
213+
if package_uid:
214+
# path is found and processed: remove it, so we can check if we
215+
# found all of them
216+
res.for_packages.append(package_uid)
217+
res.save(codebase)
217218

218219
# if we have left over file references, add these to extra data
219220
if missing_file_references:

src/packagedcode/win_reg.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -380,8 +380,9 @@ def get_root_resource(cls, resource, codebase):
380380
@classmethod
381381
def assign_package_to_resources(cls, package, resource, codebase):
382382
package_uid = package.package_uid
383-
resource.for_packages.append(package_uid)
384-
resource.save(codebase)
383+
if package_uid:
384+
resource.for_packages.append(package_uid)
385+
resource.save(codebase)
385386

386387
refs = package.file_references
387388
if not refs:
@@ -406,11 +407,12 @@ def assign_package_to_resources(cls, package, resource, codebase):
406407
if not ref:
407408
continue
408409

409-
# path is found and processed: remove it, so we can check if we
410-
# found all of them
411-
del refs_by_path[res.path]
412-
res.for_packages.append(package_uid)
413-
res.save(codebase)
410+
if package_uid:
411+
# path is found and processed: remove it, so we can check if we
412+
# found all of them
413+
del refs_by_path[res.path]
414+
res.for_packages.append(package_uid)
415+
res.save(codebase)
414416

415417
# if we have left over file references, add these to extra data
416418
if refs_by_path:

0 commit comments

Comments
 (0)