Skip to content

Commit 33fecf8

Browse files
Add initial package instance creation
Adds the PackageInstance class and functions to group package manifests and create a package instance from them. Adds a sample example case for Python. Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent 7b5cf8c commit 33fecf8

File tree

12 files changed

+934
-22
lines changed

12 files changed

+934
-22
lines changed

src/packagedcode/__init__.py

Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -110,28 +110,36 @@
110110
pubspec.PubspecLock
111111
]
112112

113+
PACKAGE_INSTANCE_TYPES = [
114+
pypi.PythonPackageInstance
115+
]
116+
113117
PACKAGE_MANIFESTS_BY_TYPE = {
114-
(
115-
cls.package_manifest_type
116-
if isinstance(cls, models.PackageManifest)
117-
else cls.default_type
118-
): cls
118+
cls.default_type: cls
119119
for cls in PACKAGE_MANIFEST_TYPES
120120
}
121-
# We cannot have two package classes with the same type
122-
if len(PACKAGE_MANIFESTS_BY_TYPE) != len(PACKAGE_MANIFEST_TYPES):
123-
seen_types = {}
124-
for pmt in PACKAGE_MANIFEST_TYPES:
125-
manifest = pmt()
126-
assert manifest.package_manifest_type
127-
seen = seen_types.get(manifest.package_manifest_type)
128-
if seen:
129-
msg = ('Invalid duplicated packagedcode.Package types: '
130-
'"{}:{}" and "{}:{}" have the same type.'
131-
.format(manifest.package_manifest_type, manifest.__name__, seen.package_manifest_type, seen.__name__,))
132-
raise Exception(msg)
133-
else:
134-
seen_types[manifest.package_manifest_type] = manifest
121+
122+
PACKAGE_INSTANCES_BY_TYPE = {
123+
cls.default_type: cls
124+
for cls in PACKAGE_INSTANCE_TYPES
125+
}
126+
127+
def check_package_manifest_classes():
    """
    Raise an Exception if two classes listed in PACKAGE_MANIFEST_TYPES report
    the same `package_manifest_type`.

    The detailed check only runs when PACKAGE_MANIFESTS_BY_TYPE has fewer
    entries than PACKAGE_MANIFEST_TYPES, i.e. when at least two classes
    collapsed onto the same mapping key. Return None otherwise.
    """
    # We cannot have two package classes with the same type
    if len(PACKAGE_MANIFESTS_BY_TYPE) == len(PACKAGE_MANIFEST_TYPES):
        return

    seen_types = {}
    for pmt in PACKAGE_MANIFEST_TYPES:
        manifest = pmt()
        manifest_type = manifest.package_manifest_type

        # Raise explicitly rather than `assert`: assertions are stripped when
        # Python runs with -O and this validation must always happen.
        if not manifest_type:
            raise Exception(
                'Invalid packagedcode.Package class without a type: '
                '"{}".'.format(type(manifest).__name__)
            )

        seen = seen_types.get(manifest_type)
        if seen is None:
            seen_types[manifest_type] = manifest
            continue

        # `manifest` and `seen` are instances: the class name lives on their
        # type, not on the instance itself.
        msg = (
            'Invalid duplicated packagedcode.Package types: '
            '"{}:{}" and "{}:{}" have the same type.'
            .format(
                manifest_type,
                type(manifest).__name__,
                seen.package_manifest_type,
                type(seen).__name__,
            )
        )
        raise Exception(msg)
135143

136144

137145
def get_package_class(scan_data, default=models.Package):

src/packagedcode/models.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -765,6 +765,100 @@ def recognize(cls, location):
765765
raise NotImplementedError
766766

767767

768+
@attr.s()
class PackageInstance:
    """
    A package instance mixin as represented by its package manifests, files
    and data from its package manifests.

    Subclasses must extend a Package subclass for a given ecosystem. The
    methods below read `self.manifests`, which this class does not define:
    subclasses are expected to provide it as an iterable of manifest classes
    that each expose a `file_patterns` attribute.
    """

    package_uuid = String(
        label='Package instance UUID',
        help='A unique ID for package instances in a codebase scan. '
             'Consists of a pURL and an UUID field as a pURL qualifier.'
    )

    package_manifest_paths = List(
        item_type=String,
        label='Package manifest paths',
        help='List of package manifest file paths for this package'
    )

    files = List(
        item_type=PackageFile,
        label='Provided files',
        help='List of files provided by this package.'
    )

    def populate_instance_from_manifests(self, package_manifests_by_path, uuid):
        """
        Populate this package instance from one or multiple package manifests.

        `package_manifests_by_path` is a mapping of {path: package manifest};
        only its paths are recorded for now. `uuid` is stored as a string in
        `package_uuid`.
        """
        # Build a new tuple instead of appending in place, so that calling
        # this method again still works once the attribute is a tuple.
        known_paths = tuple(self.package_manifest_paths or ())
        self.package_manifest_paths = known_paths + tuple(package_manifests_by_path)

        # ToDo: This field would be pURL + UUID as a qualifier instead
        self.package_uuid = str(uuid)

    def get_package_files(self, resource, codebase):
        """
        Return a list of the paths of all the files found when walking the
        parent of `resource` in `codebase`.

        Sub-classes should override to implement their own package files
        finding methods.
        """
        # NOTE(review): presumably `resource.parent()` can return None for a
        # codebase root; that case is not handled here - confirm with callers.
        parent = resource.parent(codebase)
        return [
            child.path
            for child in parent.walk(codebase)
            if not child.is_dir
        ]

    def get_other_manifests_for_instance(self, resource, codebase):
        """
        Return a dictionary of {path: package manifest} for the files found
        when walking the parent of `resource` whose file name matches one of
        the file patterns of `self.manifests`.

        Matching files without any detected package manifest are skipped.

        Sub-classes can override to implement their own package manifest
        finding methods.
        """
        package_manifests_by_path = {}

        # The patterns do not depend on the walked file: compute them once.
        file_patterns = self.get_file_patterns(manifests=self.manifests)

        parent = resource.parent(codebase)

        for child in parent.walk(codebase):
            if child.is_dir:
                continue

            filename = file_name(child.location)
            if not any(
                fnmatch.fnmatchcase(filename, pattern)
                for pattern in file_patterns
            ):
                continue

            if not child.package_manifests:
                continue  # Raise Exception(?)

            # ToDo: Implement for multiple package manifests per path
            package_manifests_by_path[child.path] = child.package_manifests[0]

        return package_manifests_by_path

    def get_file_patterns(self, manifests):
        """
        Return a list of all `file_patterns` for all the PackageManifest
        classes in `manifests`.
        """
        return [
            pattern
            for manifest in manifests
            for pattern in manifest.file_patterns
        ]
860+
861+
768862
# Package types
769863
# NOTE: this is somewhat redundant with extractcode archive handlers
770864
# yet the purpose and semantics are rather different here

src/packagedcode/plugin_package.py

Lines changed: 105 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010

1111
import attr
1212
import click
13+
import os
14+
import sys
15+
import uuid
1316

1417
from plugincode.scan import ScanPlugin
1518
from plugincode.scan import scan_impl
@@ -19,8 +22,30 @@
1922

2023
from packagedcode import get_package_instance
2124
from packagedcode import PACKAGE_MANIFEST_TYPES
25+
from packagedcode import PACKAGE_INSTANCES_BY_TYPE
2226

2327

28+
# Tracing is enabled when the SCANCODE_DEBUG_PACKAGE environment variable is
# set to any non-empty value.
TRACE = os.environ.get('SCANCODE_DEBUG_PACKAGE', False)


def logger_debug(*args):
    """
    Do nothing: this is the fallback used when tracing is disabled, so that
    `logger_debug` is always defined.
    """
    pass


if TRACE:

    # Set to False to send trace messages through the `logging` module
    # instead of `print`.
    use_print = True

    if use_print:
        printer = print
    else:
        import logging

        logger = logging.getLogger(__name__)
        # logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
        logging.basicConfig(stream=sys.stdout)
        logger.setLevel(logging.DEBUG)
        printer = logger.debug

    def logger_debug(*args):
        """
        Emit `args` as a single space-separated trace message. Strings are
        used as-is and any other object is rendered with `repr()`.
        """
        return printer(' '.join(
            a if isinstance(a, str) else repr(a)
            for a in args
        ))
48+
2449
def print_packages(ctx, param, value):
2550
if not value or ctx.resilient_parsing:
2651
return
@@ -51,8 +76,9 @@ class PackageScanner(ScanPlugin):
5176

5277
resource_attributes = {}
5378
codebase_attributes = {}
54-
resource_attributes['package_manifests'] = attr.ib(default=attr.Factory(list), repr=False)
5579
codebase_attributes['packages'] = attr.ib(default=attr.Factory(list), repr=False)
80+
resource_attributes['package_manifests'] = attr.ib(default=attr.Factory(list), repr=False)
81+
#resource_attributes['for_packages'] = attr.ib(default=attr.Factory(list), repr=False)
5682

5783
sort_order = 6
5884

@@ -102,13 +128,90 @@ def create_packages_from_manifests(codebase, **kwargs):
102128
Create package instances from package manifests present in the codebase.
103129
"""
104130
package_manifests = []
131+
package_instances_by_paths = {}
132+
package_instance_by_identifiers = {}
105133

106134
for resource in codebase.walk(topdown=False):
107-
if resource.package_manifests:
135+
if not resource.package_manifests:
136+
continue
137+
138+
# continue if resource.path already in `package_instances_by_paths`
139+
if resource.path in package_instances_by_paths:
140+
continue
141+
142+
if TRACE:
143+
logger_debug(
144+
'create_packages_from_manifests:',
145+
'location:', resource.location,
146+
)
147+
148+
# Currently we assume there is only one PackageManifest
149+
# ToDo: Do this for multiple PackageManifests per resource
150+
manifest = resource.package_manifests[0]
151+
152+
# Check if PackageInstance is implemented
153+
pk_instance_class = PACKAGE_INSTANCES_BY_TYPE.get(manifest["type"])
154+
if not pk_instance_class:
108155
package_manifests.extend(resource.package_manifests)
156+
continue
109157

158+
# create a PackageInstance from the `default_type`
159+
pk_instance = pk_instance_class()
160+
pk_instance_uuid = uuid.uuid4()
161+
package_instance_by_identifiers[pk_instance_uuid] = pk_instance
162+
163+
# use the get_other_manifests_for_instance to get other instances
164+
package_manifests_by_path = pk_instance.get_other_manifests_for_instance(resource, codebase)
165+
package_manifests_by_path[resource.path] = manifest
166+
167+
if TRACE:
168+
logger_debug(
169+
'create_packages_from_manifests:',
170+
'package_manifests_by_path:', package_manifests_by_path,
171+
)
172+
173+
# add `path: Instance` into `package_instances_by_paths` for all manifests
174+
for path in package_manifests_by_path.keys():
175+
print(f"Path: {path}")
176+
package_instances_by_paths[path] = pk_instance
177+
178+
# populate PackageInstance with data from manifests
179+
pk_instance.populate_instance_from_manifests(package_manifests_by_path, uuid=pk_instance_uuid)
180+
181+
# get files for this PackageInstance
182+
pk_instance.files = tuple(pk_instance.get_package_files(resource, codebase))
183+
184+
# add instance uuid to `for_packages` for all manifests (and files ?)
185+
update_files_with_package_instances(package_manifests_by_path, codebase, pk_instance)
186+
187+
if TRACE:
188+
logger_debug(
189+
'create_packages_from_manifests:',
190+
'pk_instance:', pk_instance,
191+
)
192+
193+
# ToDo: replace this with PackageInstance objects once basic implementation is complete
110194
codebase.attributes.packages.extend(package_manifests)
111195

196+
if TRACE:
197+
logger_debug(
198+
'create_packages_from_manifests:',
199+
'package_instances_by_paths:', package_instances_by_paths,
200+
)
201+
202+
# Get unique PackageInstance objects from `package_instances_by_paths`
203+
204+
package_instances = list(package_instance_by_identifiers.values())
205+
codebase.attributes.packages.extend(package_instances)
206+
207+
208+
def update_files_with_package_instances(package_manifests_by_path, codebase, package_instance):
    """
    Placeholder: visit each path of the `package_manifests_by_path` mapping
    without changing anything and return None.

    `codebase` and `package_instance` are accepted but not used yet.
    """
    for _manifest_path in package_manifests_by_path.keys():
        # ToDo: update the `for_packages` attribute of the resource at this
        # path with a reference to `package_instance`.
        pass
214+
112215

113216
def set_packages_root(resource, codebase):
114217
"""

src/packagedcode/pypi.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
"""
4949
Detect and collect Python packages information.
5050
"""
51-
# TODO: add support for poetry and setup.cfg
51+
# TODO: add support for poetry and setup.cfg and metadata.json
5252

5353
TRACE = False
5454

@@ -373,6 +373,23 @@ def recognize(cls, location):
373373
yield cls(dependencies=dependent_packages)
374374

375375

376+
@attr.s()
class PythonPackageInstance(PythonPackage, models.PackageInstance):
    """
    A PackageInstance for Python, assembled from one or several Python
    package manifests.
    """

    @property
    def manifests(self):
        """
        Return the list of manifest classes that can contribute to a Python
        package instance.
        """
        python_manifest_classes = [
            MetadataFile,
            RequirementsFile,
            PipfileLock,
            DependencyFile,
            SetupPy,
        ]
        return python_manifest_classes
392+
376393

377394
def get_attribute(metainfo, name, multiple=False):
378395
"""
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
Copyright 2014 Pallets
2+
3+
Redistribution and use in source and binary forms, with or without
4+
modification, are permitted provided that the following conditions are
5+
met:
6+
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
10+
2. Redistributions in binary form must reproduce the above copyright
11+
notice, this list of conditions and the following disclaimer in the
12+
documentation and/or other materials provided with the distribution.
13+
14+
3. Neither the name of the copyright holder nor the names of its
15+
contributors may be used to endorse or promote products derived from
16+
this software without specific prior written permission.
17+
18+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
21+
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22+
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
24+
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25+
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26+
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27+
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
include CHANGES.rst
2+
include tox.ini
3+
include requirements/*.txt
4+
graft artwork
5+
graft docs
6+
prune docs/_build
7+
graft examples
8+
graft tests
9+
include src/click/py.typed
10+
global-exclude *.pyc

0 commit comments

Comments
 (0)