Skip to content

Commit 3ab67d9

Browse files
committed
Add Readme file
Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
1 parent 6b5c508 commit 3ab67d9

File tree

2 files changed

+330
-0
lines changed

2 files changed

+330
-0
lines changed

etc/scripts/d2d/README.rst

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
==============================================================================
2+
Run ScanCode.io Pipelines in Docker (D2D Runner)
3+
==============================================================================
4+
5+
This script helps execute **ScanCode.io** pipelines in isolated Docker containers,
6+
using a local PostgreSQL database and a working directory named ``./d2d``.
7+
8+
-------------------------------------------------------------------------------
9+
Prerequisites
10+
-------------------------------------------------------------------------------
11+
12+
1. **Python 3.8+** must be installed
13+
2. **Docker** must be installed and accessible via ``sudo`` or user group
14+
15+
-------------------------------------------------------------------------------
16+
Environment Variables
17+
-------------------------------------------------------------------------------
18+
19+
.. list-table::
20+
:widths: 25 75
21+
:header-rows: 1
22+
23+
* - Variable
24+
- Description
25+
* - ``SCANCODE_DB_PASS``
26+
- Database password (default: ``scancode``)
27+
* - ``SCANCODE_DB_USER``
28+
- Database user (default: ``scancode``)
29+
30+
-------------------------------------------------------------------------------
31+
Usage Example
32+
-------------------------------------------------------------------------------
33+
34+
.. code-block:: bash
35+
36+
sudo su -
37+
python3 etc/scripts/d2d/run_d2d_scio.py \
38+
--input-file ./path/from/from-intbitset.tar.gz:from \
39+
--input-file ./path/to/to-intbitset.whl:to \
40+
--option Python \
41+
--output res1.json
42+
43+
-------------------------------------------------------------------------------
44+
Parameters
45+
-------------------------------------------------------------------------------
46+
47+
.. list-table::
48+
:widths: 25 75
49+
:header-rows: 1
50+
51+
* - Parameter
52+
- Description
53+
* - ``--input-file <path:tag>``
54+
- Required twice: one tagged ``:from``, one tagged ``:to``
55+
* - ``--option <name>``
56+
- Optional; e.g., ``Python``, ``Java``, ``Javascript``, ``Scala``, ``Kotlin``
57+
* - ``--output <file.json>``
58+
- Required; JSON output file for results
59+
60+
-------------------------------------------------------------------------------
61+
Internal Steps
62+
-------------------------------------------------------------------------------
63+
64+
1. Creates or uses the ``./d2d`` directory
65+
2. Copies ``from`` and ``to`` files into it
66+
3. Spins up a temporary **Postgres 13** container
67+
4. Waits for database readiness
68+
5. Runs **ScanCode.io** pipeline (``map_deploy_to_develop``)
69+
6. Saves pipeline output to the specified JSON file
70+
7. Cleans up containers automatically
71+
72+
-------------------------------------------------------------------------------
73+
Cleanup
74+
-------------------------------------------------------------------------------
75+
76+
Containers are removed automatically, but you can check for leftover containers with:
77+
78+
.. code-block:: bash
79+
80+
docker ps -a | grep scancode
81+
82+
If manual cleanup is needed:
83+
84+
.. code-block:: bash
85+
86+
docker rm -f <container_id>

etc/scripts/d2d/run_d2d_scio.py

Lines changed: 244 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
#
3+
# http://nexb.com and https://github.com/aboutcode-org/scancode.io
4+
# The ScanCode.io software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode.io is provided as-is without warranties.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
16+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
17+
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
18+
# for any legal advice.
19+
#
20+
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
21+
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
22+
#
23+
24+
import argparse
import os
import shlex
import shutil
import socket
import subprocess
import sys
import time
import uuid
from pathlib import Path
33+
34+
# Container images used by the runner: the ScanCode.io application and its database.
SCANCODE_IMAGE = "ghcr.io/aboutcode-org/scancode.io:latest"
DB_IMAGE = "postgres:13"

# Database credentials; both can be overridden through the environment.
DB_USER = os.environ.get("SCANCODE_DB_USER", "scancode")
DB_PASS = os.environ.get("SCANCODE_DB_PASS", "scancode")
DB_NAME = "scancode"

# Working directory, relative to the current directory, that receives the inputs.
D2D_DIR = Path("d2d")
40+
41+
42+
def pull_required_images(docker_bin):
    """Pull the database and ScanCode.io images using the given Docker binary."""
    print("Checking and pulling required Docker images (if missing)...")
    for image_ref in (DB_IMAGE, SCANCODE_IMAGE):
        pull_cmd = [docker_bin, "pull", image_ref]
        safe_run(pull_cmd, silent=True)
    print("Docker images are ready.")
49+
50+
51+
def get_free_port():
    """Return a host TCP port number that was free at the time of the call.

    The port is obtained by binding a throwaway socket to port 0 and reading
    back the number the operating system assigned. The socket is closed before
    returning, so the port is only known to have been free at that moment.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind(("", 0))
        _address, port = probe.getsockname()
        return port
    finally:
        probe.close()
56+
57+
58+
def safe_run(cmd, capture_output=False, silent=False):
    """Run an external command and return its ``CompletedProcess``.

    The executable (first item of ``cmd``) is resolved to a full path with
    ``shutil.which`` when possible; otherwise it is used as given.

    Args:
        cmd: Sequence of strings: the executable followed by its arguments.
            The sequence is not modified.
        capture_output: When true, stdout and stderr are captured (as text) on
            the returned object instead of being inherited from this process.
        silent: When true, the command line is not echoed before running.

    Returns:
        The ``subprocess.CompletedProcess`` of the successful command.

    Exits the interpreter with status 1 (``SystemExit``) when the command
    returns a non-zero exit code, after printing the failure to stderr.
    """
    if not silent:
        print(f"Running: {' '.join(cmd)}")

    # Build a new list rather than assigning to cmd[0]: the caller's sequence
    # stays untouched and immutable sequences such as tuples are accepted.
    executable, *arguments = cmd
    resolved_cmd = [shutil.which(executable) or executable, *arguments]

    try:
        return subprocess.run(  # NOQA S603
            resolved_cmd,
            check=True,
            text=True,
            capture_output=capture_output,
        )
    except subprocess.CalledProcessError as e:
        print(f"Command failed: {' '.join(resolved_cmd)}", file=sys.stderr)
        # stderr/stdout are only populated when the output was captured.
        print(e.stderr or e.stdout or str(e), file=sys.stderr)
        sys.exit(1)
76+
77+
78+
def wait_for_postgres(container_name, timeout=60):
    """Block until ``pg_isready`` succeeds inside the given container.

    ``pg_isready`` is run through ``docker exec`` roughly once per second.

    Args:
        container_name: Name of the running Postgres container to probe.
        timeout: Maximum number of seconds to keep polling.

    Raises:
        RuntimeError: If the database is not ready before the timeout elapses.
    """
    print("Waiting for Postgres to be ready...")
    docker_bin = shutil.which("docker") or "docker"
    probe_cmd = [docker_bin, "exec", container_name, "pg_isready", "-U", DB_USER]

    # Use a wall-clock deadline: counting loop iterations would ignore the time
    # spent inside each `docker exec` call and overshoot the requested timeout.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        result = subprocess.run(  # NOQA: S603
            probe_cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=False,
        )
        if result.returncode == 0:
            print("Postgres is ready.")
            return
        time.sleep(1)
    raise RuntimeError("Postgres did not become ready in time.")
92+
93+
94+
def prepare_d2d_dir(from_file, to_file):
    """Copy both input files into the working directory, creating it if needed.

    Args:
        from_file: Path of the input tagged ``from``.
        to_file: Path of the input tagged ``to``.

    Returns:
        A ``(from_name, to_name)`` tuple with the file names of the copies
        inside the working directory.
    """
    D2D_DIR.mkdir(exist_ok=True)

    from_name = Path(from_file).name
    to_name = Path(to_file).name
    if from_name == to_name:
        # Same basename coming from different directories: without distinct
        # names the second copy would silently overwrite the first one.
        from_name, to_name = f"from-{from_name}", f"to-{to_name}"

    for source, dest_name in ((from_file, from_name), (to_file, to_name)):
        try:
            shutil.copy(source, D2D_DIR / dest_name)
        except shutil.SameFileError:
            # The input already lives in the working directory; nothing to copy.
            pass
    print(f"Files copied to: {D2D_DIR.resolve()}")

    return from_name, to_name
106+
107+
108+
def _parse_input_files(specs):
    """Return a ``{tag: absolute_path}`` mapping built from ``path:tag`` specs."""
    file_map = {}
    for spec in specs:
        # Split on the last colon only, so paths that themselves contain a
        # colon (for example a Windows drive letter) are still accepted.
        path, separator, tag = spec.rpartition(":")
        if not separator or not path:
            print(f"Invalid --input-file format: {spec}. Use path:tag", file=sys.stderr)
            sys.exit(1)
        if tag not in ("from", "to"):
            print(
                f"Invalid --input-file tag in: {spec}. Tag must be 'from' or 'to'",
                file=sys.stderr,
            )
            sys.exit(1)
        file_map[tag] = os.path.abspath(path)
    return file_map


def _build_pipeline_spec(pipeline_name, options):
    """Return ``name`` or ``name:opt1,opt2`` when options are given."""
    if not options:
        return pipeline_name
    return f"{pipeline_name}:{','.join(options)}"


def _scancode_docker_cmd(docker_bin, db_url, shell_cmd):
    """Return the ``docker run`` argv that runs ``shell_cmd`` in the ScanCode.io image."""
    return [
        docker_bin,
        "run",
        "--rm",
        "-v",
        f"{D2D_DIR.resolve()}:/code",
        "-e",
        f"DATABASE_URL={db_url}",
        "--network",
        "host",
        SCANCODE_IMAGE,
        "sh",
        "-c",
        shell_cmd,
    ]


def main():
    """Run the ``map_deploy_to_develop`` pipeline in Docker and save its JSON output.

    Parses the command line, copies the ``from`` and ``to`` inputs into the
    working directory, starts a temporary Postgres container, creates and
    executes a ScanCode.io project against it, writes the JSON results to the
    ``--output`` file and finally removes the database container.

    Exits with status 1 when the ``--input-file`` arguments are invalid or
    incomplete.
    """
    parser = argparse.ArgumentParser(
        description="Run ScanCode.io pipelines in Docker with isolated Postgres DB "
        "(using ./d2d directory)."
    )
    parser.add_argument(
        "--input-file",
        action="append",
        required=True,
        help="Format: path/to/file:tag (tag must be 'from' or 'to')",
    )
    parser.add_argument(
        "--option",
        action="append",
        help="Options for the pipeline, e.g. Python, Java, Javascript",
    )
    parser.add_argument(
        "--output",
        required=True,
        help="Output file to write the ScanCode results (JSON format)",
    )
    args = parser.parse_args()

    file_map = _parse_input_files(args.input_file)
    if "from" not in file_map or "to" not in file_map:
        print("Both :from and :to input files are required.", file=sys.stderr)
        sys.exit(1)

    docker_bin = shutil.which("docker") or "docker"
    pull_required_images(docker_bin)

    from_name, to_name = prepare_d2d_dir(file_map["from"], file_map["to"])

    # Random suffixes keep concurrent runs from clashing on names.
    db_container_name = f"scancode_db_{uuid.uuid4().hex[:6]}"
    db_port = get_free_port()
    print(f"Using Postgres host port: {db_port}")

    project_name = f"scanpipe_{uuid.uuid4().hex[:8]}"

    try:
        safe_run(
            [
                docker_bin,
                "run",
                "-d",
                "--name",
                db_container_name,
                "-e",
                f"POSTGRES_USER={DB_USER}",
                "-e",
                f"POSTGRES_PASSWORD={DB_PASS}",
                "-e",
                f"POSTGRES_DB={DB_NAME}",
                "-p",
                f"{db_port}:5432",
                DB_IMAGE,
            ],
            silent=True,
        )

        wait_for_postgres(db_container_name)
        # NOTE(review): host.docker.internal is combined with --network host
        # below; that hostname may not resolve on a plain Linux Docker Engine.
        # Confirm on the target platform.
        db_url = (
            f"postgresql://{DB_USER}:{DB_PASS}@host.docker.internal:{db_port}/{DB_NAME}"
        )

        pipeline_spec = _build_pipeline_spec("map_deploy_to_develop", args.option)

        # The command line is interpreted by `sh -c` inside the container, so
        # every argument is shell-quoted: file names and options come from the
        # user and may contain spaces or shell metacharacters.
        create_cmd = shlex.join(
            [
                "scanpipe",
                "create-project",
                project_name,
                "--input-file",
                f"/code/{from_name}:from",
                "--input-file",
                f"/code/{to_name}:to",
                "--pipeline",
                pipeline_spec,
            ]
        )
        execute_cmd = shlex.join(["scanpipe", "execute", "--project", project_name])

        print("Running ScanCode pipeline:")
        safe_run(
            _scancode_docker_cmd(docker_bin, db_url, f"{create_cmd} && {execute_cmd}"),
            capture_output=False,
        )

        output_cmd = shlex.join(
            [
                "scanpipe",
                "output",
                "--project",
                project_name,
                "--format",
                "json",
                "--print",
            ]
        )
        result = safe_run(
            _scancode_docker_cmd(docker_bin, db_url, output_cmd),
            capture_output=True,
        )

        with open(args.output, "w", encoding="utf-8") as f:
            f.write(result.stdout)

    finally:
        # Best-effort cleanup: the database container is removed even when a
        # step above failed, and a failure here must not mask that error.
        subprocess.run(  # NOQA: S603
            [docker_bin, "rm", "-f", db_container_name],
            check=False,
        )


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)