|
| 1 | +# SPDX-License-Identifier: Apache-2.0 |
| 2 | +# |
| 3 | +# http://nexb.com and https://github.com/aboutcode-org/scancode.io |
| 4 | +# The ScanCode.io software is licensed under the Apache License version 2.0. |
| 5 | +# Data generated with ScanCode.io is provided as-is without warranties. |
| 6 | +# ScanCode is a trademark of nexB Inc. |
| 7 | +# |
| 8 | +# You may not use this software except in compliance with the License. |
| 9 | +# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 |
| 10 | +# Unless required by applicable law or agreed to in writing, software distributed |
| 11 | +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR |
| 12 | +# CONDITIONS OF ANY KIND, either express or implied. See the License for the |
| 13 | +# specific language governing permissions and limitations under the License. |
| 14 | +# |
| 15 | +# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES |
| 16 | +# OR CONDITIONS OF ANY KIND, either express or implied. No content created from |
| 17 | +# ScanCode.io should be considered or used as legal advice. Consult an Attorney |
| 18 | +# for any legal advice. |
| 19 | +# |
| 20 | +# ScanCode.io is a free software code scanning tool from nexB Inc. and others. |
| 21 | +# Visit https://github.com/aboutcode-org/scancode.io for support and download. |
| 22 | +# |
| 23 | + |
| 24 | +import argparse |
| 25 | +import os |
| 26 | +import shutil |
| 27 | +import socket |
| 28 | +import subprocess |
| 29 | +import sys |
| 30 | +import time |
| 31 | +import uuid |
| 32 | +from pathlib import Path |
| 33 | + |
# Docker images: the ScanCode.io application and the Postgres server it uses.
SCANCODE_IMAGE = "ghcr.io/aboutcode-org/scancode.io:latest"
DB_IMAGE = "postgres:13"

# Database credentials; user and password can be overridden from the environment.
DB_USER = os.environ.get("SCANCODE_DB_USER", "scancode")
DB_PASS = os.environ.get("SCANCODE_DB_PASS", "scancode")
DB_NAME = "scancode"

# Working directory, relative to the current directory, holding the input files.
D2D_DIR = Path("d2d")
| 40 | + |
| 41 | + |
def pull_required_images(docker_bin):
    """
    Pull the Postgres and ScanCode.io images with the ``docker_bin`` executable.

    Each pull is delegated to ``safe_run`` in silent mode, one image at a time.
    """
    print("Checking and pulling required Docker images (if missing)...")
    for required_image in (DB_IMAGE, SCANCODE_IMAGE):
        pull_cmd = [docker_bin, "pull", required_image]
        safe_run(pull_cmd, silent=True)
    print("Docker images are ready.")
| 49 | + |
| 50 | + |
def get_free_port():
    """Return a TCP port number that the OS reports as currently unused."""
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # Binding to port 0 lets the OS choose any available port.
        probe.bind(("", 0))
        _host, port = probe.getsockname()
        return port
    finally:
        # The socket is closed before returning, so the port is only reserved
        # for the duration of this call.
        probe.close()
| 56 | + |
| 57 | + |
def safe_run(cmd, capture_output=False, silent=False):
    """
    Run the ``cmd`` argument list and return its ``subprocess.CompletedProcess``.

    The executable named in ``cmd[0]`` is resolved to a full path through the
    PATH when possible. The resolution is applied to a copy, so the caller's
    ``cmd`` sequence is never modified.

    Unless ``silent`` is True, the command is echoed before it runs.
    When ``capture_output`` is True, stdout and stderr are captured as text and
    available on the returned object.

    If the executable cannot be found or the command exits with a non-zero
    status, a diagnostic is printed on stderr and the process exits with
    status 1.
    """
    if not silent:
        print(f"Running: {' '.join(cmd)}")

    executable, *arguments = cmd
    resolved_cmd = [shutil.which(executable) or executable, *arguments]

    try:
        return subprocess.run(  # NOQA: S603
            resolved_cmd,
            check=True,
            text=True,
            capture_output=capture_output,
        )
    except FileNotFoundError:
        # Raised when the executable is neither on the PATH nor a valid path.
        print(f"Command not found: {executable}", file=sys.stderr)
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        print(f"Command failed: {' '.join(resolved_cmd)}", file=sys.stderr)
        # stderr/stdout are only populated when the output was captured.
        print(e.stderr or e.stdout or str(e), file=sys.stderr)
        sys.exit(1)
| 76 | + |
| 77 | + |
def wait_for_postgres(container_name, timeout=60):
    """
    Block until Postgres in the ``container_name`` container accepts connections.

    Readiness is probed about once per second by running ``pg_isready`` inside
    the container. ``timeout`` is the maximum number of seconds to wait,
    measured on a monotonic clock so that slow ``docker exec`` calls count
    against it.

    Raise a RuntimeError if Postgres is not ready before the timeout expires.
    """
    print("Waiting for Postgres to be ready...")

    # Resolve the docker executable the same way the rest of this script does.
    docker_bin = shutil.which("docker") or "docker"
    probe_cmd = [
        docker_bin,
        "exec",
        container_name,
        "pg_isready",
        # Probe over TCP rather than the Unix socket: the official postgres
        # image runs a temporary, socket-only server while it initializes the
        # database, and that server would otherwise be reported as ready.
        "-h",
        "127.0.0.1",
        "-U",
        DB_USER,
    ]

    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        result = subprocess.run(  # NOQA: S603
            probe_cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        if result.returncode == 0:
            print("Postgres is ready.")
            return
        time.sleep(1)

    raise RuntimeError("Postgres did not become ready in time.")
| 92 | + |
| 93 | + |
def prepare_d2d_dir(from_file, to_file):
    """
    Copy ``from_file`` and ``to_file`` into the d2d working directory.

    The directory is created when missing. Return a ``(from_name, to_name)``
    tuple with the file names of the two copies inside that directory.

    When both inputs share the same base name, the copies are stored as
    ``from-<name>`` and ``to-<name>`` so that the second copy does not
    overwrite the first one.
    """
    D2D_DIR.mkdir(exist_ok=True)

    from_name = Path(from_file).name
    to_name = Path(to_file).name
    if from_name == to_name:
        # e.g. "old/app.jar" and "new/app.jar": keep both copies distinct.
        from_name = f"from-{from_name}"
        to_name = f"to-{to_name}"

    from_dest = D2D_DIR / from_name
    to_dest = D2D_DIR / to_name

    for source, destination in ((from_file, from_dest), (to_file, to_dest)):
        try:
            shutil.copy(source, destination)
        except shutil.SameFileError:
            # The input already is the file in the d2d directory: nothing to copy.
            pass

    print(f"Files copied to: {D2D_DIR.resolve()}")

    return from_dest.name, to_dest.name
| 106 | + |
| 107 | + |
def _parse_input_files(input_specs):
    """
    Return a ``{tag: absolute_path}`` mapping built from ``path:tag`` strings.

    Print an error on stderr and exit with status 1 on a malformed entry.
    """
    file_map = {}
    for spec in input_specs:
        # Split on the LAST colon only, so that paths which themselves contain
        # a colon (such as a Windows drive letter) are still accepted.
        path, separator, tag = spec.rpartition(":")
        if not separator or not path or tag not in ("from", "to"):
            print(f"Invalid --input-file format: {spec}. Use path:tag", file=sys.stderr)
            sys.exit(1)
        file_map[tag] = os.path.abspath(path)
    return file_map


def _build_pipeline_name(options):
    """Return the pipeline name, suffixed with ``:opt1,opt2`` when options are given."""
    pipeline_name = "map_deploy_to_develop"
    if options:
        pipeline_name = f"{pipeline_name}:{','.join(options)}"
    return pipeline_name


def _scancode_docker_cmd(docker_bin, db_url, shell_cmd):
    """Return the ``docker run`` argument list running ``shell_cmd`` in the ScanCode.io image."""
    # NOTE(review): the database URL targets host.docker.internal while the
    # container uses "--network host"; that host name may not resolve on every
    # Docker setup -- confirm on the supported platforms.
    return [
        docker_bin,
        "run",
        "--rm",
        "-v",
        f"{D2D_DIR.resolve()}:/code",
        "-e",
        f"DATABASE_URL={db_url}",
        "--network",
        "host",
        SCANCODE_IMAGE,
        "sh",
        "-c",
        shell_cmd,
    ]


def main():
    """
    Command-line entry point.

    Parse the arguments, pull the Docker images, copy the ``from`` and ``to``
    input files to the d2d directory, start a throwaway Postgres container,
    create and execute a ScanCode.io project with the map_deploy_to_develop
    pipeline, then write the JSON printed by ``scanpipe output`` to the
    ``--output`` file. The Postgres container is always removed on the way out.

    Exit with status 1 on invalid arguments.
    """
    # Local import: only this function needs shell quoting.
    import shlex

    parser = argparse.ArgumentParser(
        description="Run ScanCode.io pipelines in Docker with isolated Postgres DB "
        "(using ./d2d directory)."
    )
    parser.add_argument(
        "--input-file",
        action="append",
        required=True,
        help="Format: path/to/file:tag (tag must be 'from' or 'to')",
    )
    parser.add_argument(
        "--option",
        action="append",
        help="Options for the pipeline, e.g. Python, Java, Javascript",
    )
    parser.add_argument(
        "--output",
        required=True,
        help="Output file to write the ScanCode results (JSON format)",
    )
    args = parser.parse_args()

    file_map = _parse_input_files(args.input_file)

    if "from" not in file_map or "to" not in file_map:
        print("Both :from and :to input files are required.", file=sys.stderr)
        sys.exit(1)

    docker_bin = shutil.which("docker") or "docker"
    pull_required_images(docker_bin)

    from_name, to_name = prepare_d2d_dir(file_map["from"], file_map["to"])

    db_container_name = f"scancode_db_{uuid.uuid4().hex[:6]}"
    db_port = get_free_port()
    print(f"Using Postgres host port: {db_port}")

    project_name = f"scanpipe_{uuid.uuid4().hex[:8]}"

    try:
        safe_run(
            [
                docker_bin,
                "run",
                "-d",
                "--name",
                db_container_name,
                "-e",
                f"POSTGRES_USER={DB_USER}",
                "-e",
                f"POSTGRES_PASSWORD={DB_PASS}",
                "-e",
                f"POSTGRES_DB={DB_NAME}",
                "-p",
                f"{db_port}:5432",
                DB_IMAGE,
            ],
            silent=True,
        )

        wait_for_postgres(db_container_name)
        db_url = (
            f"postgresql://{DB_USER}:{DB_PASS}@host.docker.internal:{db_port}/{DB_NAME}"
        )

        # File names and options come from the user and end up in an "sh -c"
        # string: quote them so spaces or shell metacharacters stay literal.
        from_input = shlex.quote(f"/code/{from_name}:from")
        to_input = shlex.quote(f"/code/{to_name}:to")
        pipeline_name = shlex.quote(_build_pipeline_name(args.option))

        pipeline_cmd = (
            f"scanpipe create-project {project_name} "
            f"--input-file {from_input} "
            f"--input-file {to_input} "
            f"--pipeline {pipeline_name} && "
            f"scanpipe execute --project {project_name}"
        )

        print("Running ScanCode pipeline:")
        safe_run(
            _scancode_docker_cmd(docker_bin, db_url, pipeline_cmd),
            capture_output=False,
        )

        output_cmd = f"scanpipe output --project {project_name} --format json --print"
        result = safe_run(
            _scancode_docker_cmd(docker_bin, db_url, output_cmd),
            capture_output=True,
        )

        with open(args.output, "w", encoding="utf-8") as output_file:
            output_file.write(result.stdout)

    finally:
        # Best-effort cleanup: always try to remove the database container,
        # including when a step above failed.
        subprocess.run(  # NOQA: S603
            [docker_bin, "rm", "-f", db_container_name],
            check=False,
        )
| 241 | + |
| 242 | + |
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments