Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/macaron/config/defaults.ini
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,17 @@ hostname = gitlab.com
# [git_service.local_repo]
# hostname = example.org

[builder]
# Skip detecting build tool configuration files in paths containing the following keywords.
build_tool_path_filters =
test
example
sample
doc
demo
spec
mock

# This is the spec for trusted Maven build tools.
[builder.maven]
entry_conf = settings.xml
Expand Down
1 change: 1 addition & 0 deletions src/macaron/repo_verifier/repo_verifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def verify_repo(
version=version,
reported_repo_url=reported_repo_url,
reported_repo_fs=reported_repo_fs,
build_tool=build_tool,
provenance_repo_url=provenance_repo_url,
)

Expand Down
64 changes: 4 additions & 60 deletions src/macaron/repo_verifier/repo_verifier_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,66 +4,14 @@
"""This module contains the base class and core data models for repository verification."""
import abc
import logging
import os
from collections import deque
from dataclasses import dataclass
from enum import Enum
from pathlib import Path

from macaron.slsa_analyzer.build_tool import BaseBuildTool

logger = logging.getLogger(__name__)


def find_file_in_repo(root_dir: Path, filename: str) -> Path | None:
"""Find the highest level file with a given name in a local repository.

This function ignores certain paths that are not under the main source code directories.

Parameters
----------
root_dir : Path
The root directory of the repository.
filename : str
The name of the file to search for.

Returns
-------
Path | None
The path to the file if it exists, otherwise
"""
# TODO: Consider using BaseBuildTool.get_build_dirs.
# + Refactor 'get_build_dirs' to skip certain directories
# that are most likely not part of the main codebase (e.g., sample).
# + Need to find a way to look for other
# files (e.g., gradle.properties) for the purpose of repo verification
# without breaking the current logic of finding build directories.
# + Add the capability to return the content/path of the file.
if not os.path.isdir(root_dir):
return None

queue: deque[Path] = deque()
queue.append(Path(root_dir))
while queue:
current_dir = queue.popleft()

# Don't look through non-main directories.
if any(
keyword in current_dir.name.lower()
for keyword in ["test", "example", "sample", "doc", "demo", "spec", "mock"]
):
continue

if Path(current_dir, filename).exists():
return Path(current_dir, filename)

# Ignore symlinks to prevent potential infinite loop.
sub_dirs = [Path(it) for it in current_dir.iterdir() if it.is_dir() and not it.is_symlink()]
queue.extend(sub_dirs)

return None


class RepositoryVerificationStatus(str, Enum):
"""A class to store the status of the repo verification."""

Expand Down Expand Up @@ -167,18 +115,14 @@ class RepoVerifierToolSpecific(RepoVerifierFromProvenance, abc.ABC):
From-provenance verification is inherited from the parent class.
"""

@property
@abc.abstractmethod
def specific_tool(self) -> BaseBuildTool:
"""Define the build tool used to build the package."""

def __init__(
self,
namespace: str | None,
name: str,
version: str,
reported_repo_url: str,
reported_repo_fs: str,
build_tool: BaseBuildTool,
provenance_repo_url: str | None,
):
"""Instantiate the class.
Expand All @@ -195,12 +139,12 @@ def __init__(
The URL of the repository reported by the publisher.
reported_repo_fs : str
The file system path of the reported repository.
build_tool : BaseBuildTool
The build tool used to build the package.
provenance_repo_url : str | None
The URL of the repository from a provenance file, or None if it, or the provenance, is not present.
"""
super().__init__(
namespace, name, version, reported_repo_url, reported_repo_fs, provenance_repo_url, self.specific_tool
)
super().__init__(namespace, name, version, reported_repo_url, reported_repo_fs, provenance_repo_url, build_tool)

def verify_repo(self) -> RepositoryVerificationResult:
"""Verify the repository as per the base class method."""
Expand Down
57 changes: 39 additions & 18 deletions src/macaron/repo_verifier/repo_verifier_gradle.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,9 @@
RepositoryVerificationResult,
RepositoryVerificationStatus,
RepoVerifierToolSpecific,
find_file_in_repo,
)
from macaron.repo_verifier.repo_verifier_maven import RepoVerifierMaven
from macaron.slsa_analyzer.build_tool import Gradle
from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool, file_exists
from macaron.slsa_analyzer.package_registry.maven_central_registry import same_organization

logger = logging.getLogger(__name__)
Expand All @@ -22,15 +21,14 @@
class RepoVerifierGradle(RepoVerifierToolSpecific):
"""A class to verify whether a repository with Gradle build tool links back to the artifact."""

specific_tool = Gradle()

def __init__(
self,
namespace: str,
name: str,
version: str,
reported_repo_url: str,
reported_repo_fs: str,
build_tool: BaseBuildTool,
provenance_repo_url: str | None,
):
"""Initialize a RepoVerifierGradle instance.
Expand All @@ -47,17 +45,20 @@ def __init__(
The URL of the repository reported by the publisher.
reported_repo_fs : str
The file system path of the reported repository.
build_tool : BaseBuildTool
The build tool used to build the package.
provenance_repo_url : str | None
The URL of the repository from a provenance file, or None if it, or the provenance, is not present.
"""
super().__init__(namespace, name, version, reported_repo_url, reported_repo_fs, provenance_repo_url)
super().__init__(namespace, name, version, reported_repo_url, reported_repo_fs, build_tool, provenance_repo_url)

self.maven_verifier = RepoVerifierMaven(
namespace=namespace,
name=name,
version=version,
reported_repo_url=reported_repo_url,
reported_repo_fs=reported_repo_fs,
build_tool=build_tool,
provenance_repo_url=provenance_repo_url,
)

Expand All @@ -81,11 +82,11 @@ def verify_by_tool(self) -> RepositoryVerificationResult:
if recognized_services_verification_result.status == RepositoryVerificationStatus.PASSED:
return recognized_services_verification_result

gradle_group_id = self._extract_group_id_from_properties()
gradle_group_id = self.extract_group_id_from_properties()
if not gradle_group_id:
gradle_group_id = self._extract_group_id_from_build_groovy()
gradle_group_id = self.extract_group_id_from_build_groovy()
if not gradle_group_id:
gradle_group_id = self._extract_group_id_from_build_kotlin()
gradle_group_id = self.extract_group_id_from_build_kotlin()
if not gradle_group_id:
logger.debug("Could not find group from gradle manifests for %s", self.reported_repo_url)
return RepositoryVerificationResult(
Expand Down Expand Up @@ -149,17 +150,37 @@ def _extract_group_id_from_gradle_manifest(

return None

def _extract_group_id_from_properties(self) -> str | None:
"""Extract the group id from the gradle.properties file."""
gradle_properties = find_file_in_repo(Path(self.reported_repo_fs), "gradle.properties")
def extract_group_id_from_properties(self) -> str | None:
"""Extract the group id from the gradle.properties file.

Returns
-------
str | None
The extracted group id if found, otherwise None.
"""
gradle_properties = file_exists(
self.reported_repo_fs, "gradle.properties", filters=self.build_tool.path_filters
)
return self._extract_group_id_from_gradle_manifest(gradle_properties)

def _extract_group_id_from_build_groovy(self) -> str | None:
"""Extract the group id from the build.gradle file."""
build_gradle = find_file_in_repo(Path(self.reported_repo_fs), "build.gradle")
return self._extract_group_id_from_gradle_manifest(build_gradle, quote_chars={"'", '"'}, delimiter=" ")
def extract_group_id_from_build_groovy(self) -> str | None:
"""Extract the group id from the build.gradle file.

def _extract_group_id_from_build_kotlin(self) -> str | None:
"""Extract the group id from the build.gradle.kts file."""
build_gradle = find_file_in_repo(Path(self.reported_repo_fs), "build.gradle.kts")
Returns
-------
str | None
The extracted group id if found, otherwise None.
"""
build_gradle = file_exists(self.reported_repo_fs, "build.gradle", filters=self.build_tool.path_filters)
return self._extract_group_id_from_gradle_manifest(build_gradle, quote_chars={"'", '"'}, delimiter="=")

def extract_group_id_from_build_kotlin(self) -> str | None:
"""Extract the group id from the build.gradle.kts file.

Returns
-------
str | None
The extracted group id if found, otherwise None.
"""
build_gradle = file_exists(self.reported_repo_fs, "build.gradle.kts", filters=self.build_tool.path_filters)
return self._extract_group_id_from_gradle_manifest(build_gradle, quote_chars={'"'}, delimiter="=")
57 changes: 31 additions & 26 deletions src/macaron/repo_verifier/repo_verifier_maven.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,15 @@

"""This module contains code to verify whether a reported Maven-based repository can be linked back to the artifact."""
import logging
from pathlib import Path
from urllib.parse import urlparse

from macaron.parsers.pomparser import parse_pom_string
from macaron.repo_verifier.repo_verifier_base import (
RepositoryVerificationResult,
RepositoryVerificationStatus,
RepoVerifierToolSpecific,
find_file_in_repo,
)
from macaron.slsa_analyzer.build_tool import Maven
from macaron.slsa_analyzer.build_tool.base_build_tool import file_exists
from macaron.slsa_analyzer.package_registry.maven_central_registry import (
RECOGNIZED_CODE_HOSTING_SERVICES,
same_organization,
Expand All @@ -25,8 +23,6 @@
class RepoVerifierMaven(RepoVerifierToolSpecific):
"""A class to verify whether a repository with Maven build tool links back to the artifact."""

specific_tool = Maven()

def verify_by_tool(self) -> RepositoryVerificationResult:
"""Verify whether the reported repository links back to the Maven artifact.

Expand All @@ -45,43 +41,52 @@ def verify_by_tool(self) -> RepositoryVerificationResult:
if recognized_services_verification_result.status == RepositoryVerificationStatus.PASSED:
return recognized_services_verification_result

pom_group_id = self.extract_group_id_from_pom()
if pom_group_id is None:
logger.debug("Could not find groupId from the pom.xml in %s", self.reported_repo_url)
return RepositoryVerificationResult(
status=RepositoryVerificationStatus.UNKNOWN, reason="no_group_id_in_pom", build_tool=self.build_tool
)
if not same_organization(pom_group_id, self.namespace):
logger.debug("Group id in pom.xml does not match the provided group id for: %s", self.reported_repo_url)
return RepositoryVerificationResult(
status=RepositoryVerificationStatus.FAILED, reason="group_id_mismatch", build_tool=self.build_tool
)

return RepositoryVerificationResult(
status=RepositoryVerificationStatus.PASSED, reason="group_id_match", build_tool=self.build_tool
)

def extract_group_id_from_pom(self) -> str | None:
"""Extract the group id from the pom.xml file.

Returns
-------
str | None
The extracted group id if found, otherwise None.
"""
# TODO: check other pom files. Think about how to decide in case of contradicting evidence.
# Check if repo contains pom.xml.
pom_file = find_file_in_repo(Path(self.reported_repo_fs), "pom.xml")
pom_file = file_exists(self.reported_repo_fs, "pom.xml", filters=self.build_tool.path_filters)
if not pom_file:
logger.debug("Could not find any pom.xml in the repository: %s", self.reported_repo_url)
return RepositoryVerificationResult(
status=RepositoryVerificationStatus.UNKNOWN, reason="no_pom", build_tool=self.build_tool
)
return None

pom_content = pom_file.read_text(encoding="utf-8")
pom_root = parse_pom_string(pom_content)

if not pom_root:
if pom_root is None:
logger.debug("Could not parse pom.xml: %s", pom_file.as_posix())
return RepositoryVerificationResult(
status=RepositoryVerificationStatus.UNKNOWN, reason="not_parsed_pom", build_tool=self.build_tool
)
return None

# Find the group id in the pom (project/groupId).
# The closing curly brace represents the end of the XML namespace.
pom_group_id_elem = next((ch for ch in pom_root if ch.tag.endswith("}groupId")), None)
if pom_group_id_elem is None or not pom_group_id_elem.text:
logger.debug("Could not find groupId in pom.xml: %s", pom_file)
return RepositoryVerificationResult(
status=RepositoryVerificationStatus.UNKNOWN, reason="no_group_id_in_pom", build_tool=self.build_tool
)
return None

pom_group_id = pom_group_id_elem.text.strip()
if not same_organization(pom_group_id, self.namespace):
logger.debug("Group id in pom.xml does not match the provided group id: %s", pom_file)
return RepositoryVerificationResult(
status=RepositoryVerificationStatus.FAILED, reason="group_id_mismatch", build_tool=self.build_tool
)

return RepositoryVerificationResult(
status=RepositoryVerificationStatus.PASSED, reason="group_id_match", build_tool=self.build_tool
)
return pom_group_id_elem.text.strip()

def verify_domains_from_recognized_code_hosting_services(self) -> RepositoryVerificationResult:
"""Verify repository link by comparing the maven domain name and the account on code hosting services.
Expand Down
Loading
Loading