Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,19 @@
],
"console": "integratedTerminal"
},
{
"name": "Debug cocode CLI",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/.venv/bin/pipelex",
"args": [
"validate",
"all",
"-c",
"cocode/pipelex_libraries",
],
"console": "integratedTerminal",
"justMyCode": false
}
]
}
2 changes: 2 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ Always fix any issues reported by these tools before proceeding.
- **Pipelines**: `cocode/pipelex_libraries/pipelines/`
- **Tests**: `tests/` directory
- **Documentation**: `docs/` directory


# Pipeline Guide

- Always first write your "plan" in natural language, then transcribe it in pipelex.
Expand Down
195 changes: 195 additions & 0 deletions analyze_hackathon_repos.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
#!/bin/bash

# Hackathon Repository Analysis Script using GNU Parallel
# Fans `cocode hackathon analyze` out over every repository listed in a
# text file (one path/URL per line), writing reports and job logs under
# the chosen output directory.
# Usage: ./analyze_hackathon_repos.sh [repos_file] [parallel_jobs] [output_dir]

set -e # Exit on any error

# Default values (overridable via the three positional arguments)
REPOS_FILE="${1:-repos.txt}" # text file listing one repository per line
PARALLEL_JOBS="${2:-4}" # number of concurrent analysis jobs
OUTPUT_DIR="${3:-results}" # destination for reports and logs
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # absolute directory of this script
VENV_PATH="$SCRIPT_DIR/.venv" # project virtualenv that provides the cocode CLI

# Colors for output (stored as literal escape text; interpreted by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Logging helpers: prefix a message with a colored severity tag.
# printf '%b\n' interprets the backslash escapes stored in the color
# variables, exactly like the `echo -e` they replace.
echo_info() {
  printf '%b\n' "${BLUE}[INFO]${NC} $1"
}

echo_success() {
  printf '%b\n' "${GREEN}[SUCCESS]${NC} $1"
}

echo_warning() {
  printf '%b\n' "${YELLOW}[WARNING]${NC} $1"
}

echo_error() {
  printf '%b\n' "${RED}[ERROR]${NC} $1"
}

# Verify we run from the project root, the repo list exists, and the
# project virtualenv provides a working `cocode` binary. Exits 1 with a
# diagnostic on the first failed precondition.
check_environment() {
  # Project root sanity: pyproject.toml and a .venv folder must be present.
  if [[ ! -f "pyproject.toml" || ! -d ".venv" ]]; then
    echo_error "This script must be run from the project root directory with a .venv folder"
    exit 1
  fi

  # The repository list file is mandatory input.
  if [[ ! -f "$REPOS_FILE" ]]; then
    echo_error "Repository list file '$REPOS_FILE' not found!"
    echo_info "Create a text file with one repository path/URL per line"
    echo_info "Example content:"
    echo " https://github.com/user/repo1"
    echo " /path/to/local/repo2"
    echo " user/repo3"
    exit 1
  fi

  # Check if virtual environment exists and has cocode
  if [[ ! -f "$VENV_PATH/bin/python" ]]; then
    echo_error "Virtual environment not found at $VENV_PATH"
    exit 1
  fi

  # Test if cocode is available in venv
  "$VENV_PATH/bin/cocode" --help > /dev/null 2>&1 && return 0
  echo_error "cocode command not available in virtual environment"
  echo_info "Make sure you've installed the project: pip install -e ."
  exit 1
}

# Analyze one repository; this runs inside a GNU parallel worker shell.
# $1: repository path/URL, $2: output directory, $3: virtualenv path.
# Returns 0 on success, 1 on failure.
analyze_repo() {
  local target="$1"
  local results_dir="$2"
  local venv="$3"

  echo_info "Analyzing: $target"

  # Invoke cocode from the project's virtualenv so workers don't depend
  # on the caller's PATH.
  if "$venv/bin/cocode" hackathon analyze "$target" -o "$results_dir"; then
    echo_success "Completed: $target"
    return 0
  fi
  echo_error "Failed: $target"
  return 1
}

# Export the function so parallel can use it
# (GNU parallel runs each job in a child bash process, where only
# exported functions are visible; echo_warning is not exported because
# analyze_repo never calls it).
export -f analyze_repo
export -f echo_info
export -f echo_success
export -f echo_error

# Main execution: validate the environment, fan analyses out via GNU
# parallel, then summarize successes/failures from parallel's job log.
main() {
  echo_info "Starting Hackathon Repository Analysis"
  echo_info "Repository list: $REPOS_FILE"
  echo_info "Parallel jobs: $PARALLEL_JOBS"
  echo_info "Output directory: $OUTPUT_DIR"
  echo ""

  # Check environment
  check_environment

  # Count repositories. `grep -c .` counts non-empty lines and, unlike
  # `wc -l`, still counts a final line that lacks a trailing newline.
  # `|| true` keeps set -e from aborting when the file has zero entries.
  REPO_COUNT=$(grep -c . "$REPOS_FILE" || true)
  echo_info "Found $REPO_COUNT repositories to analyze"

  # Create output directory
  mkdir -p "$OUTPUT_DIR"

  # Create log directory
  LOG_DIR="$OUTPUT_DIR/logs"
  mkdir -p "$LOG_DIR"

  echo_info "Starting parallel analysis..."
  echo_warning "This may take a while depending on repository sizes and complexity"
  echo ""

  # Use GNU Parallel to process repositories
  # --progress: Show progress bar
  # --joblog: Log job execution details
  # --results: Store stdout/stderr for each job
  # --halt never: Continue on errors but report them
  # -j: Number of parallel jobs
  if parallel \
    --progress \
    --joblog "$LOG_DIR/parallel_jobs.log" \
    --results "$LOG_DIR/job_outputs" \
    --halt never \
    -j "$PARALLEL_JOBS" \
    analyze_repo {} "$OUTPUT_DIR" "$VENV_PATH" :::: "$REPOS_FILE"; then

    echo ""
    echo_success "Parallel analysis completed!"
  else
    echo ""
    echo_warning "Parallel analysis completed with some failures"
  fi

  # Summary
  echo ""
  echo_info "=== ANALYSIS SUMMARY ==="

  # Joblog columns when whitespace-split by awk:
  #   $1 Seq, $2 Host, $3 Starttime, $4 JobRuntime, $5 Send, $6 Receive,
  #   $7 Exitval, $8 Signal, $9.. Command — $9 is the command word
  #   ("analyze_repo"); the repository is its first argument, field $10.
  if [[ -f "$LOG_DIR/parallel_jobs.log" ]]; then
    SUCCESSFUL=$(awk 'NR>1 && $7==0 {count++} END {print count+0}' "$LOG_DIR/parallel_jobs.log")
    FAILED=$(awk 'NR>1 && $7!=0 {count++} END {print count+0}' "$LOG_DIR/parallel_jobs.log")

    echo_info "Total repositories: $REPO_COUNT"
    echo_success "Successful analyses: $SUCCESSFUL"
    if [[ $FAILED -gt 0 ]]; then
      echo_error "Failed analyses: $FAILED"

      # Extract and display failed repository names.
      # BUGFIX: was `print $9`, which printed the literal command word
      # "analyze_repo" for every failure; the repo is field $10.
      FAILED_REPOS=$(awk 'NR>1 && $7!=0 {print $10}' "$LOG_DIR/parallel_jobs.log")
      if [[ -n "$FAILED_REPOS" ]]; then
        echo_error "Failed repositories:"
        while IFS= read -r repo; do
          echo_error " - $repo"
        done <<< "$FAILED_REPOS"
      fi

      echo_info "Check detailed logs: awk 'NR>1 && \$7!=0 {print \$10}' '$LOG_DIR/parallel_jobs.log'"
    fi
  fi

  echo_info "Results saved to: $OUTPUT_DIR"
  echo_info "Job logs saved to: $LOG_DIR"
  echo ""
  echo_info "To retry failed jobs, you can extract failed repo paths and create a new input file:"
  echo_info "awk 'NR>1 && \$7!=0 {print \$10}' '$LOG_DIR/parallel_jobs.log' > failed_repos.txt"
}

# Entry point: print usage on -h/--help, otherwise run the analysis.
case "${1:-}" in
  --help|-h)
    echo "Usage: $0 [repos_file] [parallel_jobs] [output_dir]"
    echo ""
    echo "Arguments:"
    echo " repos_file Path to text file containing repository paths/URLs (default: repos.txt)"
    echo " parallel_jobs Number of parallel analysis jobs to run (default: 4)"
    echo " output_dir Directory to save analysis results (default: results)"
    echo ""
    echo "Examples:"
    echo " $0 # Use defaults"
    echo " $0 my_repos.txt 8 hackathon_results # Custom settings"
    echo ""
    echo "The repos file should contain one repository per line:"
    echo " https://github.com/user/repo1"
    echo " /path/to/local/repo2"
    echo " user/repo3"
    exit 0
    ;;
esac

# Run main function
main "$@"
2 changes: 2 additions & 0 deletions cocode/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from typing_extensions import override

from cocode.github.github_cli import github_app
from cocode.hackathon.hackathon_cli import hackathon_app
from cocode.repox.repox_cli import repox_app
from cocode.swe.swe_cli import swe_app
from cocode.validation_cli import validation_app
Expand Down Expand Up @@ -48,6 +49,7 @@ def get_command(self, ctx: Context, cmd_name: str) -> Optional[Command]:
# Add command groups
app.add_typer(repox_app, name="repox", help="Repository processing and analysis commands")
app.add_typer(swe_app, name="swe", help="Software Engineering analysis and automation commands")
app.add_typer(hackathon_app, name="hackathon", help="Hackathon codebase analysis and evaluation commands")
app.add_typer(validation_app, name="validation", help="Pipeline validation and setup commands")
app.add_typer(github_app, name="github", help="GitHub-related operations and utilities")

Expand Down
1 change: 1 addition & 0 deletions cocode/hackathon/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Hackathon analysis module."""
81 changes: 81 additions & 0 deletions cocode/hackathon/hackathon_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
"""
Hackathon analysis CLI commands.
"""

import asyncio
from typing import Annotated, List, Optional

import typer

from cocode.common import get_output_dir, validate_repo_path
from cocode.repox.models import OutputStyle
from cocode.repox.process_python import PythonProcessingRule

from .hackathon_cmd import hackathon_analyze_repo

# Typer sub-application exposed as `cocode hackathon ...`; it is mounted
# on the root CLI elsewhere via app.add_typer(hackathon_app, ...).
# Completion is disabled here because the parent app provides it.
hackathon_app = typer.Typer(
    name="hackathon",
    help="Hackathon codebase analysis commands",
    add_completion=False,
    rich_markup_mode="rich",
)


@hackathon_app.command("analyze")
def hackathon_analyze(
    repo_path: Annotated[
        str,
        typer.Argument(help="Repository path (local directory) or GitHub URL/identifier (owner/repo or https://github.com/owner/repo)"),
    ] = ".",
    output_dir: Annotated[
        Optional[str],
        typer.Option("--output-dir", "-o", help="Output directory path. Use 'stdout' to print to console. Defaults to config value if not provided"),
    ] = None,
    output_filename: Annotated[
        str,
        typer.Option("--output-filename", "-n", help="Output filename for HTML report"),
    ] = "hackathon-analysis.html",
    ignore_patterns: Annotated[
        Optional[List[str]],
        typer.Option("--ignore-pattern", "-i", help="List of patterns to ignore (in gitignore format)"),
    ] = None,
    python_processing_rule: Annotated[
        PythonProcessingRule,
        typer.Option("--python-rule", "-p", help="Python processing rule to apply", case_sensitive=False),
    ] = PythonProcessingRule.INTERFACE,
    output_style: Annotated[
        OutputStyle,
        typer.Option(
            "--output-style", "-s", help="One of: repo_map, flat (contents only), or import_list (for --python-rule imports)", case_sensitive=False
        ),
    ] = OutputStyle.REPO_MAP,
    include_patterns: Annotated[
        Optional[List[str]],
        typer.Option("--include-pattern", "-r", help="Optional pattern to filter files in the tree structure (glob pattern) - can be repeated"),
    ] = None,
    path_pattern: Annotated[
        Optional[str],
        typer.Option("--path-pattern", "-pp", help="Optional pattern to filter paths in the tree structure (regex pattern)"),
    ] = None,
    dry_run: Annotated[
        bool,
        typer.Option("--dry", help="Run pipeline in dry mode (no actual execution)"),
    ] = False,
) -> None:
    """Analyze a hackathon codebase for features, architecture, quality, security, and X-factors. Generates an HTML report."""
    # Normalize CLI inputs: resolve the repo location and the effective
    # output directory (falling back to config defaults when omitted).
    resolved_repo = validate_repo_path(repo_path)
    resolved_output_dir = get_output_dir(output_dir)

    # Build the analysis coroutine, then drive it to completion on a
    # fresh event loop.
    analysis = hackathon_analyze_repo(
        repo_path=resolved_repo,
        ignore_patterns=ignore_patterns,
        include_patterns=include_patterns,
        path_pattern=path_pattern,
        python_processing_rule=python_processing_rule,
        output_style=output_style,
        output_filename=output_filename,
        output_dir=resolved_output_dir,
        dry_run=dry_run,
    )
    asyncio.run(analysis)
Loading
Loading