Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,19 @@
],
"console": "integratedTerminal"
},
{
"name": "Debug cocode CLI",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/.venv/bin/pipelex",
"args": [
"validate",
"all",
"-c",
"cocode/pipelex_libraries",
],
"console": "integratedTerminal",
"justMyCode": false
}
]
}
2 changes: 2 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ Always fix any issues reported by these tools before proceeding.
- **Pipelines**: `cocode/pipelex_libraries/pipelines/`
- **Tests**: `tests/` directory
- **Documentation**: `docs/` directory


# Pipeline Guide

- Always first write your "plan" in natural language, then transcribe it in pipelex.
Expand Down
195 changes: 195 additions & 0 deletions analyze_hackathon_repos.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
#!/bin/bash

# Hackathon Repository Analysis Script using GNU Parallel
# Fans `cocode hackathon analyze` out over every repository listed in a
# text file (one path/URL per line), writing reports and job logs under
# the chosen output directory.
# Usage: ./analyze_hackathon_repos.sh [repos_file] [parallel_jobs] [output_dir]

set -e # Exit on any error

# Default values (overridable via the three positional arguments)
REPOS_FILE="${1:-repos.txt}" # text file listing one repository per line
PARALLEL_JOBS="${2:-4}" # number of concurrent analysis jobs
OUTPUT_DIR="${3:-results}" # destination for reports and logs
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # absolute directory of this script
VENV_PATH="$SCRIPT_DIR/.venv" # project virtualenv that provides the cocode CLI

# Colors for output (stored as literal escape text; interpreted by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Logging helpers: prefix a message with a colored severity tag.
# printf '%b\n' interprets the backslash escapes stored in the color
# variables, exactly like the `echo -e` they replace.
echo_info() {
  printf '%b\n' "${BLUE}[INFO]${NC} $1"
}

echo_success() {
  printf '%b\n' "${GREEN}[SUCCESS]${NC} $1"
}

echo_warning() {
  printf '%b\n' "${YELLOW}[WARNING]${NC} $1"
}

echo_error() {
  printf '%b\n' "${RED}[ERROR]${NC} $1"
}

# Verify we run from the project root, the repo list exists, and the
# project virtualenv provides a working `cocode` binary. Exits 1 with a
# diagnostic on the first failed precondition.
check_environment() {
  # Project root sanity: pyproject.toml and a .venv folder must be present.
  if [[ ! -f "pyproject.toml" || ! -d ".venv" ]]; then
    echo_error "This script must be run from the project root directory with a .venv folder"
    exit 1
  fi

  # The repository list file is mandatory input.
  if [[ ! -f "$REPOS_FILE" ]]; then
    echo_error "Repository list file '$REPOS_FILE' not found!"
    echo_info "Create a text file with one repository path/URL per line"
    echo_info "Example content:"
    echo " https://github.com/user/repo1"
    echo " /path/to/local/repo2"
    echo " user/repo3"
    exit 1
  fi

  # Check if virtual environment exists and has cocode
  if [[ ! -f "$VENV_PATH/bin/python" ]]; then
    echo_error "Virtual environment not found at $VENV_PATH"
    exit 1
  fi

  # Test if cocode is available in venv
  "$VENV_PATH/bin/cocode" --help > /dev/null 2>&1 && return 0
  echo_error "cocode command not available in virtual environment"
  echo_info "Make sure you've installed the project: pip install -e ."
  exit 1
}

# Analyze one repository; this runs inside a GNU parallel worker shell.
# $1: repository path/URL, $2: output directory, $3: virtualenv path.
# Returns 0 on success, 1 on failure.
analyze_repo() {
  local target="$1"
  local results_dir="$2"
  local venv="$3"

  echo_info "Analyzing: $target"

  # Invoke cocode from the project's virtualenv so workers don't depend
  # on the caller's PATH.
  if "$venv/bin/cocode" hackathon analyze "$target" -o "$results_dir"; then
    echo_success "Completed: $target"
    return 0
  fi
  echo_error "Failed: $target"
  return 1
}

# Export the function so parallel can use it
# (GNU parallel runs each job in a child bash process, where only
# exported functions are visible; echo_warning is not exported because
# analyze_repo never calls it).
export -f analyze_repo
export -f echo_info
export -f echo_success
export -f echo_error

# Main execution: validate the environment, fan analyses out via GNU
# parallel, then summarize successes/failures from parallel's job log.
main() {
  echo_info "Starting Hackathon Repository Analysis"
  echo_info "Repository list: $REPOS_FILE"
  echo_info "Parallel jobs: $PARALLEL_JOBS"
  echo_info "Output directory: $OUTPUT_DIR"
  echo ""

  # Check environment
  check_environment

  # Count repositories. `grep -c .` counts non-empty lines and, unlike
  # `wc -l`, still counts a final line that lacks a trailing newline.
  # `|| true` keeps set -e from aborting when the file has zero entries.
  REPO_COUNT=$(grep -c . "$REPOS_FILE" || true)
  echo_info "Found $REPO_COUNT repositories to analyze"

  # Create output directory
  mkdir -p "$OUTPUT_DIR"

  # Create log directory
  LOG_DIR="$OUTPUT_DIR/logs"
  mkdir -p "$LOG_DIR"

  echo_info "Starting parallel analysis..."
  echo_warning "This may take a while depending on repository sizes and complexity"
  echo ""

  # Use GNU Parallel to process repositories
  # --progress: Show progress bar
  # --joblog: Log job execution details
  # --results: Store stdout/stderr for each job
  # --halt never: Continue on errors but report them
  # -j: Number of parallel jobs
  if parallel \
    --progress \
    --joblog "$LOG_DIR/parallel_jobs.log" \
    --results "$LOG_DIR/job_outputs" \
    --halt never \
    -j "$PARALLEL_JOBS" \
    analyze_repo {} "$OUTPUT_DIR" "$VENV_PATH" :::: "$REPOS_FILE"; then

    echo ""
    echo_success "Parallel analysis completed!"
  else
    echo ""
    echo_warning "Parallel analysis completed with some failures"
  fi

  # Summary
  echo ""
  echo_info "=== ANALYSIS SUMMARY ==="

  # Joblog columns when whitespace-split by awk:
  #   $1 Seq, $2 Host, $3 Starttime, $4 JobRuntime, $5 Send, $6 Receive,
  #   $7 Exitval, $8 Signal, $9.. Command — $9 is the command word
  #   ("analyze_repo"); the repository is its first argument, field $10.
  if [[ -f "$LOG_DIR/parallel_jobs.log" ]]; then
    SUCCESSFUL=$(awk 'NR>1 && $7==0 {count++} END {print count+0}' "$LOG_DIR/parallel_jobs.log")
    FAILED=$(awk 'NR>1 && $7!=0 {count++} END {print count+0}' "$LOG_DIR/parallel_jobs.log")

    echo_info "Total repositories: $REPO_COUNT"
    echo_success "Successful analyses: $SUCCESSFUL"
    if [[ $FAILED -gt 0 ]]; then
      echo_error "Failed analyses: $FAILED"

      # Extract and display failed repository names.
      # BUGFIX: was `print $9`, which printed the literal command word
      # "analyze_repo" for every failure; the repo is field $10.
      FAILED_REPOS=$(awk 'NR>1 && $7!=0 {print $10}' "$LOG_DIR/parallel_jobs.log")
      if [[ -n "$FAILED_REPOS" ]]; then
        echo_error "Failed repositories:"
        while IFS= read -r repo; do
          echo_error " - $repo"
        done <<< "$FAILED_REPOS"
      fi

      echo_info "Check detailed logs: awk 'NR>1 && \$7!=0 {print \$10}' '$LOG_DIR/parallel_jobs.log'"
    fi
  fi

  echo_info "Results saved to: $OUTPUT_DIR"
  echo_info "Job logs saved to: $LOG_DIR"
  echo ""
  echo_info "To retry failed jobs, you can extract failed repo paths and create a new input file:"
  echo_info "awk 'NR>1 && \$7!=0 {print \$10}' '$LOG_DIR/parallel_jobs.log' > failed_repos.txt"
}

# Entry point: print usage on -h/--help, otherwise run the analysis.
case "${1:-}" in
  --help|-h)
    echo "Usage: $0 [repos_file] [parallel_jobs] [output_dir]"
    echo ""
    echo "Arguments:"
    echo " repos_file Path to text file containing repository paths/URLs (default: repos.txt)"
    echo " parallel_jobs Number of parallel analysis jobs to run (default: 4)"
    echo " output_dir Directory to save analysis results (default: results)"
    echo ""
    echo "Examples:"
    echo " $0 # Use defaults"
    echo " $0 my_repos.txt 8 hackathon_results # Custom settings"
    echo ""
    echo "The repos file should contain one repository per line:"
    echo " https://github.com/user/repo1"
    echo " /path/to/local/repo2"
    echo " user/repo3"
    exit 0
    ;;
esac

# Run main function
main "$@"
2 changes: 2 additions & 0 deletions cocode/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from typing_extensions import override

from cocode.github.github_cli import github_app
from cocode.hackathon.hackathon_cli import hackathon_app
from cocode.repox.repox_cli import repox_app
from cocode.swe.swe_cli import swe_app
from cocode.validation_cli import validation_app
Expand Down Expand Up @@ -48,6 +49,7 @@ def get_command(self, ctx: Context, cmd_name: str) -> Optional[Command]:
# Add command groups
app.add_typer(repox_app, name="repox", help="Repository processing and analysis commands")
app.add_typer(swe_app, name="swe", help="Software Engineering analysis and automation commands")
app.add_typer(hackathon_app, name="hackathon", help="Hackathon codebase analysis and evaluation commands")
app.add_typer(validation_app, name="validation", help="Pipeline validation and setup commands")
app.add_typer(github_app, name="github", help="GitHub-related operations and utilities")

Expand Down
1 change: 1 addition & 0 deletions cocode/hackathon/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Hackathon analysis module."""
81 changes: 81 additions & 0 deletions cocode/hackathon/hackathon_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
"""
Hackathon analysis CLI commands.
"""

import asyncio
from typing import Annotated, List, Optional

import typer

from cocode.common import get_output_dir, validate_repo_path
from cocode.repox.models import OutputStyle
from cocode.repox.process_python import PythonProcessingRule

from .hackathon_cmd import hackathon_analyze_repo

# Typer sub-application exposed as `cocode hackathon ...`; it is mounted
# on the root CLI elsewhere via app.add_typer(hackathon_app, ...).
# Completion is disabled here because the parent app provides it.
hackathon_app = typer.Typer(
    name="hackathon",
    help="Hackathon codebase analysis commands",
    add_completion=False,
    rich_markup_mode="rich",
)


@hackathon_app.command("analyze")
def hackathon_analyze(
    repo_path: Annotated[
        str,
        typer.Argument(help="Repository path (local directory) or GitHub URL/identifier (owner/repo or https://github.com/owner/repo)"),
    ] = ".",
    output_dir: Annotated[
        Optional[str],
        typer.Option("--output-dir", "-o", help="Output directory path. Use 'stdout' to print to console. Defaults to config value if not provided"),
    ] = None,
    output_filename: Annotated[
        str,
        typer.Option("--output-filename", "-n", help="Output filename for HTML report"),
    ] = "hackathon-analysis.html",
    ignore_patterns: Annotated[
        Optional[List[str]],
        typer.Option("--ignore-pattern", "-i", help="List of patterns to ignore (in gitignore format)"),
    ] = None,
    python_processing_rule: Annotated[
        PythonProcessingRule,
        typer.Option("--python-rule", "-p", help="Python processing rule to apply", case_sensitive=False),
    ] = PythonProcessingRule.INTERFACE,
    output_style: Annotated[
        OutputStyle,
        typer.Option(
            "--output-style", "-s", help="One of: repo_map, flat (contents only), or import_list (for --python-rule imports)", case_sensitive=False
        ),
    ] = OutputStyle.REPO_MAP,
    include_patterns: Annotated[
        Optional[List[str]],
        typer.Option("--include-pattern", "-r", help="Optional pattern to filter files in the tree structure (glob pattern) - can be repeated"),
    ] = None,
    path_pattern: Annotated[
        Optional[str],
        typer.Option("--path-pattern", "-pp", help="Optional pattern to filter paths in the tree structure (regex pattern)"),
    ] = None,
    dry_run: Annotated[
        bool,
        typer.Option("--dry", help="Run pipeline in dry mode (no actual execution)"),
    ] = False,
) -> None:
    """Analyze a hackathon codebase for features, architecture, quality, security, and X-factors. Generates an HTML report."""
    # Normalize CLI inputs: resolve the repo location and the effective
    # output directory (falling back to config defaults when omitted).
    resolved_repo = validate_repo_path(repo_path)
    resolved_output_dir = get_output_dir(output_dir)

    # Build the analysis coroutine, then drive it to completion on a
    # fresh event loop.
    analysis = hackathon_analyze_repo(
        repo_path=resolved_repo,
        ignore_patterns=ignore_patterns,
        include_patterns=include_patterns,
        path_pattern=path_pattern,
        python_processing_rule=python_processing_rule,
        output_style=output_style,
        output_filename=output_filename,
        output_dir=resolved_output_dir,
        dry_run=dry_run,
    )
    asyncio.run(analysis)
Loading
Loading