codegen-sh · jayhack · Feb 7, 2025 · Feb 7, 2025 · Feb 7, 2025 · Feb 7, 2025
diff --git a/.codegen/.gitignore b/.codegen/.gitignore
@@ -0,0 +1,17 @@
+# Codegen
+docs/
+examples/
+prompts/
+jupyter/
+.venv/
+codegen-system-prompt.txt
+
+# Python cache files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Keep config.toml and codemods
+!config.toml
+!codemods/
+!codemods/**
diff --git a/.codegen/config.toml b/.codegen/config.toml
@@ -0,0 +1,2 @@
+organization_name = "codegen-sh"
+repo_name = "codegen-examples"
diff --git a/examples/cyclomatic_complexity/README.md b/examples/cyclomatic_complexity/README.md
@@ -0,0 +1,137 @@
+# Cyclomatic Complexity Analyzer
+
+This example demonstrates how to analyze the cyclomatic complexity of Python codebases using Codegen. The script counts control flow structures in each function and prints a report with overall statistics, the most complex functions, and the complexity distribution.
+
+> [!NOTE]
+> The cyclomatic complexity metric helps identify complex code that might need refactoring. A higher score indicates more complex code with multiple decision points.
+
+## How the Analysis Script Works
+
+The script (`run.py`) performs the complexity analysis in several key steps:
+
+1. **Codebase Loading**
+   ```python
+   codebase = Codebase.from_repo("fastapi/fastapi")
+   ```
+   - Loads any Python codebase into Codegen's analysis engine
+   - Works with local or remote Git repositories
+   - Supports analyzing specific commits
+
+2. **Complexity Calculation**
+   ```python
+   def calculate_cyclomatic_complexity(code_block):
+       complexity = 1  # Base complexity
+       for statement in code_block.statements:
+           if isinstance(statement, IfBlockStatement):
+               complexity += 1 + len(statement.elif_statements)
+   ```
+   - Analyzes control flow structures (if/elif/else, loops, try/except)
+   - Calculates complexity based on decision points
+   - Handles nested structures appropriately
+
+3. **Function Analysis**
+   ```python
+   callables = codebase.functions + [m for c in codebase.classes for m in c.methods]
+   for function in callables:
+       complexity = calculate_cyclomatic_complexity(function.code_block)
+   ```
+   - Processes both standalone functions and class methods
+   - Calculates complexity for each callable
+   - Tracks file locations and function names
+
+4. **Report Generation**
+   ```python
+   print("\n📊 Cyclomatic Complexity Analysis")
+   print(f"  • Total Functions: {total_functions}")
+   print(f"  • Average Complexity: {average:.2f}")
+   ```
+   - Provides comprehensive complexity statistics
+   - Shows distribution of complexity across functions
+   - Identifies the most complex functions
+
+## Output
+```
+📊 Cyclomatic Complexity Analysis
+============================================================
+
+📈 Overall Stats:
+  • Total Functions: 3538
+  • Average Complexity: 1.27
+  • Total Complexity: 4478
+
+🔍 Top 10 Most Complex Functions:
+------------------------------------------------------------
+  • jsonable_encoder                16 | fastapi/encoders.py
+  • get_openapi                     13 | fastapi/openapi/utils.py
+  • __init__                        12 | fastapi/routing.py
+  • solve_dependencies              10 | fastapi/dependencies/utils.py
+  • main                             9 | scripts/notify_translations.py
+  • analyze_param                    9 | fastapi/dependencies/utils.py
+  • __init__                         8 | fastapi/params.py
+  • __init__                         8 | fastapi/params.py
+  • main                             7 | scripts/deploy_docs_status.py
+  • create_model_field               7 | fastapi/utils.py
+
+📉 Complexity Distribution:
+  • Low (1-5): 3514 functions (99.3%)
+  • Medium (6-10): 21 functions (0.6%)
+  • High (>10): 3 functions (0.1%)
+```
+
+## Complexity Metrics
+
+The analyzer tracks several key metrics:
+
+### Complexity Sources
+- If statements (+1)
+- Elif statements (+1 each)
+- Else statements (+1)
+- Loops (while/for) (+1)
+- Try-except blocks (+1 for the statement, plus +1 for each nested block after the `try` body, such as an `except`)
+
+### Complexity Categories
+- Low (1-5): Generally clean and maintainable code
+- Medium (6-10): Moderate complexity, may need attention
+- High (>10): Complex code that should be reviewed
+
+## Running the Analysis
+
+```bash
+# Install Codegen
+pip install codegen
+
+# Run the analysis
+python run.py
+```
+
+## Example Output
+
+```
+📊 Cyclomatic Complexity Analysis
+============================================================
+
+📈 Overall Stats:
+  • Total Functions: 150
+  • Average Complexity: 3.45
+  • Total Complexity: 518
+
+🔍 Top 10 Most Complex Functions:
+------------------------------------------------------------
+  • validate_response               12 | ...api/endpoints/auth.py
+  • process_request                 10 | ...core/middleware.py
+  • handle_exception                 9 | ...utils/error_handlers.py
+
+📉 Complexity Distribution:
+  • Low (1-5): 105 functions (70.0%)
+  • Medium (6-10): 35 functions (23.3%)
+  • High (>10): 10 functions (6.7%)
+```
+
+## Learn More
+
+- [About Cyclomatic Complexity](https://en.wikipedia.org/wiki/Cyclomatic_complexity)
+- [Codegen Documentation](https://docs.codegen.com)
+
+## Contributing
+
+Feel free to submit issues and enhancement requests!
diff --git a/examples/cyclomatic_complexity/run.py b/examples/cyclomatic_complexity/run.py
@@ -0,0 +1,87 @@
+import codegen
+from codegen import Codebase
+from codegen.sdk.core.statements.for_loop_statement import ForLoopStatement
+from codegen.sdk.core.statements.if_block_statement import IfBlockStatement
+from codegen.sdk.core.statements.try_catch_statement import TryCatchStatement
+from codegen.sdk.core.statements.while_statement import WhileStatement
+
+
+@codegen.function("cyclomatic-complexity")
+def run(codebase: Codebase):
+    def calculate_cyclomatic_complexity(code_block):
+        # Initialize cyclomatic complexity count
+        complexity = 1  # Start with one for the default path
+
+        # Count decision points
+        for statement in code_block.statements:
+            if isinstance(statement, IfBlockStatement):
+                complexity += 1 + len(statement.elif_statements)  # +1 for if, each elif adds another path
+                if statement.else_statement:
+                    complexity += 1
+            elif isinstance(statement, WhileStatement) or isinstance(statement, ForLoopStatement):
+                complexity += 1  # Loops introduce a new path
+            elif isinstance(statement, TryCatchStatement):
+                complexity += 1  # try-catch introduces a new path
+                # Count except blocks by counting nested code blocks after the first one (try block)
+                complexity += len(statement.nested_code_blocks) - 1  # -1 to exclude the try block itself
+
+        return complexity
+
+    # Initialize total complexity
+    total_complexity = 0
+    # Count total functions
+    total_functions = 0
+    # Store results for sorting
+    results = []
+
+    # Get all functions or methods
+    callables = codebase.functions + [m for c in codebase.classes for m in c.methods]
+
+    # Analyze each function
+    for function in callables:
+        complexity = calculate_cyclomatic_complexity(function.code_block)
+        results.append((function.name, complexity, function.filepath))
+        total_complexity += complexity
+        total_functions += 1
+
+    # Sort by complexity (highest first)
+    results.sort(key=lambda x: x[1], reverse=True)
+
+    # Print summary
+    print("\n📊 Cyclomatic Complexity Analysis")
+    print("=" * 60)
+
+    if total_functions > 0:
+        average = total_complexity / total_functions
+        print("\n📈 Overall Stats:")
+        print(f"  • Total Functions: {total_functions}")
+        print(f"  • Average Complexity: {average:.2f}")
+        print(f"  • Total Complexity: {total_complexity}")
+
+        print("\n🔍 Top 10 Most Complex Functions:")
+        print("-" * 60)
+        for name, complexity, filepath in results[:10]:
+            # Truncate filepath if too long
+            if len(filepath) > 40:
+                filepath = "..." + filepath[-37:]
+            print(f"  • {name:<30} {complexity:>3} | {filepath}")
+
+        # Complexity distribution
+        low = sum(1 for _, c, _ in results if c <= 5)
+        medium = sum(1 for _, c, _ in results if 5 < c <= 10)
+        high = sum(1 for _, c, _ in results if c > 10)
+
+        print("\n📉 Complexity Distribution:")
+        print(f"  • Low (1-5): {low} functions ({low / total_functions * 100:.1f}%)")
+        print(f"  • Medium (6-10): {medium} functions ({medium / total_functions * 100:.1f}%)")
+        print(f"  • High (>10): {high} functions ({high / total_functions * 100:.1f}%)")
+    else:
+        print("❌ No functions found in the codebase to analyze.")
+
+
+if __name__ == "__main__":
+    print("🔍 Analyzing codebase...")
+    codebase = Codebase.from_repo("fastapi/fastapi", commit="887270ff8a54bb58c406b0651678a27589793d2f")
+
+    print("Running analysis...")
+    run(codebase)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		organization_name = "codegen-sh"
		repo_name = "codegen-examples"