Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ CodeWiki demonstrates significant improvements in high-level and managed languag
Install CodeWiki CLI from source:

```bash
pip install https://github.com/FSoft-AI4Code/CodeWiki.git
pip install git+https://github.com/FSoft-AI4Code/CodeWiki.git
```

Verify installation:
Expand Down
46 changes: 46 additions & 0 deletions codewiki/cli/adapters/doc_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import time
import asyncio
import os
import logging
import sys


from codewiki.cli.utils.progress import ProgressTracker
Expand Down Expand Up @@ -64,6 +66,50 @@ def __init__(
cluster_model=config.get('cluster_model', ''),
base_url=config.get('base_url', '')
)

# Configure backend logging
self._configure_backend_logging()

def _configure_backend_logging(self):
    """Attach a single console handler to the backend logger for CLI runs.

    The ``codewiki.src.be`` logger is stripped of any handlers it already
    has and given one ``StreamHandler`` formatted with ``ColoredFormatter``:

    * verbose mode     -> INFO and above, written to ``sys.stdout``
    * non-verbose mode -> WARNING and above, written to ``sys.stderr``

    Propagation is switched off so records are not handed on to ancestor
    loggers (which would print them a second time).
    """
    from codewiki.src.be.dependency_analyzer.utils.logging_config import ColoredFormatter

    # Parent logger shared by every backend module.
    be_logger = logging.getLogger('codewiki.src.be')

    # Start from a clean slate so repeated construction never stacks handlers.
    be_logger.handlers.clear()

    # The two modes differ only in threshold and destination stream.
    if self.verbose:
        threshold, stream = logging.INFO, sys.stdout
    else:
        threshold, stream = logging.WARNING, sys.stderr

    be_logger.setLevel(threshold)

    handler = logging.StreamHandler(stream)
    handler.setLevel(threshold)
    handler.setFormatter(ColoredFormatter())
    be_logger.addHandler(handler)

    # Keep records away from the root logger to avoid duplicate lines.
    be_logger.propagate = False

def generate(self) -> DocumentationJob:
"""
Expand Down
26 changes: 22 additions & 4 deletions codewiki/cli/utils/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,12 +173,30 @@ def detect_supported_languages(directory: Path) -> List[Tuple[str, int]]:
'C#': ['.cs'],
}

# Directory names (or fnmatch-style glob patterns such as '*.egg-info')
# whose contents must not be counted as project source files.
excluded_dirs = {
    'node_modules', '__pycache__', '.git', 'build', 'dist',
    '.venv', 'venv', 'env', '.env', 'target', 'bin', 'obj',
    '.pytest_cache', '.mypy_cache', '.tox', 'coverage',
    'htmlcov', '.eggs', '*.egg-info', 'vendor', 'bower_components',
    '.idea', '.vscode', '.gradle', '.mvn'
}

def should_exclude_file(file_path: Path) -> bool:
    """Return True when *file_path* lives inside an excluded directory.

    Every directory component of the path (the file name itself is not
    considered) is matched against each entry of ``excluded_dirs`` with
    case-sensitive ``fnmatch`` rules. Plain names therefore behave like an
    exact comparison, while wildcard entries such as ``'*.egg-info'`` match
    real directories like ``mypkg.egg-info`` (a literal membership test
    could never match those).

    Args:
        file_path: Path of the candidate file.

    Returns:
        True if any directory component matches an excluded entry,
        otherwise False.
    """
    # Local import keeps this fix self-contained within the block.
    from fnmatch import fnmatchcase

    # NOTE(review): components are taken from the path as given, so an
    # absolute path whose ancestors happen to be named e.g. 'build' or
    # 'env' is excluded as well -- confirm whether callers should pass
    # paths relative to the scanned directory.
    return any(
        fnmatchcase(part, pattern)
        for part in file_path.parent.parts
        for pattern in excluded_dirs
    )

language_counts = {}

for language, extensions in language_extensions.items():
count = 0
for ext in extensions:
count += len(list(directory.rglob(f"*{ext}")))
# Filter out files in excluded directories
count += sum(
1 for f in directory.rglob(f"*{ext}")
if f.is_file() and not should_exclude_file(f)
)

if count > 0:
language_counts[language] = count
Expand All @@ -199,10 +217,10 @@ def is_top_tier_model(model: str) -> bool:
"""
top_tier_models = [
'claude-opus',
'claude-sonnet-4',
'claude-sonnet',
'gpt-4',
'gpt-4-turbo',
'gemini-1.5-pro',
'gpt-5',
'gemini-2.5',
]

model_lower = model.lower()
Expand Down
8 changes: 4 additions & 4 deletions codewiki/src/be/agent_orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from typing import Dict, List, Any

# Configure logging and monitoring
logging.basicConfig(level=logging.INFO)

logger = logging.getLogger(__name__)

# try:
Expand Down Expand Up @@ -89,7 +89,7 @@ def create_agent(self, module_name: str, components: Dict[str, Any],
async def process_module(self, module_name: str, components: Dict[str, Node],
core_component_ids: List[str], module_path: List[str], working_dir: str) -> Dict[str, Any]:
"""Process a single module and generate its documentation."""
logger.debug(f"Processing module: {module_name}")
logger.info(f"Processing module: {module_name}")

# Load or create module tree
module_tree_path = os.path.join(working_dir, MODULE_TREE_FILENAME)
Expand All @@ -115,13 +115,13 @@ async def process_module(self, module_name: str, components: Dict[str, Node],
# check if overview docs already exists
overview_docs_path = os.path.join(working_dir, OVERVIEW_FILENAME)
if os.path.exists(overview_docs_path):
logger.info(f"Overview docs already exists at {overview_docs_path}")
logger.info(f"Overview docs already exists at {overview_docs_path}")
return module_tree

# check if module docs already exists
docs_path = os.path.join(working_dir, f"{module_name}.md")
if os.path.exists(docs_path):
logger.info(f"Module docs already exists at {docs_path}")
logger.info(f"Module docs already exists at {docs_path}")
return module_tree

# Run agent
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
from codewiki.src.be.cluster_modules import format_potential_core_components
from codewiki.src.config import MAX_TOKEN_PER_LEAF_MODULE

import logging
logger = logging.getLogger(__name__)



async def generate_sub_module_documentation(
Expand Down Expand Up @@ -36,6 +39,12 @@ async def generate_sub_module_documentation(

for sub_module_name, core_component_ids in sub_module_specs.items():

# Create visual indentation for nested modules
indent = " " * deps.current_depth
arrow = "└─" if deps.current_depth > 0 else "→"

logger.info(f"{indent}{arrow} Generating documentation for sub-module: {sub_module_name}")

num_tokens = count_tokens(format_potential_core_components(core_component_ids, ctx.deps.components)[-1])

if is_complex_module(ctx.deps.components, core_component_ids) and ctx.deps.current_depth < ctx.deps.max_depth and num_tokens >= MAX_TOKEN_PER_LEAF_MODULE:
Expand Down
2 changes: 1 addition & 1 deletion codewiki/src/be/agent_tools/str_replace_editor.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import logging

# Configure logging and monitoring
logging.basicConfig(level=logging.INFO)

logger = logging.getLogger(__name__)

from pydantic_ai import RunContext, Tool
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ def analyze_code_files(self, code_files: List[Dict], base_dir: str) -> Dict:
self.functions = {}
self.call_relationships = []

logger.debug("Analyzing all code files")
files_analyzed = 0
for file_info in code_files:
logger.debug(f"Analyzing: {file_info['path']}")
Expand Down Expand Up @@ -111,19 +110,13 @@ def _analyze_code_file(self, repo_dir: str, file_info: Dict):
repo_dir: Repository directory path
file_info: File information dictionary
"""
# file_path = Path(repo_dir) / file_info["path"]

# logger.debug(f"Reading content of {file_path}")
# try:
# with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
# content = f.read()
base = Path(repo_dir)
file_path = base / file_info["path"]
logger.debug(f"Reading content of {file_path}")

try:
content = safe_open_text(base, file_path)
language = file_info["language"]
logger.debug(f"Analyzing {language} file: {file_path}")
if language == "python":
self._analyze_python_file(file_path, content, repo_dir)
elif language == "javascript":
Expand All @@ -138,10 +131,10 @@ def _analyze_code_file(self, repo_dir: str, file_info: Dict):
self._analyze_c_file(file_path, content, repo_dir)
elif language == "cpp":
self._analyze_cpp_file(file_path, content, repo_dir)
else:
logger.warning(
f"Unsupported language for call graph analysis: {language} for file {file_path}"
)
# else:
# logger.warning(
# f"Unsupported language for call graph analysis: {language} for file {file_path}"
# )

except Exception as e:
logger.error(f"⚠️ Error analyzing {file_path}: {str(e)}")
Expand Down Expand Up @@ -180,18 +173,13 @@ def _analyze_javascript_file(self, file_path: str, content: str, repo_dir: str):
repo_dir: Repository base directory
"""
try:
logger.debug(f"Starting tree-sitter JavaScript analysis for {file_path}")

from codewiki.src.be.dependency_analyzer.analyzers.javascript import analyze_javascript_file_treesitter

functions, relationships = analyze_javascript_file_treesitter(
file_path, content, repo_path=repo_dir
)

logger.debug(
f"Tree-sitter JavaScript analysis completed for {file_path}: {len(functions)} functions, {len(relationships)} relationships"
)

for func in functions:
func_id = func.id if func.id else f"{file_path}:{func.name}"
self.functions[func_id] = func
Expand All @@ -210,18 +198,13 @@ def _analyze_typescript_file(self, file_path: str, content: str, repo_dir: str):
content: File content string
"""
try:
logger.debug(f"Starting tree-sitter TypeScript analysis for {file_path}")

from codewiki.src.be.dependency_analyzer.analyzers.typescript import analyze_typescript_file_treesitter

functions, relationships = analyze_typescript_file_treesitter(
file_path, content, repo_path=repo_dir
)

logger.debug(
f"Tree-sitter TypeScript analysis completed for {file_path}: {len(functions)} functions, {len(relationships)} relationships"
)

for func in functions:
func_id = func.id if func.id else f"{file_path}:{func.name}"
self.functions[func_id] = func
Expand Down Expand Up @@ -285,9 +268,6 @@ def _analyze_java_file(self, file_path: str, content: str, repo_dir: str):

try:
functions, relationships = analyze_java_file(file_path, content, repo_path=repo_dir)
logger.debug(
f"Found {len(functions)} functions and {len(relationships)} relationships in {file_path}"
)
for func in functions:
func_id = func.id if func.id else f"{file_path}:{func.name}"
self.functions[func_id] = func
Expand All @@ -309,9 +289,6 @@ def _analyze_csharp_file(self, file_path: str, content: str, repo_dir: str):

try:
functions, relationships = analyze_csharp_file(file_path, content, repo_path=repo_dir)
logger.debug(
f"Found {len(functions)} functions and {len(relationships)} relationships in {file_path}"
)

for func in functions:
func_id = func.id if func.id else f"{file_path}:{func.name}"
Expand All @@ -328,7 +305,6 @@ def _resolve_call_relationships(self):
Attempts to match function calls to actual function definitions,
handling cross-language calls where possible.
"""
logger.debug("Building function lookup table for resolving relationships.")
func_lookup = {}
for func_id, func_info in self.functions.items():
func_lookup[func_id] = func_id
Expand Down Expand Up @@ -375,9 +351,6 @@ def _deduplicate_relationships(self):
seen.add(key)
unique_relationships.append(rel)

logger.debug(
f"Removed {len(self.call_relationships) - len(unique_relationships)} duplicate relationships."
)
self.call_relationships = unique_relationships

def _generate_visualization_data(self) -> Dict:
Expand All @@ -391,7 +364,6 @@ def _generate_visualization_data(self) -> Dict:
"""
cytoscape_elements = []

logger.debug(f"Adding {len(self.functions)} function nodes.")
for func_id, func_info in self.functions.items():
node_classes = []
if func_info.node_type == "method":
Expand Down Expand Up @@ -425,7 +397,6 @@ def _generate_visualization_data(self) -> Dict:
)

resolved_rels = [r for r in self.call_relationships if r.is_resolved]
logger.debug(f"Adding {len(resolved_rels)} relationship edges.")
for rel in resolved_rels:
cytoscape_elements.append(
{
Expand Down Expand Up @@ -493,9 +464,6 @@ def _select_most_connected_nodes(self, target_count: int):
target_count: The number of nodes to select
"""
if len(self.functions) <= target_count:
logger.debug(
f"Have {len(self.functions)} functions, target is {target_count} - keeping all"
)
return

if not self.call_relationships:
Expand Down Expand Up @@ -537,8 +505,3 @@ def _select_most_connected_nodes(self, target_count: int):
if rel.caller in selected_func_ids and rel.callee in selected_func_ids
]

logger.debug(
f"Node selection: {original_func_count} -> {len(self.functions)} functions, "
f"{original_rel_count} -> {len(self.call_relationships)} relationships"
)
logger.debug(f"Kept {len(selected_func_ids)} most connected nodes (target: {target_count})")
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ def build_dependency_graph(self) -> tuple[Dict[str, Any], List[str]]:
if components[leaf_node].component_type in ["class", "interface", "struct"]:
keep_leaf_nodes.append(leaf_node)
else:
logger.debug(f"Leaf node {leaf_node} is a {components[leaf_node].component_type}, removing it")
# logger.debug(f"Leaf node {leaf_node} is a {components[leaf_node].component_type}, removing it")
pass
else:
logger.warning(f"Leaf node {leaf_node} not found in components, removing it")

Expand Down
6 changes: 4 additions & 2 deletions codewiki/src/be/dependency_analyzer/topo_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,9 +313,11 @@ def concise_node(leaf_nodes: Set[str]) -> Set[str]:
if components[leaf_node].component_type in ["class", "interface", "struct"]:
keep_leaf_nodes.append(leaf_node)
else:
logger.debug(f"Leaf node {leaf_node} is a {components[leaf_node].component_type}, removing it")
# logger.debug(f"Leaf node {leaf_node} is a {components[leaf_node].component_type}, removing it")
pass
else:
logger.debug(f"Leaf node {leaf_node} not found in components, removing it")
# logger.debug(f"Leaf node {leaf_node} not found in components, removing it")
pass

return keep_leaf_nodes

Expand Down
Loading