Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions aider/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,13 @@ def get_parser(default_config_files, git_root):
help="Run tests, fix problems found and then exit",
default=False,
)
group.add_argument(
"--stats",
metavar="REVISIONS",
nargs="?",
const="HEAD",
help="Show code changes statistics between revisions",
)

##########
group = parser.add_argument_group("Analytics")
Expand Down
92 changes: 91 additions & 1 deletion aider/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import subprocess
import sys
import tempfile
from collections import OrderedDict
from collections import OrderedDict, defaultdict
from os.path import expanduser
from pathlib import Path

Expand All @@ -23,6 +23,7 @@
from aider.run_cmd import run_cmd
from aider.scrape import Scraper, install_playwright
from aider.utils import is_image_file
from aider.stats import hash_len, get_all_commit_hashes_between_tags, get_commit_authors, get_counts_for_file

from .dump import dump # noqa: F401

Expand Down Expand Up @@ -1484,6 +1485,95 @@ def cmd_multiline_mode(self, args):
"Toggle multiline mode (swaps behavior of Enter and Meta+Enter)"
self.io.toggle_multiline_mode()

def cmd_stats(self, args):
    """Show statistics about code changes and aider's contributions by counting lines of code through git blame.

    Usage:
      /stats                     Compare against main/master branch
      /stats <revision>          Compare against specific revision
      /stats rev1..rev2          Compare between two specific revisions

    Examples:
      /stats                     Show stats vs main/master branch
      /stats HEAD~5              Show stats vs 5 commits ago
      /stats v1.0.0              Show stats vs version 1.0.0
      /stats main..HEAD          Show stats between main and current HEAD

    Lines are attributed to aider when the author name recorded for the commit
    contains "(aider)" (commits whose subject starts with "aider:" are tagged that way).
    Files with common binary extensions (images, audio, etc.) are excluded from the analysis.
    """
    if not self.coder.repo:
        self.io.tool_error("No git repository found.")
        return

    # Files with these extensions are skipped; str.endswith accepts a tuple,
    # so the whole filter is a single call per file.
    skipped_suffixes = (
        ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".ico", ".svg",  # images
        ".mp3", ".wav", ".ogg", ".m4a", ".flac",  # audio
        ".mp4", ".avi", ".mov", ".wmv", ".flv", ".webm",  # video
        ".pdf", ".doc", ".docx", ".ppt", ".pptx", ".xls", ".xlsx",  # documents
        ".zip", ".tar", ".gz", ".7z", ".rar",  # archives
        ".ttf", ".otf", ".woff", ".woff2", ".eot",  # fonts
    )

    try:
        args = (args or "").strip()
        if not args:
            # Default to comparing against main/master branch
            for default_branch in ("main", "master"):
                try:
                    self.coder.repo.repo.rev_parse(default_branch)
                except Exception:
                    # Branch does not resolve; try the next candidate.  A bare
                    # ``except:`` here would also swallow KeyboardInterrupt.
                    continue
                args = default_branch
                break
            if not args:
                self.io.tool_error("No main or master branch found. Please specify a revision.")
                return

        # "a..b" -> (a, b); "a" -> (a, HEAD).  An empty side ("a.." or "..b")
        # means HEAD, matching git's own reading of such ranges.
        source_revision, _, target_revision = args.partition("..")
        source_revision = source_revision or "HEAD"
        target_revision = target_revision or "HEAD"

        commits = get_all_commit_hashes_between_tags(source_revision, target_revision)
        if not commits:
            self.io.tool_error(
                f"There are no commits between the specified revisions from {source_revision} to {target_revision}."
            )
            return
        # Blame output is matched on abbreviated hashes, so key the author map the same way.
        commits = [commit[:hash_len] for commit in commits]
        authors = get_commit_authors(commits)

        # Get files changed between revisions
        diff_files = self.coder.repo.repo.git.diff(
            "--name-only", f"{source_revision}..{target_revision}"
        ).splitlines()
        files = [f for f in diff_files if not f.lower().endswith(skipped_suffixes)]
        self.io.tool_output(f"Found {len(files)} non-binary tracked files in the repository.")

        grand_total = defaultdict(int)
        aider_total = 0
        for file in files:
            file_counts = get_counts_for_file(source_revision, target_revision, authors, file)
            if not file_counts:
                continue
            for author, count in file_counts.items():
                grand_total[author] += count
                if "(aider)" in author.lower():
                    aider_total += count

        total_lines = sum(grand_total.values())
        if total_lines > 0:
            aider_percentage = (aider_total / total_lines) * 100
            # Output overall statistics
            self.io.tool_output(f"\nAnalysis from {source_revision} to {target_revision}:")
            self.io.tool_output(f"Total lines analyzed: {total_lines:,}")
            self.io.tool_output(f"Lines by aider: {aider_total:,} ({aider_percentage:.1f}%)")
            self.io.tool_output(f"Lines by humans: {total_lines - aider_total:,} ({100 - aider_percentage:.1f}%)")
        else:
            self.io.tool_output("No lines of code found in the repository.")

    except Exception as e:
        # Top-level boundary for the command: report the failure instead of crashing the session.
        self.io.tool_error(f"Error analyzing aider statistics: {e}")


def cmd_copy(self, args):
"Copy the last assistant message to the clipboard"
all_messages = self.coder.done_messages + self.coder.cur_messages
Expand Down
5 changes: 5 additions & 0 deletions aider/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -1142,6 +1142,11 @@ def get_io(pretty):
analytics.event("exit", reason="Completed --message-file")
return

if args.stats:
commands.cmd_stats(args.stats)
analytics.event("exit", reason="Completed --stats")
return

if args.exit:
analytics.event("exit", reason="Exit flag set")
return
Expand Down
121 changes: 121 additions & 0 deletions aider/stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import subprocess
import sys

from collections import defaultdict

# Number of leading characters of a commit hash used as the lookup key when
# matching git blame output lines to commits (the length of a 9-character
# abbreviated hash such as "44e6fefc2").
hash_len = len("44e6fefc2")

def run(cmd):
    """Run a command and hand back everything it wrote to stdout.

    Args:
        cmd: List containing the command and its arguments

    Returns:
        String output (stdout) of the command

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero status
    """
    completed = subprocess.run(cmd, check=True, text=True, capture_output=True)
    return completed.stdout

def get_all_commit_hashes_between_tags(start_tag, end_tag=None):
    """List the commits reachable from the end revision but not from the start.

    Args:
        start_tag: Starting tag or commit hash
        end_tag: Ending tag or commit hash; a falsy value means HEAD

    Returns:
        List of commit hashes as printed by ``git rev-list``, or None when
        the command produced no output
    """
    upper_bound = end_tag if end_tag else "HEAD"
    listing = run(["git", "rev-list", f"{start_tag}..{upper_bound}"])
    if not listing:
        return None
    return listing.strip().split("\n")

def get_commit_authors(commits):
    """Look up the author name of every commit, tagging aider commits.

    A commit whose subject line starts with "aider:" (case-insensitive) gets
    " (aider)" appended to its author name.

    Args:
        commits: List of commit hashes

    Returns:
        Dictionary mapping each given commit hash to its (possibly tagged)
        author name
    """
    authors_by_commit = {}
    for sha in commits:
        name = run(["git", "show", "-s", "--format=%an", sha]).strip()
        subject = run(["git", "show", "-s", "--format=%s", sha]).strip()
        written_by_aider = subject.lower().startswith("aider:")
        authors_by_commit[sha] = name + " (aider)" if written_by_aider else name
    return authors_by_commit

def get_counts_for_file(start_tag, end_tag, authors, fname):
    """Count lines attributed to each author in a file using git blame.

    Args:
        start_tag: Starting tag or commit hash (lower bound of the blamed range)
        end_tag: Ending tag or commit hash; a falsy value means HEAD
        authors: Dictionary mapping abbreviated commit hashes (the first
            ``hash_len`` characters) to author names
        fname: File path to analyze

    Returns:
        Dictionary mapping author names to line counts. Lines whose commit is
        not in ``authors`` are counted under "Unknown". Returns None when git
        blame produced no output or failed (for example because the file does
        not exist in the revision range).
    """
    blame_cmd = [
        "git",
        "blame",
        # For -M/-C the number is the minimum count of alphanumeric characters
        # git must match to treat lines as moved/copied (not a percentage).
        "-M100",  # Detect lines moved or copied within the file
        "-C100",  # Also detect lines moved/copied from files changed in the same commit
        "-C",  # Given twice: also look in the commit that created the file
        "-C",  # Given three times: also look in any commit
        "--abbrev=9",
        f"{start_tag}..{end_tag or 'HEAD'}",
        "--",
        fname,
    ]
    try:
        text = run(blame_cmd)
    except subprocess.CalledProcessError as e:
        # str(e) only carries the command line and exit status; git's actual
        # diagnostic ("fatal: no such path ...") is in the captured stderr.
        git_message = (e.stderr or "").lower()
        if "no such path" not in git_message:
            # Some other error occurred
            print(f"Warning: Unable to blame file {fname}. Error: {e}", file=sys.stderr)
        # A missing file in this revision range is expected and stays silent.
        return None

    if not text:
        return None

    line_counts = defaultdict(int)
    for line in text.splitlines():
        # "^" marks a boundary commit, i.e. a line that predates start_tag.
        if line.startswith("^"):
            continue
        line_counts[authors.get(line[:hash_len], "Unknown")] += 1

    return dict(line_counts)
34 changes: 2 additions & 32 deletions scripts/blame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

import argparse
import os
import subprocess
import sys
from collections import defaultdict
from datetime import datetime
from operator import itemgetter
Expand All @@ -12,6 +10,8 @@
import yaml
from tqdm import tqdm

from aider.stats import get_all_commit_hashes_between_tags, run, hash_len, get_commit_authors, get_counts_for_file

website_files = [
"aider/website/index.html",
"aider/website/share/index.md",
Expand Down Expand Up @@ -68,36 +68,6 @@ def blame(start_tag, end_tag=None):
return all_file_counts, grand_total, total_lines, aider_total, aider_percentage, end_date


def get_all_commit_hashes_between_tags(start_tag, end_tag=None):
    """Return the hashes printed by ``git rev-list start_tag..end_tag``.

    A falsy ``end_tag`` means HEAD. Returns None when the command prints nothing.
    """
    rev_range = f"{start_tag}..{end_tag}" if end_tag else f"{start_tag}..HEAD"
    listing = run(["git", "rev-list", rev_range])
    if not listing:
        return None
    return listing.strip().split("\n")


def run(cmd):
    """Execute ``cmd`` (an argument list) and return its captured stdout as text.

    Raises subprocess.CalledProcessError when the command exits non-zero.
    """
    return subprocess.run(cmd, check=True, text=True, capture_output=True).stdout


def get_commit_authors(commits):
    """Map each commit to its author name, tagging aider commits.

    Commits whose subject starts with "aider:" (case-insensitive) get
    " (aider)" appended to the author name.
    """

    def describe(commit):
        # Author name first, then subject, one ``git show`` call each.
        name = run(["git", "show", "-s", "--format=%an", commit]).strip()
        subject = run(["git", "show", "-s", "--format=%s", commit]).strip()
        if subject.lower().startswith("aider:"):
            return name + " (aider)"
        return name

    return {commit: describe(commit) for commit in commits}


# Length of a 9-character abbreviated git commit hash, e.g. "44e6fefc2".
hash_len = len("44e6fefc2")


def process_all_tags_since(start_tag):
tags = get_all_tags_since(start_tag)
Expand Down
Loading