-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathgit_tracker.py
More file actions
165 lines (140 loc) · 4.96 KB
/
git_tracker.py
File metadata and controls
165 lines (140 loc) · 4.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# mcp-codebase-index - Structural codebase indexer with MCP server
# Copyright (C) 2026 Michael Doyle
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# Commercial licensing available. See COMMERCIAL-LICENSE.md for details.
"""Git change detection for incremental re-indexing."""
from __future__ import annotations
import subprocess
from dataclasses import dataclass, field
@dataclass
class GitChangeSet:
    """Paths reported as changed relative to a git ref, grouped by change kind."""

    # Each field gets its own fresh list so instances never share state.
    modified: list[str] = field(default_factory=list)
    added: list[str] = field(default_factory=list)
    deleted: list[str] = field(default_factory=list)

    @property
    def is_empty(self) -> bool:
        """Return True when none of the three lists holds any path."""
        return not any((self.modified, self.added, self.deleted))
def is_git_repo(root_path: str) -> bool:
    """Check if the given path is inside a git work tree.

    Args:
        root_path: Directory in which ``git rev-parse`` is executed.

    Returns:
        True only when git exits with status 0 and prints ``true``.
        False when git reports otherwise, cannot be started, or does not
        answer within 10 seconds.
    """
    try:
        result = subprocess.run(
            ["git", "rev-parse", "--is-inside-work-tree"],
            cwd=root_path,
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (OSError, subprocess.TimeoutExpired):
        # OSError covers a missing git executable (FileNotFoundError) and
        # also an unusable root_path: missing, not a directory, or not
        # accessible. A predicate should answer False for all of these
        # instead of raising.
        return False
    return result.returncode == 0 and result.stdout.strip() == "true"
def get_head_commit(root_path: str) -> str | None:
    """Get the current HEAD commit hash.

    Args:
        root_path: Directory in which ``git rev-parse HEAD`` is executed.

    Returns:
        The stripped output of ``git rev-parse HEAD`` when git exits with
        status 0 and prints something; otherwise None. None is also
        returned when git cannot be started or does not answer within
        10 seconds.
    """
    try:
        result = subprocess.run(
            ["git", "rev-parse", "HEAD"],
            cwd=root_path,
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (OSError, subprocess.TimeoutExpired):
        # OSError covers a missing git executable (FileNotFoundError) and
        # also an unusable root_path: missing, not a directory, or not
        # accessible.
        return None
    if result.returncode != 0:
        return None
    # Never hand back an empty string as if it were a commit hash.
    return result.stdout.strip() or None
def get_changed_files(root_path: str, since_ref: str | None) -> GitChangeSet:
    """Get files changed since a given git ref.

    Combines committed changes (since_ref..HEAD), staged changes,
    unstaged changes, and untracked files into a single GitChangeSet.

    The four sources are folded oldest-first (commits, index, working
    tree, untracked files) so every path ends up with exactly one net
    status:

    * deleted, then present again later  -> modified
    * added or modified, then deleted    -> deleted
    * added, then modified               -> added

    Args:
        root_path: Directory in which the git commands are executed.
        since_ref: Ref to compare against. ``None`` yields an empty
            change set without running git.

    Returns:
        A GitChangeSet whose three lists are sorted and mutually disjoint.
        A git command that fails contributes nothing (best effort).
    """
    if since_ref is None:
        return GitChangeSet()

    # NOTE(review): `git diff` prints paths relative to the repository top
    # level while `git ls-files` prints them relative to the current
    # directory; the two only agree when root_path is the repository
    # root -- confirm that callers always pass the root.
    layer_commands = [
        ["git", "diff", "--name-status", since_ref, "HEAD"],  # committed
        ["git", "diff", "--name-status", "--cached"],  # staged
        ["git", "diff", "--name-status"],  # unstaged
    ]

    net_status: dict[str, str] = {}
    for cmd in layer_commands:
        modified: set[str] = set()
        added: set[str] = set()
        deleted: set[str] = set()
        _parse_diff_output(root_path, cmd, modified, added, deleted)
        _fold_change_layer(net_status, modified, added, deleted)

    # Untracked files form the newest layer and can only ever be additions.
    _fold_change_layer(net_status, set(), _list_untracked_files(root_path), set())

    return GitChangeSet(
        modified=sorted(p for p, s in net_status.items() if s == "M"),
        added=sorted(p for p, s in net_status.items() if s == "A"),
        deleted=sorted(p for p, s in net_status.items() if s == "D"),
    )


def _fold_change_layer(
    net_status: dict[str, str],
    modified: set[str],
    added: set[str],
    deleted: set[str],
) -> None:
    """Apply one newer layer of changes on top of the net status per path."""
    # Deletions go first so that a path which is both deleted and added in
    # the same layer (e.g. a chain of renames) ends up as modified.
    for path in deleted:
        # Even "added earlier, deleted now" is kept as deleted: reporting a
        # path that is gone is harmless, reporting it as present is not.
        net_status[path] = "D"
    for path in modified:
        # A path that is new relative to the ref stays "added" however
        # often it was edited afterwards.
        if net_status.get(path) != "A":
            net_status[path] = "M"
    for path in added:
        previous = net_status.get(path)
        if previous == "D":
            # Removed earlier, present again now: its content may differ.
            net_status[path] = "M"
        elif previous is None:
            net_status[path] = "A"


def _list_untracked_files(root_path: str) -> set[str]:
    """Return untracked, non-ignored paths; empty set if git cannot be run."""
    try:
        result = subprocess.run(
            ["git", "ls-files", "--others", "--exclude-standard"],
            cwd=root_path,
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (OSError, subprocess.TimeoutExpired):
        # Best effort: missing git, unusable root_path or a timeout just
        # means no untracked files are reported.
        return set()
    if result.returncode != 0:
        return set()
    return {line.strip() for line in result.stdout.splitlines() if line.strip()}
def _parse_diff_output(
    root_path: str,
    cmd: list[str],
    modified: set[str],
    added: set[str],
    deleted: set[str],
) -> None:
    """Parse git diff --name-status output into modified/added/deleted sets.

    Runs ``cmd`` in ``root_path`` and classifies each tab-separated
    ``<status> <path> [<new path>]`` line of its output:

    * ``M`` (modified) and ``T`` (type changed)  -> ``modified``
    * ``A``                                      -> ``added``
    * ``D``                                      -> ``deleted``
    * ``R<score>`` (rename)  -> old path ``deleted``, new path ``added``
    * ``C<score>`` (copy)    -> new path ``added``; the source is untouched

    Lines with any other status or without a path are ignored.

    Args:
        root_path: Directory in which ``cmd`` is executed.
        cmd: Full argument vector of the command to run.
        modified: Set updated in place with modified paths.
        added: Set updated in place with added paths.
        deleted: Set updated in place with deleted paths.

    Returns:
        None. The sets are left untouched when the command cannot be
        started, times out after 10 seconds, or exits with a non-zero status.
    """
    try:
        result = subprocess.run(
            cmd,
            cwd=root_path,
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (OSError, subprocess.TimeoutExpired):
        # OSError covers a missing executable as well as an unusable
        # root_path (missing, not a directory, not accessible).
        return
    if result.returncode != 0:
        return

    # No whole-output strip(): it would also eat trailing whitespace that
    # belongs to the last path. Blank lines are skipped below anyway.
    for line in result.stdout.splitlines():
        status, *paths = line.split("\t")
        if not paths:
            continue
        path = paths[0]
        if status in ("M", "T"):
            modified.add(path)
        elif status == "A":
            added.add(path)
        elif status == "D":
            deleted.add(path)
        elif status.startswith("R"):
            # Rename: delete old path, add new path
            deleted.add(path)
            if len(paths) >= 2:
                added.add(paths[1])
        elif status.startswith("C"):
            # Copy: the source file is unchanged, only the copy is new.
            if len(paths) >= 2:
                added.add(paths[1])