Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Improve directory mtime support #61

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 65 additions & 14 deletions git-restore-mtime
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,37 @@ def get_mtime_ns(secs: int, idx: int):
def get_mtime_path(path):
    """Return the last modification time of path as reported by the OS."""
    return os.stat(path).st_mtime

class DirectoryMtimes:
    """
    Track the newest known mtime for paths and all of their parent paths.

    Whenever an mtime is recorded for a path it bubbles up: every parent
    whose recorded mtime is missing or older is raised to the same value.
    Entries are keyed by the tuple of normalized path components; the
    top-level directory ('' or '.') is keyed by the empty tuple.
    """

    def __init__(self):
        # Maps tuple of path components -> newest mtime recorded for that
        # path or for anything below it.
        self.mtimes = {}

    @staticmethod
    def _key(path):
        """Return the tuple of path components used as key for path."""
        normalized = os.path.normpath(path)
        # normpath() turns '' into '.'. Both denote the top-level directory,
        # which must share the empty-tuple key that bubbling up from any
        # child path ends at; otherwise lookups of the root always miss.
        if normalized == os.curdir:
            return ()
        return tuple(normalized.split(os.sep))

    def set_path_mtime(self, path, mtime):
        """Set mtime of path. Also updates parent paths if mtime is newer."""
        key = self._key(path)

        # Update mtimes for path and parent paths, ending at the root key ().
        while True:
            current_mtime = self.mtimes.get(key)
            if current_mtime is None or current_mtime < mtime:
                self.mtimes[key] = mtime
            else:
                # Every update bubbles upwards through this method, so the
                # parents of an entry are never older than the entry itself.
                break

            if not key:
                break
            key = key[:-1]

    def get_path_mtime(self, path):
        """Get mtime recorded for a path, or None if nothing was recorded."""
        return self.mtimes.get(self._key(path))

# Git class and parse_log(), the heart of the script ##########################

Expand Down Expand Up @@ -366,8 +397,19 @@ class Git:
class Error(subprocess.CalledProcessError):
    """Error from git executable.

    Subclasses subprocess.CalledProcessError, so handlers written for that
    exception also catch this one.
    """

def touch_dir(dirname, mtime, datestr, stats, git):
    """
    Apply mtime to the directory dirname located under git.workdir.

    When args.debug is set, a debug line is logged first, holding the
    'loglines' and 'commits' counters from stats, datestr and the directory
    name ('.' when dirname is empty). A successful touch() increments
    stats['dirtouches']; any exception is logged as an error and counted in
    stats['direrrors'] instead of being propagated.
    """
    if args.debug:
        shown_name = "{}/".format(dirname or '.')
        log.debug("%d\t%d\t-\t%s\t%s",
                  stats['loglines'], stats['commits'],
                  datestr, shown_name)
    try:
        target = os.path.join(git.workdir, dirname)
        touch(target, mtime)
        stats['dirtouches'] += 1
    except Exception as err:
        log.error("ERROR: %s: %s", err, dirname)
        stats['direrrors'] += 1

def parse_log(filelist, dirlist, stats, git, merge=False, filterlist=None):
def parse_log(filelist, dirlist, dir_times, stats, git, merge=False, filterlist=None):
mtime = 0
datestr = isodate(0)
for line in git.log(
Expand Down Expand Up @@ -425,27 +467,20 @@ def parse_log(filelist, dirlist, stats, git, merge=False, filterlist=None):
stats['errors'] += 1

def do_dir():
if args.debug:
log.debug("%d\t%d\t-\t%s\t%s",
stats['loglines'], stats['commits'],
datestr, "{}/".format(dirname or '.'))
try:
touch(os.path.join(git.workdir, dirname), mtime)
stats['dirtouches'] += 1
except Exception as e:
log.error("ERROR: %s: %s", e, dirname)
stats['direrrors'] += 1
touch_dir(dirname, mtime, datestr, stats, git)

if file in filelist:
stats['files'] -= 1
filelist.remove(file)
do_file()
dir_times.set_path_mtime(file, mtime)

if args.dirs and status in ('A', 'D'):
dirname = os.path.dirname(file)
if dirname in dirlist:
dirlist.remove(dirname)
do_dir()
dir_times.set_path_mtime(dirname, mtime)

# All files done?
if not stats['files']:
Expand Down Expand Up @@ -516,7 +551,15 @@ def main():
filelist -= dirty

# Build dir list to be processed
dirlist = set(os.path.dirname(_) for _ in filelist) if args.dirs else set()
# For every file name, also add _all_ parent directories to set, so we also consider directories
# with just subdirectories, but no files.
dirlist = set()
if args.dirs:
for fn in filelist:
current_dir = os.path.dirname(fn)
while current_dir != "":
dirlist.add(current_dir)
current_dir = os.path.dirname(current_dir)

stats['totalfiles'] = stats['files'] = len(filelist)
log.info("{0:,} files to be processed in work dir".format(stats['totalfiles']))
Expand All @@ -525,9 +568,11 @@ def main():
# Nothing to do. Exit silently and without errors, just like git does
return

dir_times = DirectoryMtimes()

# Process the log until all files are 'touched'
log.debug("Line #\tLog #\tF.Left\tModification Time\tFile Name")
parse_log(filelist, dirlist, stats, git, args.merge, args.pathspec)
parse_log(filelist, dirlist, dir_times, stats, git, args.merge, args.pathspec)

# Missing files
if filelist:
Expand All @@ -538,13 +583,19 @@ def main():
missing = len(filterlist)
log.info("{0:,} files not found in log, trying merge commits".format(missing))
for i in range(0, missing, STEPMISSING):
parse_log(filelist, dirlist, stats, git,
parse_log(filelist, dirlist, dir_times, stats, git,
merge=True, filterlist=filterlist[i:i + STEPMISSING])

# Still missing some?
for file in filelist:
log.warning("WARNING: not found in the log: %s", file)

# Directories not appearing in log
for dir in dirlist:
dir_mtime = dir_times.get_path_mtime(dir)
if dir_mtime is not None:
touch_dir(dir, dir_mtime, isodate(dir_mtime), stats, git)

# Final statistics
# Suggestion: use git-log --before=mtime to brag about skipped log entries
def log_info(msg, *a, width=13):
Expand Down