-
Notifications
You must be signed in to change notification settings - Fork 0
/
libwyag.py
1559 lines (1242 loc) · 48.7 KB
/
libwyag.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# This file is part of wyag <https://wyag.thb.lt>
# Copyright (c) 2018-2023 Thibault Polge <thibault@thb.lt>
# All rights reserved
#
# Wyag is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Wyag is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
# License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Wyag. If not, see <https://www.gnu.org/licenses/>.
#
import argparse
import collections
import configparser
from datetime import datetime
import grp, pwd
from fnmatch import fnmatch
import hashlib
from math import ceil
import os
import re
import sys
import zlib
argparser = argparse.ArgumentParser(description="The stupidest content tracker")
argsubparsers = argparser.add_subparsers(title="Commands", dest="command")
argsubparsers.required = True
def main(argv=sys.argv[1:]):
args = argparser.parse_args(argv)
match args.command:
case "add" : cmd_add(args)
case "cat-file" : cmd_cat_file(args)
case "check-ignore" : cmd_check_ignore(args)
case "checkout" : cmd_checkout(args)
case "commit" : cmd_commit(args)
case "hash-object" : cmd_hash_object(args)
case "init" : cmd_init(args)
case "log" : cmd_log(args)
case "ls-files" : cmd_ls_files(args)
case "ls-tree" : cmd_ls_tree(args)
case "rev-parse" : cmd_rev_parse(args)
case "rm" : cmd_rm(args)
case "show-ref" : cmd_show_ref(args)
case "status" : cmd_status(args)
case "tag" : cmd_tag(args)
case _ : print("Bad command.")
class GitRepository (object):
"""A git repository"""
worktree = None
gitdir = None
conf = None
def __init__(self, path, force=False):
self.worktree = path
self.gitdir = os.path.join(path, ".git")
if not (force or os.path.isdir(self.gitdir)):
raise Exception("Not a Git repository %s" % path)
# Read configuration file in .git/config
self.conf = configparser.ConfigParser()
cf = repo_file(self, "config")
if cf and os.path.exists(cf):
self.conf.read([cf])
elif not force:
raise Exception("Configuration file missing")
if not force:
vers = int(self.conf.get("core", "repositoryformatversion"))
if vers != 0:
raise Exception("Unsupported repositoryformatversion %s" % vers)
def repo_path(repo, *path):
"""Compute path under repo's gitdir."""
return os.path.join(repo.gitdir, *path)
def repo_file(repo, *path, mkdir=False):
"""Same as repo_path, but create dirname(*path) if absent. For
example, repo_file(r, \"refs\", \"remotes\", \"origin\", \"HEAD\") will create
.git/refs/remotes/origin."""
if repo_dir(repo, *path[:-1], mkdir=mkdir):
return repo_path(repo, *path)
def repo_dir(repo, *path, mkdir=False):
"""Same as repo_path, but mkdir *path if absent if mkdir."""
path = repo_path(repo, *path)
if os.path.exists(path):
if (os.path.isdir(path)):
return path
else:
raise Exception("Not a directory %s" % path)
if mkdir:
os.makedirs(path)
return path
else:
return None
def repo_create(path):
"""Create a new repository at path."""
repo = GitRepository(path, True)
# First, we make sure the path either doesn't exist or is an
# empty dir.
if os.path.exists(repo.worktree):
if not os.path.isdir(repo.worktree):
raise Exception ("%s is not a directory!" % path)
if os.path.exists(repo.gitdir) and os.listdir(repo.gitdir):
raise Exception("%s is not empty!" % path)
else:
os.makedirs(repo.worktree)
assert repo_dir(repo, "branches", mkdir=True)
assert repo_dir(repo, "objects", mkdir=True)
assert repo_dir(repo, "refs", "tags", mkdir=True)
assert repo_dir(repo, "refs", "heads", mkdir=True)
# .git/description
with open(repo_file(repo, "description"), "w") as f:
f.write("Unnamed repository; edit this file 'description' to name the repository.\n")
# .git/HEAD
with open(repo_file(repo, "HEAD"), "w") as f:
f.write("ref: refs/heads/master\n")
with open(repo_file(repo, "config"), "w") as f:
config = repo_default_config()
config.write(f)
return repo
def repo_default_config():
ret = configparser.ConfigParser()
ret.add_section("core")
ret.set("core", "repositoryformatversion", "0")
ret.set("core", "filemode", "false")
ret.set("core", "bare", "false")
return ret
argsp = argsubparsers.add_parser("init", help="Initialize a new, empty repository.")
argsp.add_argument("path",
metavar="directory",
nargs="?",
default=".",
help="Where to create the repository.")
def cmd_init(args):
repo_create(args.path)
def repo_find(path=".", required=True):
path = os.path.realpath(path)
if os.path.isdir(os.path.join(path, ".git")):
return GitRepository(path)
# If we haven't returned, recurse in parent, if w
parent = os.path.realpath(os.path.join(path, ".."))
if parent == path:
# Bottom case
# os.path.join("/", "..") == "/":
# If parent==path, then path is root.
if required:
raise Exception("No git directory.")
else:
return None
# Recursive case
return repo_find(parent, required)
class GitObject (object):
def __init__(self, data=None):
if data != None:
self.deserialize(data)
else:
self.init()
def serialize(self, repo):
"""This function MUST be implemented by subclasses.
It must read the object's contents from self.data, a byte string, and do
whatever it takes to convert it into a meaningful representation. What exactly that means depend on each subclass."""
raise Exception("Unimplemented!")
def deserialize(self, data):
raise Exception("Unimplemented!")
def init(self):
pass # Just do nothing. This is a reasonable default!
def object_read(repo, sha):
"""Read object sha from Git repository repo. Return a
GitObject whose exact type depends on the object."""
path = repo_file(repo, "objects", sha[0:2], sha[2:])
if not os.path.isfile(path):
return None
with open (path, "rb") as f:
raw = zlib.decompress(f.read())
# Read object type
x = raw.find(b' ')
fmt = raw[0:x]
# Read and validate object size
y = raw.find(b'\x00', x)
size = int(raw[x:y].decode("ascii"))
if size != len(raw)-y-1:
raise Exception("Malformed object {0}: bad length".format(sha))
# Pick constructor
match fmt:
case b'commit' : c=GitCommit
case b'tree' : c=GitTree
case b'tag' : c=GitTag
case b'blob' : c=GitBlob
case _:
raise Exception("Unknown type {0} for object {1}".format(fmt.decode("ascii"), sha))
# Call constructor and return object
return c(raw[y+1:])
def object_write(obj, repo=None):
# Serialize object data
data = obj.serialize()
# Add header
result = obj.fmt + b' ' + str(len(data)).encode() + b'\x00' + data
# Compute hash
sha = hashlib.sha1(result).hexdigest()
if repo:
# Compute path
path=repo_file(repo, "objects", sha[0:2], sha[2:], mkdir=True)
if not os.path.exists(path):
with open(path, 'wb') as f:
# Compress and write
f.write(zlib.compress(result))
return sha
class GitBlob(GitObject):
fmt=b'blob'
def serialize(self):
return self.blobdata
def deserialize(self, data):
self.blobdata = data
argsp = argsubparsers.add_parser("cat-file",
help="Provide content of repository objects")
argsp.add_argument("type",
metavar="type",
choices=["blob", "commit", "tag", "tree"],
help="Specify the type")
argsp.add_argument("object",
metavar="object",
help="The object to display")
def cmd_cat_file(args):
repo = repo_find()
cat_file(repo, args.object, fmt=args.type.encode())
def cat_file(repo, obj, fmt=None):
obj = object_read(repo, object_find(repo, obj, fmt=fmt))
sys.stdout.buffer.write(obj.serialize())
argsp = argsubparsers.add_parser(
"hash-object",
help="Compute object ID and optionally creates a blob from a file")
argsp.add_argument("-t",
metavar="type",
dest="type",
choices=["blob", "commit", "tag", "tree"],
default="blob",
help="Specify the type")
argsp.add_argument("-w",
dest="write",
action="store_true",
help="Actually write the object into the database")
argsp.add_argument("path",
help="Read object from <file>")
def cmd_hash_object(args):
if args.write:
repo = repo_find()
else:
repo = None
with open(args.path, "rb") as fd:
sha = object_hash(fd, args.type.encode(), repo)
print(sha)
def object_hash(fd, fmt, repo=None):
""" Hash object, writing it to repo if provided."""
data = fd.read()
# Choose constructor according to fmt argument
match fmt:
case b'commit' : obj=GitCommit(data)
case b'tree' : obj=GitTree(data)
case b'tag' : obj=GitTag(data)
case b'blob' : obj=GitBlob(data)
case _: raise Exception("Unknown type %s!" % fmt)
return object_write(obj, repo)
def kvlm_parse(raw, start=0, dct=None):
if not dct:
dct = collections.OrderedDict()
# You CANNOT declare the argument as dct=OrderedDict() or all
# call to the functions will endlessly grow the same dict.
# This function is recursive: it reads a key/value pair, then call
# itself back with the new position. So we first need to know
# where we are: at a keyword, or already in the messageQ
# We search for the next space and the next newline.
spc = raw.find(b' ', start)
nl = raw.find(b'\n', start)
# If space appears before newline, we have a keyword. Otherwise,
# it's the final message, which we just read to the end of the file.
# Base case
# =========
# If newline appears first (or there's no space at all, in which
# case find returns -1), we assume a blank line. A blank line
# means the remainder of the data is the message. We store it in
# the dictionary, with None as the key, and return.
if (spc < 0) or (nl < spc):
assert nl == start
dct[None] = raw[start+1:]
return dct
# Recursive case
# ==============
# we read a key-value pair and recurse for the next.
key = raw[start:spc]
# Find the end of the value. Continuation lines begin with a
# space, so we loop until we find a "\n" not followed by a space.
end = start
while True:
end = raw.find(b'\n', end+1)
if raw[end+1] != ord(' '): break
# Grab the value
# Also, drop the leading space on continuation lines
value = raw[spc+1:end].replace(b'\n ', b'\n')
# Don't overwrite existing data contents
if key in dct:
if type(dct[key]) == list:
dct[key].append(value)
else:
dct[key] = [ dct[key], value ]
else:
dct[key]=value
return kvlm_parse(raw, start=end+1, dct=dct)
def kvlm_serialize(kvlm):
ret = b''
# Output fields
for k in kvlm.keys():
# Skip the message itself
if k == None: continue
val = kvlm[k]
# Normalize to a list
if type(val) != list:
val = [ val ]
for v in val:
ret += k + b' ' + (v.replace(b'\n', b'\n ')) + b'\n'
# Append message
ret += b'\n' + kvlm[None] + b'\n'
return ret
class GitCommit(GitObject):
fmt=b'commit'
def deserialize(self, data):
self.kvlm = kvlm_parse(data)
def serialize(self):
return kvlm_serialize(self.kvlm)
def init(self):
self.kvlm = dict()
argsp = argsubparsers.add_parser("log", help="Display history of a given commit.")
argsp.add_argument("commit",
default="HEAD",
nargs="?",
help="Commit to start at.")
def cmd_log(args):
repo = repo_find()
print("digraph wyaglog{")
print(" node[shape=rect]")
log_graphviz(repo, object_find(repo, args.commit), set())
print("}")
def log_graphviz(repo, sha, seen):
if sha in seen:
return
seen.add(sha)
commit = object_read(repo, sha)
short_hash = sha[0:8]
message = commit.kvlm[None].decode("utf8").strip()
message = message.replace("\\", "\\\\")
message = message.replace("\"", "\\\"")
if "\n" in message: # Keep only the first line
message = message[:message.index("\n")]
print(" c_{0} [label=\"{1}: {2}\"]".format(sha, sha[0:7], message))
# assert commit.fmt==b'commit'
assert isinstance(commit, GitCommit)
if not b'parent' in commit.kvlm.keys():
# Base case: the initial commit.
return
parents = commit.kvlm[b'parent']
if type(parents) != list:
parents = [ parents ]
for p in parents:
p = p.decode("ascii")
print (" c_{0} -> c_{1};".format(sha, p))
log_graphviz(repo, p, seen)
class GitTreeLeaf (object):
def __init__(self, mode, path, sha):
self.mode = mode
self.path = path
self.sha = sha
def tree_parse_one(raw, start=0):
# Find the space terminator of the mode
x = raw.find(b' ', start)
assert x-start == 5 or x-start==6
# Read the mode
mode = raw[start:x]
if len(mode) == 5:
# Normalize to six bytes.
mode = b" " + mode
# Find the NULL terminator of the path
y = raw.find(b'\x00', x)
# and read the path
path = raw[x+1:y]
# Read the SHA and convert to a hex string
sha = format(int.from_bytes(raw[y+1:y+21], "big"), "040x")
return y+21, GitTreeLeaf(mode, path.decode("utf8"), sha)
def tree_parse(raw):
pos = 0
max = len(raw)
ret = list()
while pos < max:
pos, data = tree_parse_one(raw, pos)
ret.append(data)
return ret
# Notice this isn't a comparison function, but a conversion function.
# Python's default sort doesn't accept a custom comparison function,
# like in most languages, but a `key` arguments that returns a new
# value, which is compared using the default rules. So we just return
# the leaf name, with an extra / if it's a directory.
def tree_leaf_sort_key(leaf):
if leaf.mode.startswith(b"10"):
return leaf.path
else:
return leaf.path + "/"
def tree_serialize(obj):
obj.items.sort(key=tree_leaf_sort_key)
ret = b''
for i in obj.items:
ret += i.mode
ret += b' '
ret += i.path.encode("utf8")
ret += b'\x00'
sha = int(i.sha, 16)
ret += sha.to_bytes(20, byteorder="big")
return ret
class GitTree(GitObject):
fmt=b'tree'
def deserialize(self, data):
self.items = tree_parse(data)
def serialize(self):
return tree_serialize(self)
def init(self):
self.items = list()
argsp = argsubparsers.add_parser("ls-tree", help="Pretty-print a tree object.")
argsp.add_argument("-r",
dest="recursive",
action="store_true",
help="Recurse into sub-trees")
argsp.add_argument("tree",
help="A tree-ish object.")
def cmd_ls_tree(args):
repo = repo_find()
ls_tree(repo, args.tree, args.recursive)
def ls_tree(repo, ref, recursive=None, prefix=""):
sha = object_find(repo, ref, fmt=b"tree")
obj = object_read(repo, sha)
for item in obj.items:
if len(item.mode) == 5:
type = item.mode[0:1]
else:
type = item.mode[0:2]
match type: # Determine the type.
case b'04': type = "tree"
case b'10': type = "blob" # A regular file.
case b'12': type = "blob" # A symlink. Blob contents is link target.
case b'16': type = "commit" # A submodule
case _: raise Exception("Weird tree leaf mode {}".format(item.mode))
if not (recursive and type=='tree'): # This is a leaf
print("{0} {1} {2}\t{3}".format(
"0" * (6 - len(item.mode)) + item.mode.decode("ascii"),
# Git's ls-tree displays the type
# of the object pointed to. We can do that too :)
type,
item.sha,
os.path.join(prefix, item.path)))
else: # This is a branch, recurse
ls_tree(repo, item.sha, recursive, os.path.join(prefix, item.path))
argsp = argsubparsers.add_parser("checkout", help="Checkout a commit inside of a directory.")
argsp.add_argument("commit",
default="HEAD",
nargs="?",
help="The commit or tree to checkout.")
argsp.add_argument("path",
default=".",
nargs="?",
help="The EMPTY directory to checkout on.")
def cmd_checkout(args):
repo = repo_find()
obj = object_read(repo, object_find(repo, args.commit))
# If the object is a commit, we grab its tree
if obj.fmt == b'commit':
obj = object_read(repo, obj.kvlm[b'tree'].decode("ascii"))
# Verify that path is an empty directory
if os.path.exists(args.path):
if not os.path.isdir(args.path):
raise Exception("Not a directory {0}!".format(args.path))
# TODO. may overlap file
# if os.listdir(args.path):
# raise Exception("Not empty {0}!".format(args.path))
else:
os.makedirs(args.path)
tree_checkout(repo, obj, os.path.realpath(args.path))
def tree_checkout(repo, tree, path):
for item in tree.items:
obj = object_read(repo, item.sha)
dest = os.path.join(path, item.path)
if obj.fmt == b'tree':
os.mkdir(dest)
tree_checkout(repo, obj, dest)
elif obj.fmt == b'blob':
# @TODO Support symlinks (identified by mode 12****)
with open(dest, 'wb') as f:
f.write(obj.blobdata)
def ref_resolve(repo, ref):
path = repo_file(repo, ref)
# Sometimes, an indirect reference may be broken. This is normal
# in one specific case: we're looking for HEAD on a new repository
# with no commits. In that case, .git/HEAD points to "ref:
# refs/heads/main", but .git/refs/heads/main doesn't exist yet
# (since there's no commit for it to refer to).
if not os.path.isfile(path):
return None
with open(path, 'r') as fp:
data = fp.read()[:-1]
# Drop final \n ^^^^^
if data.startswith("ref: "):
return ref_resolve(repo, data[5:])
else:
return data
def ref_list(repo, path=None):
if not path:
path = repo_dir(repo, "refs")
ret = collections.OrderedDict()
# Git shows refs sorted. To do the same, we use
# an OrderedDict and sort the output of listdir
for f in sorted(os.listdir(path)):
can = os.path.join(path, f)
if os.path.isdir(can):
ret[f] = ref_list(repo, can)
else:
ret[f] = ref_resolve(repo, can)
return ret
argsp = argsubparsers.add_parser("show-ref", help="List references.")
def cmd_show_ref(args):
repo = repo_find()
refs = ref_list(repo)
show_ref(repo, refs, prefix="refs")
def show_ref(repo, refs, with_hash=True, prefix=""):
for k, v in refs.items():
if type(v) == str:
print ("{0}{1}{2}".format(
v + " " if with_hash else "",
prefix + "/" if prefix else "",
k))
else:
show_ref(repo, v, with_hash=with_hash, prefix="{0}{1}{2}".format(prefix, "/" if prefix else "", k))
class GitTag(GitCommit):
fmt = b'tag'
argsp = argsubparsers.add_parser(
"tag",
help="List and create tags")
argsp.add_argument("-a",
action="store_true",
dest="create_tag_object",
help="Whether to create a tag object")
argsp.add_argument("name",
nargs="?",
help="The new tag's name")
argsp.add_argument("object",
default="HEAD",
nargs="?",
help="The object the new tag will point to")
def cmd_tag(args):
repo = repo_find()
if args.name:
tag_create(repo,
args.name,
args.object,
args.create_tag_object)
else:
refs = ref_list(repo)
show_ref(repo, refs["tags"], with_hash=False)
def tag_create(repo, name, ref, create_tag_object=False):
# get the GitObject from the object reference
sha = object_find(repo, ref)
if create_tag_object:
# create tag object (commit)
tag = GitTag()
tag.kvlm = collections.OrderedDict()
tag.kvlm[b'object'] = sha.encode()
tag.kvlm[b'type'] = b'commit'
tag.kvlm[b'tag'] = name.encode()
# Feel free to let the user give their name!
# Notice you can fix this after commit, read on!
tag.kvlm[b'tagger'] = b'Wyag <wyag@example.com>'
# …and a tag message!
tag.kvlm[None] = b"A tag generated by wyag, which won't let you customize the message!"
tag_sha = object_write(tag, repo)
# create reference
ref_create(repo, "tags/" + name, tag_sha)
else:
# create lightweight tag (ref)
ref_create(repo, "tags/" + name, sha)
def ref_create(repo, ref_name, sha):
with open(repo_file(repo, "refs/" + ref_name), 'w') as fp:
fp.write(sha + "\n")
def object_resolve(repo, name):
"""Resolve name to an object hash in repo.
This function is aware of:
- the HEAD literal
- short and long hashes
- tags
- branches
- remote branches"""
candidates = list()
hashRE = re.compile(r"^[0-9A-Fa-f]{4,40}$")
# Empty string? Abort.
if not name.strip():
return None
# Head is nonambiguous
if name == "HEAD":
return [ ref_resolve(repo, "HEAD") ]
# If it's a hex string, try for a hash.
if hashRE.match(name):
# This may be a hash, either small or full. 4 seems to be the
# minimal length for git to consider something a short hash.
# This limit is documented in man git-rev-parse
name = name.lower()
prefix = name[0:2]
path = repo_dir(repo, "objects", prefix, mkdir=False)
if path:
rem = name[2:]
for f in os.listdir(path):
if f.startswith(rem):
# Notice a string startswith() itself, so this
# works for full hashes.
candidates.append(prefix + f)
# Try for references.
as_tag = ref_resolve(repo, "refs/tags/" + name)
if as_tag: # Did we find a tag?
candidates.append(as_tag)
as_branch = ref_resolve(repo, "refs/heads/" + name)
if as_branch: # Did we find a branch?
candidates.append(as_branch)
return candidates
def object_find(repo, name, fmt=None, follow=True):
sha = object_resolve(repo, name)
if not sha:
raise Exception("No such reference {0}.".format(name))
if len(sha) > 1:
raise Exception("Ambiguous reference {0}: Candidates are:\n - {1}.".format(name, "\n - ".join(sha)))
sha = sha[0]
if not fmt:
return sha
while True:
obj = object_read(repo, sha)
# ^^^^^^^^^^^ < this is a bit agressive: we're reading
# the full object just to get its type. And we're doing
# that in a loop, albeit normally short. Don't expect
# high performance here.
if obj.fmt == fmt:
return sha
if not follow:
return None
# Follow tags
if obj.fmt == b'tag':
sha = obj.kvlm[b'object'].decode("ascii")
elif obj.fmt == b'commit' and fmt == b'tree':
sha = obj.kvlm[b'tree'].decode("ascii")
else:
return None
argsp = argsubparsers.add_parser(
"rev-parse",
help="Parse revision (or other objects) identifiers")
argsp.add_argument("--wyag-type",
metavar="type",
dest="type",
choices=["blob", "commit", "tag", "tree"],
default=None,
help="Specify the expected type")
argsp.add_argument("name",
help="The name to parse")
def cmd_rev_parse(args):
if args.type:
fmt = args.type.encode()
else:
fmt = None
repo = repo_find()
print (object_find(repo, args.name, fmt, follow=True))
class GitIndexEntry (object):
def __init__(self, ctime=None, mtime=None, dev=None, ino=None,
mode_type=None, mode_perms=None, uid=None, gid=None,
fsize=None, sha=None, flag_assume_valid=None,
flag_stage=None, name=None):
# The last time a file's metadata changed. This is a pair
# (timestamp in seconds, nanoseconds)
self.ctime = ctime
# The last time a file's data changed. This is a pair
# (timestamp in seconds, nanoseconds)
self.mtime = mtime
# The ID of device containing this file
self.dev = dev
# The file's inode number
self.ino = ino
# The object type, either b1000 (regular), b1010 (symlink),
# b1110 (gitlink).
self.mode_type = mode_type
# The object permissions, an integer.
self.mode_perms = mode_perms
# User ID of owner
self.uid = uid
# Group ID of ownner
self.gid = gid
# Size of this object, in bytes
self.fsize = fsize
# The object's SHA
self.sha = sha
self.flag_assume_valid = flag_assume_valid
self.flag_stage = flag_stage
# Name of the object (full path this time!)
self.name = name
class GitIndex (object):
version = None
entries = []
# ext = None
# sha = None
def __init__(self, version=2, entries=None):
if not entries:
entries = list()
self.version = version
self.entries = entries
def index_read(repo):
index_file = repo_file(repo, "index")
# New repositories have no index!
if not os.path.exists(index_file):
return GitIndex()
with open(index_file, 'rb') as f:
raw = f.read()
header = raw[:12]
signature = header[:4]
assert signature == b"DIRC" # Stands for "DirCache"
version = int.from_bytes(header[4:8], "big")
assert version == 2, "wyag only supports index file version 2"
count = int.from_bytes(header[8:12], "big")
entries = list()
content = raw[12:]
idx = 0
for i in range(0, count):
# Read creation time, as a unix timestamp (seconds since
# 1970-01-01 00:00:00, the "epoch")
ctime_s = int.from_bytes(content[idx: idx+4], "big")
# Read creation time, as nanoseconds after that timestamps,
# for extra precision.
ctime_ns = int.from_bytes(content[idx+4: idx+8], "big")
# Same for modification time: first seconds from epoch.
mtime_s = int.from_bytes(content[idx+8: idx+12], "big")
# Then extra nanoseconds
mtime_ns = int.from_bytes(content[idx+12: idx+16], "big")
# Device ID
dev = int.from_bytes(content[idx+16: idx+20], "big")
# Inode
ino = int.from_bytes(content[idx+20: idx+24], "big")
# Ignored.
unused = int.from_bytes(content[idx+24: idx+26], "big")
assert 0 == unused
mode = int.from_bytes(content[idx+26: idx+28], "big")
mode_type = mode >> 12
assert mode_type in [0b1000, 0b1010, 0b1110]
mode_perms = mode & 0b0000000111111111
# User ID
uid = int.from_bytes(content[idx+28: idx+32], "big")
# Group ID
gid = int.from_bytes(content[idx+32: idx+36], "big")
# Size
fsize = int.from_bytes(content[idx+36: idx+40], "big")
# SHA (object ID). We'll store it as a lowercase hex string
# for consistency.
sha = format(int.from_bytes(content[idx+40: idx+60], "big"), "040x")
# Flags we're going to ignore
flags = int.from_bytes(content[idx+60: idx+62], "big")
# Parse flags
flag_assume_valid = (flags & 0b1000000000000000) != 0
flag_extended = (flags & 0b0100000000000000) != 0
assert not flag_extended
flag_stage = flags & 0b0011000000000000
# Length of the name. This is stored on 12 bits, some max
# value is 0xFFF, 4095. Since names can occasionally go
# beyond that length, git treats 0xFFF as meaning at least
# 0xFFF, and looks for the final 0x00 to find the end of the
# name --- at a small, and probably very rare, performance
# cost.
name_length = flags & 0b0000111111111111
# We've read 62 bytes so far.
idx += 62
if name_length < 0xFFF:
assert content[idx + name_length] == 0x00
raw_name = content[idx:idx+name_length]
idx += name_length + 1
else:
print("Notice: Name is 0x{:X} bytes long.".format(name_length))
# This probably wasn't tested enough. It works with a
# path of exactly 0xFFF bytes. Any extra bytes broke
# something between git, my shell and my filesystem.
null_idx = content.find(b'\x00', idx + 0xFFF)
raw_name = content[idx: null_idx]
idx = null_idx + 1
# Just parse the name as utf8.
name = raw_name.decode("utf8")
# Data is padded on multiples of eight bytes for pointer
# alignment, so we skip as many bytes as we need for the next
# read to start at the right position.
idx = 8 * ceil(idx / 8)
# And we add this entry to our list.
entries.append(GitIndexEntry(ctime=(ctime_s, ctime_ns),
mtime=(mtime_s, mtime_ns),
dev=dev,
ino=ino,
mode_type=mode_type,
mode_perms=mode_perms,