Skip to content

Commit 47ee254

Browse files
committed
Duplicate parent lowlevel
1 parent 22d8ed1 commit 47ee254

File tree

3 files changed

+122
-46
lines changed

3 files changed

+122
-46
lines changed
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#!/usr/bin/env python3
2+
3+
"""
4+
Git does not let a commit have the same parent twice, but GitHub does, and normally displays it.
5+
But as of 2016-05-17 GitHub did not paginate this edge case, so requesting the commit returns a 502 for large numbers of links.
6+
"""
7+
8+
import itertools
9+
10+
import util
11+
12+
# Number of times the same parent is repeated in the second commit.
DUPLICATE_PARENT_COUNT = 1000000

# Set up the scratch repository via the shared helper.
util.init()

# First commit: a one-file tree with no parents, authored by b'a'.
tree_sha = util.create_tree_with_one_file()
root_sha, _, _ = util.save_commit_object(tree_sha, author_name=b'a')

# Second commit: same tree, with the first commit repeated as parent
# DUPLICATE_PARENT_COUNT times, authored by b'b'.
duplicated_parents = itertools.repeat(root_sha, DUPLICATE_PARENT_COUNT)
head_sha, _, _ = util.save_commit_object(
    tree_sha,
    duplicated_parents,
    author_name=b'b',
)

# Finish.
util.create_master(head_sha)
util.clone()
Lines changed: 14 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,86 +1,54 @@
11
#!/usr/bin/env python3
22

33
"""
4-
Create objects very manually. Goals:
5-
6-
- learn the object file
7-
- be faster than Git to generate large repos
4+
Create objects by hand, aiming to generate large repos faster than Git itself can.
85
"""
96

10-
import datetime
11-
import subprocess
12-
import hashlib
13-
import zlib
14-
import os
15-
167
import util
178

189
util.init()
19-
git_dir = b'.git'
20-
objects_dir = os.path.join(git_dir, b'objects')
2110

2211
# Directory parameters.
2312
blob_content = b'a'
2413
blob_basename = b'a'
2514
blob_mode = b'100644'
2615

16+
# Commit parameters.
2717
name = b'a'
2818
email = b'a@a.com'
2919
# 2000-01-01T00:00:00+0000
3020
date = b'946684800 +0000'
31-
3221
author_date = date
3322
author_email = email
3423
author_name = name
3524
committer_date = date
3625
committer_email = email
3726
committer_name = name
3827
message = b'a'
39-
40-
def get_object_and_sha(obj_type, content):
41-
obj = b'%s %s\0%s' % (obj_type, str(len(content)).encode('ascii'), content)
42-
hash = hashlib.sha1(obj)
43-
return (obj, hash.hexdigest().encode('ascii'), hash.digest())
44-
45-
def save_object(obj_type, content):
46-
obj, sha_ascii, sha = get_object_and_sha(obj_type, content)
47-
obj_dir = os.path.join(objects_dir, sha_ascii[:2])
48-
obj_path = os.path.join(obj_dir, sha_ascii[2:])
49-
os.makedirs(obj_dir, exist_ok=True)
50-
with open(obj_path, 'wb') as f:
51-
f.write(zlib.compress(obj))
52-
53-
def get_git_hash_object(obj_type, input):
54-
cmd = [b'git', b'hash-object', b'--stdin', b'-t', obj_type]
55-
return subprocess.check_output(cmd, input=input).rstrip()
28+
# ASCII hex of parents.
29+
parents = ()
5630

5731
# Blob.
58-
save_object(b'blob', blob_content)
59-
obj, blob_sha_ascii, blob_sha = get_object_and_sha(b'blob', blob_content)
32+
blob_sha_ascii, blob_sha = util.save_object(b'blob', blob_content)
6033
# Check sha matches Git.
61-
blob_sha_git = get_git_hash_object(b'blob', blob_content)
34+
blob_sha_git = util.get_git_hash_object(b'blob', blob_content)
6235
assert blob_sha_ascii == blob_sha_git
6336

6437
# Tree.
65-
tree_content = b'%s %s\0%s' % (blob_mode, blob_basename, blob_sha)
66-
save_object(b'tree', tree_content)
38+
tree_sha_ascii, tree_sha, tree_content = util.save_tree_object(blob_mode, blob_basename, blob_sha)
6739
# Check sha matches Git.
68-
obj, tree_sha_ascii, tree_sha = get_object_and_sha(b'tree', tree_content)
69-
tree_sha_git = get_git_hash_object(b'tree', tree_content)
40+
tree_sha_git = util.get_git_hash_object(b'tree', tree_content)
7041
assert tree_sha_ascii == tree_sha_git
7142

7243
# Commit.
73-
commit_content = b'tree %s\nauthor %s <%s> %s\ncommitter %s <%s> %s\n\n%s\n' % (
74-
tree_sha_ascii,
44+
commit_sha_ascii, commit_sha, commit_content = util.save_commit_object(
45+
tree_sha_ascii, parents,
7546
author_name, author_email, author_date,
7647
committer_name, committer_email, committer_date,
7748
message)
78-
save_object(b'commit', commit_content)
79-
# Check sha matches Git.
80-
obj, commit_sha_ascii, commit_sha = get_object_and_sha(b'commit', commit_content)
81-
commit_sha_git = get_git_hash_object(b'commit', commit_content)
49+
commit_sha_git = util.get_git_hash_object(b'commit', commit_content)
8250
assert commit_sha_ascii == commit_sha_git
8351

84-
# Create master branch.
85-
subprocess.check_output(['git', 'branch', 'master', commit_sha_ascii])
86-
subprocess.check_output(['git', 'clone', '.', '../clone.tmp'])
52+
# Finish.
53+
util.create_master(commit_sha_ascii)
54+
util.clone()

other-test-repos/util.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,33 @@
1+
import hashlib
12
import os
23
import shutil
34
import subprocess
5+
import zlib
6+
7+
# Repository metadata paths, as bytes, relative to the current directory.
git_dir = b'.git'
objects_dir = os.path.join(git_dir, b'objects')

# Tree parameters.
default_blob_basename = b'a'
default_blob_content = b'a'
default_blob_mode = b'100644'

# Commit parameters.
default_name = b'a'
default_email = b'a@a.com'
# 2000-01-01T00:00:00+0000
default_date_s = 946684800
# Template for a date field: seconds since the epoch, then the UTC offset.
default_date_format = b'%s +0000'
# Built from the template above so the two can never drift apart.
default_date = default_date_format % str(default_date_s).encode('ascii')
default_author_date = default_date
default_author_email = default_email
default_author_name = default_name
default_committer_date = default_date
default_committer_email = default_email
default_committer_name = default_name
default_message = b'a'
# ASCII hex of parents.
default_parents = ()
431

532
def init():
633
repo = 'repo.tmp'
@@ -9,3 +36,64 @@ def init():
936
os.mkdir(repo)
1037
os.chdir(repo)
1138
subprocess.check_output(['git', 'init', '-q'])
39+
40+
def get_object_and_sha(obj_type, content):
    """Build the serialized object and compute its SHA-1.

    The serialized form is ``<obj_type> <len(content)>\\0<content>``.

    Args:
        obj_type: Object type as bytes, e.g. ``b'blob'``.
        content: Object payload as bytes.

    Returns:
        A 3-tuple ``(obj, sha_ascii, sha)``: the serialized object, its
        SHA-1 as ASCII hex bytes, and its SHA-1 as raw digest bytes.
    """
    size_field = str(len(content)).encode('ascii')
    serialized = b'%s %s\0%s' % (obj_type, size_field, content)
    digest = hashlib.sha1(serialized)
    hex_sha = digest.hexdigest().encode('ascii')
    return (serialized, hex_sha, digest.digest())
44+
45+
def save_object(obj_type, content):
    """Write an object file under ``objects_dir`` and return its hashes.

    The serialized object is zlib-compressed and stored at
    ``<objects_dir>/<first 2 hex chars>/<remaining hex chars>``; the
    two-character directory is created when missing.

    Args:
        obj_type: Object type as bytes, e.g. ``b'blob'``.
        content: Object payload as bytes.

    Returns:
        ``(sha_ascii, sha)``: the SHA-1 as ASCII hex bytes and as raw
        digest bytes.
    """
    serialized, hex_sha, raw_sha = get_object_and_sha(obj_type, content)
    prefix_dir = os.path.join(objects_dir, hex_sha[:2])
    os.makedirs(prefix_dir, exist_ok=True)
    target_path = os.path.join(prefix_dir, hex_sha[2:])
    with open(target_path, 'wb') as out:
        out.write(zlib.compress(serialized))
    return hex_sha, raw_sha
53+
54+
# TODO multiple children object.
55+
def save_tree_object(mode, basename, sha):
    """Save a tree object containing a single entry.

    Args:
        mode: Entry mode as bytes, e.g. ``b'100644'``.
        basename: Entry name as bytes.
        sha: Raw (binary) SHA-1 digest of the entry's object.

    Returns:
        ``(tree_sha_ascii, tree_sha, tree_content)``: the two hashes
        returned by ``save_object`` followed by the tree payload.
    """
    entry = b'%s %s\0%s' % (mode, basename, sha)
    hex_sha, raw_sha = save_object(b'tree', entry)
    return hex_sha, raw_sha, entry
58+
59+
def save_commit_object(
        tree_sha_ascii,
        parents=default_parents,
        author_name=default_author_name,
        author_email=default_author_email,
        author_date=default_author_date,
        committer_name=default_committer_name,
        committer_email=default_committer_email,
        committer_date=default_committer_date,
        message=default_message):
    """Save a commit object and return its hashes and payload.

    Args:
        tree_sha_ascii: ASCII hex SHA-1 (bytes) of the commit's tree.
        parents: Iterable of ASCII hex SHA-1s (bytes) of the parents.
            May be any iterable, including a one-shot iterator such as
            ``itertools.repeat(sha, n)``; any falsy value means no parents.
        author_name, author_email, author_date: Author fields as bytes.
        committer_name, committer_email, committer_date: Committer
            fields as bytes.
        message: Commit message as bytes; a trailing newline is appended.

    Returns:
        ``(commit_sha_ascii, commit_sha, commit_content)``: the two
        hashes returned by ``save_object`` followed by the commit payload.
    """
    # Materialize before testing for emptiness: iterators are always
    # truthy, so an empty one would otherwise emit a dangling "parent "
    # line with no hash after it. ``or ()`` keeps None and other falsy
    # values meaning "no parents".
    parents = tuple(parents or ())
    if parents:
        sep = b'\nparent '
        parents_bytes = sep + sep.join(parents) + b'\n'
    else:
        parents_bytes = b'\n'
    commit_content = b'tree %s%sauthor %s <%s> %s\ncommitter %s <%s> %s\n\n%s\n' % (
        tree_sha_ascii, parents_bytes,
        author_name, author_email, author_date,
        committer_name, committer_email, committer_date,
        message)
    return save_object(b'commit', commit_content) + (commit_content,)
81+
82+
def get_git_hash_object(obj_type, input):
    """Ask ``git hash-object`` for the hash of ``input``.

    Args:
        obj_type: Object type as bytes, passed to ``-t``.
        input: Object payload as bytes, fed to the command on stdin.

    Returns:
        The command's stdout as bytes with trailing whitespace stripped.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    argv = [b'git', b'hash-object', b'--stdin', b'-t', obj_type]
    stdout = subprocess.check_output(argv, input=input)
    return stdout.rstrip()
85+
86+
def create_master(commit_sha_ascii):
    """Run ``git branch master <commit_sha_ascii>`` in the current directory.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    argv = ['git', 'branch', 'master', commit_sha_ascii]
    subprocess.check_output(argv)
88+
89+
def clone():
    """Quietly clone the current directory's repository into ``../clone.tmp``.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    argv = ['git', 'clone', '-q', '.', '../clone.tmp']
    subprocess.check_output(argv)
91+
92+
def create_tree_with_one_file(
        blob_mode=default_blob_mode,
        blob_basename=default_blob_basename,
        blob_content=default_blob_content,
):
    """Save one blob and a tree whose only entry is that blob.

    Args:
        blob_mode: Entry mode for the blob, as bytes.
        blob_basename: Entry name for the blob, as bytes.
        blob_content: Blob payload, as bytes.

    Returns:
        The ASCII hex SHA-1 (bytes) of the saved tree.
    """
    # The tree entry embeds the raw digest, so the hex form is not needed.
    _, blob_raw_sha = save_object(b'blob', blob_content)
    tree_hex_sha, _, _ = save_tree_object(blob_mode, blob_basename, blob_raw_sha)
    return tree_hex_sha

0 commit comments

Comments
 (0)