Skip to content

Commit c0d2eeb

Browse files
authored
enhance: add functionality to check out only a subdirectory of the repo (hashicorp#1)
This is useful for module references like `github.com/terraform-aws-modules/terraform-aws-rds//modules/db_instance`: the whole repository no longer needs to be pulled when only a subdirectory is needed. Note that this does not handle references from the checked-out subdirectory into the rest of the repository, for example if the module references modules located in other directories of the repo, or if it contains symlinks that point into other directories of the repo.
1 parent 4f07d24 commit c0d2eeb

File tree

2 files changed

+122
-7
lines changed

2 files changed

+122
-7
lines changed

get_git.go

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,9 @@ func (g *GitGetter) Get(dst string, u *url.URL) error {
6464
}
6565

6666
// Extract some query parameters we use
67-
var ref, sshKey string
67+
var ref, sshKey, subdir string
6868
depth := 0 // 0 means "don't use shallow clone"
69+
6970
q := u.Query()
7071
if len(q) > 0 {
7172
ref = q.Get("ref")
@@ -74,6 +75,12 @@ func (g *GitGetter) Get(dst string, u *url.URL) error {
7475
sshKey = q.Get("sshkey")
7576
q.Del("sshkey")
7677

78+
subdir = q.Get("subdir")
79+
q.Del("subdir")
80+
if subdir != "" {
81+
depth = 1
82+
}
83+
7784
if n, err := strconv.Atoi(q.Get("depth")); err == nil {
7885
depth = n
7986
}
@@ -127,7 +134,7 @@ func (g *GitGetter) Get(dst string, u *url.URL) error {
127134
if err == nil {
128135
err = g.update(ctx, dst, sshKeyFile, u, ref, depth)
129136
} else {
130-
err = g.clone(ctx, dst, sshKeyFile, u, ref, depth)
137+
err = g.clone(ctx, dst, sshKeyFile, u, ref, depth, subdir)
131138
}
132139
if err != nil {
133140
return err
@@ -189,17 +196,27 @@ func (g *GitGetter) checkout(ctx context.Context, dst string, ref string) error
189196
// positives on short branch names that happen to also be "hex words".
190197
var gitCommitIDRegex = regexp.MustCompile("^[0-9a-fA-F]{7,40}$")
191198

192-
func (g *GitGetter) clone(ctx context.Context, dst, sshKeyFile string, u *url.URL, ref string, depth int) error {
199+
func (g *GitGetter) clone(ctx context.Context, dst, sshKeyFile string, u *url.URL, ref string, depth int, subdir string) error {
193200
args := []string{"clone"}
194201

202+
isCommitID := gitCommitIDRegex.MatchString(ref)
203+
195204
originalRef := ref // we handle an unspecified ref differently than explicitly selecting the default branch below
196205
if ref == "" {
197206
ref = findRemoteDefaultBranch(ctx, u)
198207
}
199208
if depth > 0 {
200209
args = append(args, "--depth", strconv.Itoa(depth))
201-
args = append(args, "--branch", ref)
210+
if subdir == "" || !isCommitID {
211+
args = append(args, "--branch", ref)
212+
}
202213
}
214+
if subdir != "" {
215+
args = append(args, "--filter=blob:none")
216+
args = append(args, "--sparse")
217+
args = append(args, "--no-checkout")
218+
}
219+
203220
args = append(args, "--", u.String(), dst)
204221

205222
cmd := exec.CommandContext(ctx, "git", args...)
@@ -212,13 +229,33 @@ func (g *GitGetter) clone(ctx context.Context, dst, sshKeyFile string, u *url.UR
212229
// We can't accurately recognize the resulting error here without
213230
// hard-coding assumptions about git's human-readable output, but
214231
// we can at least try a heuristic.
215-
if gitCommitIDRegex.MatchString(originalRef) {
232+
if isCommitID {
216233
return fmt.Errorf("%w (note that setting 'depth' requires 'ref' to be a branch or tag name)", err)
217234
}
218235
}
219236
return err
220237
}
221238

239+
if subdir != "" {
240+
cmd = exec.CommandContext(ctx, "git", "sparse-checkout", "set", subdir)
241+
cmd.Dir = dst
242+
err = getRunCommand(cmd)
243+
if err != nil {
244+
return err
245+
}
246+
247+
if isCommitID {
248+
cmd = exec.CommandContext(ctx, "git", "fetch", "origin", ref, "--depth", "1")
249+
cmd.Dir = dst
250+
err = getRunCommand(cmd)
251+
if err != nil {
252+
return err
253+
}
254+
}
255+
256+
return g.checkout(ctx, dst, ref)
257+
}
258+
222259
if depth < 1 && originalRef != "" {
223260
// If we didn't add --depth and --branch above then we will now be
224261
// on the remote repository's default branch, rather than the selected

get_git_test.go

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -892,7 +892,7 @@ func TestGitGetter_BadGitConfig(t *testing.T) {
892892
err = g.update(ctx, dst, testGitToken, url, "main", 1)
893893
} else {
894894
// Clone a repository with a git config file
895-
err = g.clone(ctx, dst, testGitToken, url, "main", 1)
895+
err = g.clone(ctx, dst, testGitToken, url, "main", 1, "")
896896
if err != nil {
897897
t.Fatalf(err.Error())
898898
}
@@ -950,7 +950,7 @@ func TestGitGetter_BadGitDirName(t *testing.T) {
950950
}
951951
} else {
952952
// Clone a repository with a git directory
953-
err = g.clone(ctx, dst, testGitToken, url, "main", 1)
953+
err = g.clone(ctx, dst, testGitToken, url, "main", 1, "")
954954
if err != nil {
955955
t.Fatalf(err.Error())
956956
}
@@ -984,6 +984,77 @@ func TestGitGetter_BadGitDirName(t *testing.T) {
984984
}
985985
}
986986

987+
// TestGitGetter_sparseCheckout checks that passing a "subdir" query parameter
// to GitGetter.Get materializes only that subdirectory in the destination.
//
// It commits one file under subdir1 and one under subdir2, fetches the repo
// with subdir=subdir1, and then asserts that the subdir1 file is present on
// disk while the subdir2 file is not. The test is skipped when testHasGit is
// false.
func TestGitGetter_sparseCheckout(t *testing.T) {
988+
if !testHasGit {
989+
t.Skip("git not found, skipping")
990+
}
991+
992+
g := new(GitGetter)
993+
dst := tempDir(t)
994+
995+
// Two sibling directories: one that will be requested, one that must be
// left out of the working tree.
repo := testGitRepo(t, "sparse-checkout")
996+
repo.commitFile("subdir1/file1.txt", "hello")
997+
repo.commitFile("subdir2/file2.txt", "world")
998+
999+
// Request only subdir1 by adding the "subdir" parameter to the repo URL.
q := repo.url.Query()
1000+
q.Add("subdir", "subdir1")
1001+
repo.url.RawQuery = q.Encode()
1002+
1003+
if err := g.Get(dst, repo.url); err != nil {
1004+
t.Fatalf("err: %s", err)
1005+
}
1006+
1007+
// Verify the file in the requested subdirectory (subdir1) was checked out
1008+
mainPath := filepath.Join(dst, "subdir1/file1.txt")
1009+
if _, err := os.Stat(mainPath); err != nil {
1010+
t.Fatalf("err: %s", err)
1011+
}
1012+
1013+
// Verify the file in subdir2 was not checked out. Any Stat error is
// accepted as "does not exist"; only a successful Stat fails the test.
1014+
mainPath = filepath.Join(dst, "subdir2/file2.txt")
1015+
if _, err := os.Stat(mainPath); err == nil {
1016+
t.Fatalf("expected subdir2 file to not exist")
1017+
}
1018+
}
1019+
1020+
// TestGitGetter_sparseCheckoutWithCommitID checks that the "subdir" query
// parameter also works when "ref" is set to the value returned by
// repo.latestCommit() rather than a branch or tag name.
//
// It commits one file under subdir1 and one under subdir2, fetches the repo
// with ref=<commitID> and subdir=subdir1, and then asserts that the subdir1
// file is present on disk while the subdir2 file is not. The test is skipped
// when testHasGit is false.
func TestGitGetter_sparseCheckoutWithCommitID(t *testing.T) {
1021+
if !testHasGit {
1022+
t.Skip("git not found, skipping")
1023+
}
1024+
1025+
g := new(GitGetter)
1026+
dst := tempDir(t)
1027+
1028+
// Two sibling directories: one that will be requested, one that must be
// left out of the working tree.
repo := testGitRepo(t, "sparse-checkout-commit-id")
1029+
repo.commitFile("subdir1/file1.txt", "hello")
1030+
repo.commitFile("subdir2/file2.txt", "world")
1031+
// NOTE(review): latestCommit is defined outside this view; presumably it
// returns the ID of the commit created by the last commitFile call — confirm.
commitID, err := repo.latestCommit()
1032+
if err != nil {
1033+
t.Fatal(err)
1034+
}
1035+
1036+
// Pin the fetch to that commit and request only subdir1.
q := repo.url.Query()
1037+
q.Add("ref", commitID)
1038+
q.Add("subdir", "subdir1")
1039+
repo.url.RawQuery = q.Encode()
1040+
1041+
if err := g.Get(dst, repo.url); err != nil {
1042+
t.Fatalf("err: %s", err)
1043+
}
1044+
1045+
// Verify the file in the requested subdirectory (subdir1) was checked out
1046+
mainPath := filepath.Join(dst, "subdir1/file1.txt")
1047+
if _, err := os.Stat(mainPath); err != nil {
1048+
t.Fatalf("err: %s", err)
1049+
}
1050+
1051+
// Verify the file in subdir2 was not checked out. Any Stat error is
// accepted as "does not exist"; only a successful Stat fails the test.
1052+
mainPath = filepath.Join(dst, "subdir2/file2.txt")
1053+
if _, err := os.Stat(mainPath); err == nil {
1054+
t.Fatalf("expected subdir2 file to not exist")
1055+
}
1056+
}
1057+
9871058
// gitRepo is a helper struct which controls a single temp git repo.
9881059
type gitRepo struct {
9891060
t *testing.T
@@ -1035,6 +1106,13 @@ func (r *gitRepo) git(args ...string) {
10351106
// commitFile writes and commits a text file to the repo.
10361107
func (r *gitRepo) commitFile(file, content string) {
10371108
path := filepath.Join(r.dir, file)
1109+
1110+
// Ensure the directory structure exists
1111+
dir := filepath.Dir(path)
1112+
if err := os.MkdirAll(dir, 0755); err != nil {
1113+
r.t.Fatal(err)
1114+
}
1115+
10381116
if err := ioutil.WriteFile(path, []byte(content), 0600); err != nil {
10391117
r.t.Fatal(err)
10401118
}

0 commit comments

Comments
 (0)