|
8 | 8 | from gitingest.config import DEFAULT_TIMEOUT
|
9 | 9 | from gitingest.utils.git_utils import (
|
10 | 10 | check_repo_exists,
|
| 11 | + checkout_partial_clone, |
11 | 12 | create_git_auth_header,
|
12 | 13 | create_git_command,
|
13 | 14 | ensure_git_installed,
|
14 | 15 | is_github_host,
|
| 16 | + resolve_commit, |
15 | 17 | run_command,
|
16 | 18 | )
|
17 |
| -from gitingest.utils.os_utils import ensure_directory |
| 19 | +from gitingest.utils.os_utils import ensure_directory_exists_or_create |
18 | 20 | from gitingest.utils.timeout_wrapper import async_timeout
|
19 | 21 |
|
20 | 22 | if TYPE_CHECKING:
|
@@ -45,71 +47,42 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
|
45 | 47 | # Extract and validate query parameters
|
46 | 48 | url: str = config.url
|
47 | 49 | local_path: str = config.local_path
|
48 |
| - commit: str | None = config.commit |
49 |
| - branch: str | None = config.branch |
50 |
| - tag: str | None = config.tag |
51 | 50 | partial_clone: bool = config.subpath != "/"
|
52 | 51 |
|
53 |
| - # Create parent directory if it doesn't exist |
54 |
| - await ensure_directory(Path(local_path).parent) |
| 52 | + await ensure_git_installed() |
| 53 | + await ensure_directory_exists_or_create(Path(local_path).parent) |
55 | 54 |
|
56 |
| - # Check if the repository exists |
57 | 55 | if not await check_repo_exists(url, token=token):
|
58 | 56 | msg = "Repository not found. Make sure it is public or that you have provided a valid token."
|
59 | 57 | raise ValueError(msg)
|
60 | 58 |
|
| 59 | + commit = await resolve_commit(config, token=token) |
| 60 | + |
61 | 61 | clone_cmd = ["git"]
|
62 | 62 | if token and is_github_host(url):
|
63 | 63 | clone_cmd += ["-c", create_git_auth_header(token, url=url)]
|
64 | 64 |
|
65 |
| - clone_cmd += ["clone", "--single-branch"] |
66 |
| - |
67 |
| - if config.include_submodules: |
68 |
| - clone_cmd += ["--recurse-submodules"] |
69 |
| - |
| 65 | + clone_cmd += ["clone", "--single-branch", "--no-checkout", "--depth=1"] |
70 | 66 | if partial_clone:
|
71 | 67 | clone_cmd += ["--filter=blob:none", "--sparse"]
|
72 | 68 |
|
73 |
| - # Shallow clone unless a specific commit is requested |
74 |
| - if not commit: |
75 |
| - clone_cmd += ["--depth=1"] |
76 |
| - |
77 |
| - # Prefer tag over branch when both are provided |
78 |
| - if tag: |
79 |
| - clone_cmd += ["--branch", tag] |
80 |
| - elif branch and branch.lower() not in ("main", "master"): |
81 |
| - clone_cmd += ["--branch", branch] |
82 |
| - |
83 | 69 | clone_cmd += [url, local_path]
|
84 | 70 |
|
85 | 71 | # Clone the repository
|
86 |
| - await ensure_git_installed() |
87 | 72 | await run_command(*clone_cmd)
|
88 | 73 |
|
89 | 74 | # Checkout the subpath if it is a partial clone
|
90 | 75 | if partial_clone:
|
91 |
| - await _checkout_partial_clone(config, token) |
| 76 | + await checkout_partial_clone(config, token=token) |
92 | 77 |
|
93 |
| - # Checkout the commit if it is provided |
94 |
| - if commit: |
95 |
| - checkout_cmd = create_git_command(["git"], local_path, url, token) |
96 |
| - await run_command(*checkout_cmd, "checkout", commit) |
| 78 | + git = create_git_command(["git"], local_path, url, token) |
97 | 79 |
|
| 80 | + # Ensure the commit is locally available |
| 81 | + await run_command(*git, "fetch", "--depth=1", "origin", commit) |
98 | 82 |
|
99 |
| -async def _checkout_partial_clone(config: CloneConfig, token: str | None) -> None: |
100 |
| - """Configure sparse-checkout for a partially cloned repository. |
| 83 | + # Write the work-tree at that commit |
| 84 | + await run_command(*git, "checkout", commit) |
101 | 85 |
|
102 |
| - Parameters |
103 |
| - ---------- |
104 |
| - config : CloneConfig |
105 |
| - The configuration for cloning the repository, including subpath and blob flag. |
106 |
| - token : str | None |
107 |
| - GitHub personal access token (PAT) for accessing private repositories. |
108 |
| -
|
109 |
| - """ |
110 |
| - subpath = config.subpath.lstrip("/") |
111 |
| - if config.blob: |
112 |
| - # Remove the file name from the subpath when ingesting from a file url (e.g. blob/branch/path/file.txt) |
113 |
| - subpath = str(Path(subpath).parent.as_posix()) |
114 |
| - checkout_cmd = create_git_command(["git"], config.local_path, config.url, token) |
115 |
| - await run_command(*checkout_cmd, "sparse-checkout", "set", subpath) |
| 86 | + # Update submodules |
| 87 | + if config.include_submodules: |
| 88 | + await run_command(*git, "submodule", "update", "--init", "--recursive", "--depth=1") |
0 commit comments