|
1 | 1 | import asyncio
|
2 |
| -from typing import Any, Dict, Tuple |
| 2 | +from dataclasses import dataclass |
| 3 | +from typing import Optional, Tuple |
3 | 4 |
|
4 |
| -from gitingest.utils import async_timeout |
| 5 | +from gitingest.utils import AsyncTimeoutError, async_timeout |
5 | 6 |
|
6 | 7 | CLONE_TIMEOUT = 20
|
7 | 8 |
|
8 | 9 |
|
@dataclass
class CloneConfig:
    """
    Settings describing a single repository clone operation.

    Attributes
    ----------
    url : str
        The URL of the repository to clone.
    local_path : str
        The local directory the repository is cloned into.
    commit : Optional[str]
        Commit hash to check out after cloning, or None to skip the checkout.
    branch : Optional[str]
        Branch to clone, or None to use the remote's default branch.
    """

    url: str
    local_path: str
    commit: Optional[str] = None
    branch: Optional[str] = None
| 16 | + |
| 17 | + |
async def check_repo_exists(url: str) -> bool:
    """
    Check if a repository exists at the given URL using an HTTP HEAD request.

    Runs ``curl -I <url>`` and inspects the status line(s) of the returned headers.

    Parameters
    ----------
    url : str
        The URL of the repository.

    Returns
    -------
    bool
        False if curl exits with a non-zero status or a response status line
        reports 404; True otherwise.
    """
    proc = await asyncio.create_subprocess_exec(
        "curl",
        "-I",
        url,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, _ = await proc.communicate()
    if proc.returncode != 0:
        return False

    # Header bytes are not guaranteed to be valid UTF-8; decoding must never raise here.
    headers = stdout.decode(errors="replace")

    # A status line has the form "HTTP/<version> <code> [reason]". Matching on the
    # parsed code (instead of the literal "HTTP/1.1 404" / "HTTP/2 404" substrings)
    # also covers other protocol versions such as HTTP/1.0 and HTTP/3.
    for line in headers.splitlines():
        parts = line.split()
        if len(parts) >= 2 and parts[0].upper().startswith("HTTP/") and parts[1] == "404":
            return False
    return True
|
23 | 45 |
|
24 | 46 |
|
25 |
| -@async_timeout(CLONE_TIMEOUT) |
26 |
| -async def clone_repo(query: Dict[str, Any]) -> Tuple[bytes, bytes]: |
27 |
| - if not await check_repo_exists(query['url']): |
28 |
| - raise ValueError("Repository not found, make sure it is public") |
async def run_git_command(*args: str) -> Tuple[bytes, bytes]:
    """
    Executes a git command asynchronously and captures its output.

    Parameters
    ----------
    *args : str
        The git command and its arguments to execute. The first element is the
        program to run (e.g. ``"git"``).

    Returns
    -------
    Tuple[bytes, bytes]
        A tuple containing the stdout and stderr of the git command.

    Raises
    ------
    RuntimeError
        If the git command exits with a non-zero status.
    """
    proc = await asyncio.create_subprocess_exec(
        *args,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    try:
        stdout, stderr = await proc.communicate()
    except asyncio.CancelledError:
        # Cancelling the awaiting task (e.g. by a timeout wrapper) does not stop the
        # child process; kill and reap it so it does not keep running in the background.
        if proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                # The process exited between the returncode check and kill().
                pass
            await proc.wait()
        raise

    if proc.returncode != 0:
        # Decode leniently so undecodable stderr bytes cannot mask the real failure.
        error_message = stderr.decode(errors="replace").strip()
        raise RuntimeError(f"Git command failed: {' '.join(args)}\nError: {error_message}")

    return stdout, stderr
|
| 77 | + |
| 78 | + |
@async_timeout(CLONE_TIMEOUT)
async def clone_repo(config: CloneConfig) -> Tuple[bytes, bytes]:
    """
    Clones a repository to a local path based on the provided configuration.

    Parameters
    ----------
    config : CloneConfig
        A configuration object with the following attributes:
            - url (str): The URL of the repository.
            - local_path (str): The local path to clone the repository to.
            - commit (Optional[str]): The specific commit hash to checkout.
            - branch (Optional[str]): The branch to clone. When empty, 'main' or
              'master' (case-insensitive), the remote's default branch is cloned.

    Returns
    -------
    Tuple[bytes, bytes]
        A tuple containing the stdout and stderr of the last git command executed.

    Raises
    ------
    ValueError
        If the repository does not exist or if `url` / `local_path` is empty.
    RuntimeError
        If any git command fails during execution.
    AsyncTimeoutError
        Presumably raised by the `async_timeout` decorator when cloning exceeds
        CLONE_TIMEOUT seconds -- confirm against its implementation.
    """
    url = config.url
    local_path = config.local_path
    commit = config.commit
    branch = config.branch

    if not url:
        raise ValueError("The 'url' parameter is required.")

    if not local_path:
        raise ValueError("The 'local_path' parameter is required.")

    # Check if the repository exists
    if not await check_repo_exists(url):
        raise ValueError("Repository not found, make sure it is public")

    if commit:
        # Scenario 1: clone without --depth so history is available, then check out
        # the requested commit. Only the checkout output is returned.
        await run_git_command("git", "clone", "--single-branch", url, local_path)
        return await run_git_command("git", "-C", local_path, "checkout", commit)

    if branch and branch.lower() not in ("main", "master"):
        # Scenario 2: shallow clone of an explicitly requested branch
        return await run_git_command(
            "git", "clone", "--depth=1", "--single-branch", "--branch", branch, url, local_path
        )

    # Scenario 3: shallow clone of the default branch
    return await run_git_command("git", "clone", "--depth=1", "--single-branch", url, local_path)
0 commit comments