Skip to content

Commit fc956ee

Browse files
author
Paolo Tranquilli
committed
CI: use git-lfs fork for git_lfs_probe.py
1 parent 570260b commit fc956ee

File tree

3 files changed

+92
-173
lines changed

3 files changed

+92
-173
lines changed

MODULE.bazel

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,21 @@ go_deps = use_extension("@gazelle//:extensions.bzl", "go_deps")
166166
go_deps.from_file(go_mod = "//go/extractor:go.mod")
167167
use_repo(go_deps, "org_golang_x_mod", "org_golang_x_tools")
168168

169+
git_lfs_binary = use_repo_rule("//misc/bazel:lfs.bzl", "git_lfs_binary")
170+
171+
# to update, check out dsp-testing/codeql-git-lfs, make your changes there, and push a tag with
172+
# `git tag $(git describe)-ls-urls && git push --tags`
173+
# then wait for https://github.com/dsp-testing/codeql-git-lfs/actions/runs/11800398535 to end,
174+
# then copy the version and sha256 values here from https://github.com/dsp-testing/codeql-git-lfs/releases/latest
175+
git_lfs_binary(
176+
name = "git-lfs",
177+
sha256_linux = "08b75033a98f77f7e60b0928e160a6f0a5c5cd9d91b8605537969eec6980219a",
178+
sha256_macos_arm64 = "8a17c488c975dbd050610a0b2692567064dbfef33b6c58ee89ea02f649cc0114",
179+
sha256_macos_x86 = "9fc7265c5345901ca5cb83707ed5374fc6dfbf7ed45d2c047d5929bfe0b5f64a",
180+
sha256_windows = "ef2f5794667584b155786291d4f839c59bfe10fcc5f870902c64f3063ffd9923",
181+
version = "v3.5.0-179-gfd031ea1",
182+
)
183+
169184
lfs_files = use_repo_rule("//misc/bazel:lfs.bzl", "lfs_files")
170185

171186
lfs_files(

misc/bazel/internal/git_lfs_probe.py

Lines changed: 28 additions & 172 deletions
Original file line numberDiff line numberDiff line change
@@ -24,201 +24,57 @@
2424
import argparse
2525

2626
def options():
27+
def resolved_path(path):
28+
return pathlib.Path(path).expanduser().resolve()
2729
p = argparse.ArgumentParser(description=__doc__)
28-
p.add_argument("--hash-only", action="store_true")
29-
p.add_argument("sources", type=pathlib.Path, nargs="+")
30-
return p.parse_args()
31-
32-
33-
TIMEOUT = 20
34-
35-
def warn(message: str) -> None:
36-
print(f"WARNING: {message}", file=sys.stderr)
37-
38-
39-
@dataclass
40-
class Endpoint:
41-
name: str
42-
href: str
43-
ssh: typing.Optional[str] = None
44-
headers: typing.Dict[str, str] = dataclasses.field(default_factory=dict)
45-
46-
def update_headers(self, d: typing.Iterable[typing.Tuple[str, str]]):
47-
self.headers.update((k.capitalize(), v) for k, v in d)
48-
49-
50-
class NoEndpointsFound(Exception):
51-
pass
52-
53-
54-
opts = options()
55-
sources = [p.resolve() for p in opts.sources]
56-
source_dir = pathlib.Path(os.path.commonpath(src.parent for src in sources))
57-
source_dir = subprocess.check_output(
58-
["git", "rev-parse", "--show-toplevel"], cwd=source_dir, text=True
59-
).strip()
30+
excl = p.add_mutually_exclusive_group(required=True)
31+
excl.add_argument("--hash-only", action="store_true")
32+
excl.add_argument("--git-lfs", type=resolved_path)
33+
p.add_argument("sources", type=resolved_path, nargs="+")
34+
opts = p.parse_args()
35+
source_dir = pathlib.Path(os.path.commonpath(src.parent for src in opts.sources))
36+
opts.source_dir = subprocess.check_output(
37+
["git", "rev-parse", "--show-toplevel"], cwd=source_dir, text=True
38+
).strip()
39+
return opts
6040

6141

6242
def get_env(s: str, sep: str = "=") -> typing.Iterable[typing.Tuple[str, str]]:
6343
for m in re.finditer(rf"(.*?){sep}(.*)", s, re.M):
6444
yield m.groups()
6545

6646

67-
def git(*args, **kwargs):
68-
proc = subprocess.run(
69-
("git",) + args, stdout=subprocess.PIPE, text=True, cwd=source_dir, **kwargs
70-
)
71-
return proc.stdout.strip() if proc.returncode == 0 else None
72-
73-
74-
endpoint_re = re.compile(r"^Endpoint(?: \((.*)\))?$")
75-
76-
77-
def get_endpoint_addresses() -> typing.Iterable[Endpoint]:
78-
"""Get all lfs endpoints, including SSH if present"""
79-
lfs_env_items = get_env(
80-
subprocess.check_output(["git", "lfs", "env"], text=True, cwd=source_dir)
81-
)
82-
current_endpoint = None
83-
for k, v in lfs_env_items:
84-
m = endpoint_re.match(k)
85-
if m:
86-
if current_endpoint:
87-
yield current_endpoint
88-
href, _, _ = v.partition(" ")
89-
current_endpoint = Endpoint(name=m[1] or "default", href=href)
90-
elif k == " SSH" and current_endpoint:
91-
current_endpoint.ssh = v
92-
if current_endpoint:
93-
yield current_endpoint
94-
95-
96-
def get_endpoints() -> typing.Iterable[Endpoint]:
97-
for endpoint in get_endpoint_addresses():
98-
endpoint.headers = {
99-
"Content-Type": "application/vnd.git-lfs+json",
100-
"Accept": "application/vnd.git-lfs+json",
101-
}
102-
if endpoint.ssh:
103-
# see https://github.com/git-lfs/git-lfs/blob/main/docs/api/authentication.md
104-
server, _, path = endpoint.ssh.partition(":")
105-
ssh_command = shutil.which(
106-
os.environ.get("GIT_SSH", os.environ.get("GIT_SSH_COMMAND", "ssh"))
107-
)
108-
assert ssh_command, "no ssh command found"
109-
cmd = [
110-
ssh_command,
111-
"-oStrictHostKeyChecking=accept-new",
112-
server,
113-
"git-lfs-authenticate",
114-
path,
115-
"download",
116-
]
117-
try:
118-
res = subprocess.run(cmd, stdout=subprocess.PIPE, timeout=TIMEOUT)
119-
except subprocess.TimeoutExpired:
120-
warn(f"ssh timed out when connecting to {server}, ignoring {endpoint.name} endpoint")
121-
continue
122-
if res.returncode != 0:
123-
warn(f"ssh failed when connecting to {server}, ignoring {endpoint.name} endpoint")
124-
continue
125-
ssh_resp = json.loads(res.stdout)
126-
endpoint.href = ssh_resp.get("href", endpoint)
127-
endpoint.update_headers(ssh_resp.get("header", {}).items())
128-
url = urlparse(endpoint.href)
129-
# this is how actions/checkout persists credentials
130-
# see https://github.com/actions/checkout/blob/44c2b7a8a4ea60a981eaca3cf939b5f4305c123b/src/git-auth-helper.ts#L56-L63
131-
auth = git("config", f"http.{url.scheme}://{url.netloc}/.extraheader") or ""
132-
endpoint.update_headers(get_env(auth, sep=": "))
133-
if os.environ.get("GITHUB_TOKEN"):
134-
endpoint.headers["Authorization"] = f"token {os.environ['GITHUB_TOKEN']}"
135-
if "Authorization" not in endpoint.headers:
136-
# last chance: use git credentials (possibly backed by a credential helper like the one installed by gh)
137-
# see https://git-scm.com/docs/git-credential
138-
credentials = git(
139-
"credential",
140-
"fill",
141-
check=True,
142-
# drop leading / from url.path
143-
input=f"protocol={url.scheme}\nhost={url.netloc}\npath={url.path[1:]}\n",
144-
)
145-
if credentials is None:
146-
warn(f"no authorization method found, ignoring {endpoint.name} endpoint")
147-
continue
148-
credentials = dict(get_env(credentials))
149-
auth = base64.b64encode(
150-
f'{credentials["username"]}:{credentials["password"]}'.encode()
151-
).decode("ascii")
152-
endpoint.headers["Authorization"] = f"Basic {auth}"
153-
yield endpoint
154-
155-
156-
# see https://github.com/git-lfs/git-lfs/blob/310d1b4a7d01e8d9d884447df4635c7a9c7642c2/docs/api/basic-transfers.md
157-
def get_locations(objects):
47+
def get_locations(objects, opts):
15848
ret = ["local" for _ in objects]
15949
indexes = [i for i, o in enumerate(objects) if o]
160-
if not indexes:
161-
# all objects are local, do not send an empty request as that would be an error
162-
return ret
16350
if opts.hash_only:
16451
for i in indexes:
16552
ret[i] = objects[i]["oid"]
166-
return ret
167-
data = {
168-
"operation": "download",
169-
"transfers": ["basic"],
170-
"objects": [objects[i] for i in indexes],
171-
"hash_algo": "sha256",
172-
}
173-
for endpoint in get_endpoints():
174-
req = urllib.request.Request(
175-
f"{endpoint.href}/objects/batch",
176-
headers=endpoint.headers,
177-
data=json.dumps(data).encode("ascii"),
178-
)
179-
try:
180-
with urllib.request.urlopen(req, timeout=TIMEOUT) as resp:
181-
data = json.load(resp)
182-
assert len(data["objects"]) == len(
183-
indexes
184-
), f"received {len(data)} objects, expected {len(indexes)}"
185-
for i, resp in zip(indexes, data["objects"]):
186-
ret[i] = f'{resp["oid"]} {resp["actions"]["download"]["href"]}'
187-
return ret
188-
except urllib.error.URLError as e:
189-
warn(f"encountered {type(e).__name__} {e}, ignoring endpoint {endpoint.name}")
190-
continue
191-
except KeyError:
192-
warn(f"encountered malformed response, ignoring endpoint {endpoint.name}:\n{json.dumps(data, indent=2)}")
193-
continue
194-
raise NoEndpointsFound
195-
53+
else:
54+
cmd = [opts.git_lfs, "ls-urls", "--json"]
55+
cmd.extend(objects[i]["path"] for i in indexes)
56+
data = json.loads(subprocess.check_output(cmd, cwd=opts.source_dir))
57+
for i, f in zip(indexes, data["files"]):
58+
ret[i] = f'{f["oid"]} {f["url"]}'
59+
return ret
19660

19761
def get_lfs_object(path):
19862
with open(path, "rb") as fileobj:
19963
lfs_header = "version https://git-lfs.github.com/spec".encode()
20064
actual_header = fileobj.read(len(lfs_header))
201-
sha256 = size = None
20265
if lfs_header != actual_header:
20366
return None
20467
data = dict(get_env(fileobj.read().decode("ascii"), sep=" "))
20568
assert data["oid"].startswith("sha256:"), f"unknown oid type: {data['oid']}"
20669
_, _, sha256 = data["oid"].partition(":")
207-
size = int(data["size"])
208-
return {"oid": sha256, "size": size}
70+
return {"path": path, "oid": sha256}
20971

21072

211-
try:
212-
objects = [get_lfs_object(src) for src in sources]
213-
for resp in get_locations(objects):
73+
def main():
74+
opts = options()
75+
objects = [get_lfs_object(src) for src in opts.sources]
76+
for resp in get_locations(objects, opts):
21477
print(resp)
215-
except NoEndpointsFound as e:
216-
print("""\
217-
ERROR: no valid endpoints found, your git authentication method might be currently unsupported by this script.
218-
You can bypass this error by running from semmle-code (this might take a while):
219-
git config lfs.fetchexclude ""
220-
git -C ql config lfs.fetchinclude \\*
221-
git lfs fetch && git lfs checkout
222-
cd ql
223-
git lfs fetch && git lfs checkout""", file=sys.stderr)
224-
sys.exit(1)
78+
79+
if __name__ == "__main__":
80+
main()

misc/bazel/lfs.bzl

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,16 @@ def lfs_smudge(repository_ctx, srcs, *, extract = False, stripPrefix = None, exe
22
python = repository_ctx.which("python3") or repository_ctx.which("python")
33
if not python:
44
fail("Neither python3 nor python executables found")
5-
script = Label("//misc/bazel/internal:git_lfs_probe.py")
5+
script = repository_ctx.path(Label("//misc/bazel/internal:git_lfs_probe.py"))
6+
git_lfs_binary = repository_ctx.path(Label("@git-lfs"))
67

78
def probe(srcs, hash_only = False):
89
repository_ctx.report_progress("querying LFS url(s) for: %s" % ", ".join([src.basename for src in srcs]))
910
cmd = [python, script]
1011
if hash_only:
1112
cmd.append("--hash-only")
13+
else:
14+
cmd += ["--git-lfs", git_lfs_binary]
1215
cmd.extend(srcs)
1316
res = repository_ctx.execute(cmd, quiet = True)
1417
if res.return_code != 0:
@@ -102,3 +105,48 @@ lfs_files = repository_rule(
102105
"executable": attr.bool(doc = "Whether files should be marked as executable"),
103106
},
104107
)
108+
109+
def _lfs_binary_impl(repository_ctx):
110+
suffix = ""
111+
if repository_ctx.os.name.startswith("windows"):
112+
arch = "windows-amd64"
113+
sha256 = repository_ctx.attr.sha256_windows
114+
suffix = ".exe"
115+
elif repository_ctx.os.name.startswith("mac"):
116+
if repository_ctx.os.arch == "x86":
117+
arch = "darwin-amd64"
118+
sha256 = repository_ctx.attr.sha256_macos_x86
119+
else:
120+
arch = "darwin-arm64"
121+
sha256 = repository_ctx.attr.sha256_macos_arm64
122+
else:
123+
arch = "linux-amd64"
124+
sha256 = repository_ctx.attr.sha256_linux
125+
url = "https://github.com/dsp-testing/codeql-git-lfs/releases/download/%s/git-lfs-%s%s" % (
126+
repository_ctx.attr.version,
127+
arch,
128+
suffix,
129+
)
130+
exe = "git-lfs" + suffix
131+
repository_ctx.download(
132+
url = url,
133+
output = exe,
134+
sha256 = sha256,
135+
executable = True,
136+
)
137+
name = repository_ctx.name.split("+")[-1]
138+
if suffix:
139+
repository_ctx.file("BUILD.bazel", "filegroup(name = %r, srcs = [%r], visibility = ['//visibility:public'])" % (name, exe))
140+
else:
141+
repository_ctx.file("BUILD.bazel", "exports_files([%r])" % exe)
142+
143+
git_lfs_binary = repository_rule(
144+
implementation = _lfs_binary_impl,
145+
attrs = {
146+
"version": attr.string(mandatory = True),
147+
"sha256_linux": attr.string(mandatory = True),
148+
"sha256_macos_x86": attr.string(mandatory = True),
149+
"sha256_macos_arm64": attr.string(mandatory = True),
150+
"sha256_windows": attr.string(mandatory = True),
151+
},
152+
)

0 commit comments

Comments
 (0)