Skip to content

Commit e0f83d1

Browse files
kousu authored and Mathieu Guay-Paquet committed
git-annex: support downloading over HTTP (#6)
This makes HTTP symmetric with SSH clone URLs.

This gives us the fancy feature of _anonymous_ downloads, so people can access datasets without having to set up an account or manage ssh keys.

Previously, to access "open access" data shared this way, users would need to:

1. Create an account on gitea.example.com
2. Create ssh keys
3. Upload ssh keys (and make sure to find and upload the correct file)
4. `git clone git@gitea.example.com:user/dataset.git`
5. `cd dataset`
6. `git annex get`

This cuts that down to just the last three steps:

1. `git clone https://gitea.example.com/user/dataset.git`
2. `cd dataset`
3. `git annex get`

This is significantly simpler for downstream users, especially for those unfamiliar with the command line.

Unfortunately there's no uploading. While git-annex supports uploading over HTTP to S3 and some other special remotes, it seems to fail on a _plain_ HTTP remote. See #7 and https://git-annex.branchable.com/forum/HTTP_uploads/#comment-ce28adc128fdefe4c4c49628174d9b92.

This is not a major loss since no one wants uploading to be anonymous anyway.

To support private repos, I had to hunt down and patch a secret extra security corner that Gitea only applies to HTTP for some reason (services/auth/basic.go).

This was guided by https://git-annex.branchable.com/tips/setup_a_public_repository_on_a_web_site/

Fixes #3

Co-authored-by: Mathieu Guay-Paquet <mathieu.guaypaquet@polymtl.ca>
1 parent 081a4bd commit e0f83d1

File tree

7 files changed

+412
-17
lines changed

7 files changed

+412
-17
lines changed

modules/git/command.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -439,12 +439,13 @@ func (c *Command) RunStdBytes(opts *RunOpts) (stdout, stderr []byte, runErr RunS
439439
}
440440

441441
// AllowLFSFiltersArgs return globalCommandArgs with lfs filter, it should only be used for tests
442+
// It also re-enables git-credential(1), which is used to test git-annex's HTTP support
442443
func AllowLFSFiltersArgs() TrustedCmdArgs {
443444
// Now here we should explicitly allow lfs filters to run
444445
filteredLFSGlobalArgs := make(TrustedCmdArgs, len(globalCommandArgs))
445446
j := 0
446447
for _, arg := range globalCommandArgs {
447-
if strings.Contains(string(arg), "lfs") {
448+
if strings.Contains(string(arg), "lfs") || strings.Contains(string(arg), "credential") {
448449
j--
449450
} else {
450451
filteredLFSGlobalArgs[j] = arg

routers/web/repo/githttp.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,3 +611,34 @@ func GetIdxFile(ctx *context.Context) {
611611
h.sendFile("application/x-git-packed-objects-toc", "objects/pack/pack-"+ctx.Params("file")+".idx")
612612
}
613613
}
614+
615+
// GetAnnexObject serves a single git-annex object file over git-annex's "dumb" HTTP access.
//
// The object's location is assembled from the hash1, hash2, keyDir and key route
// parameters and sent as application/octet-stream from below "annex/objects/".
// If httpBase(ctx) returns nil, this function sends nothing itself.
616+
func GetAnnexObject(ctx *context.Context) {
617+
h := httpBase(ctx)
618+
if h != nil {
619+
// git-annex objects are stored in .git/annex/objects/{hash1}/{hash2}/{key}/{key},
620+
// where key is a string containing the size and (usually SHA256) checksum of the file,
621+
// and hash1+hash2 are the first few bits of the md5sum of key itself.
622+
// ({hash1}/{hash2}/ is only there to avoid putting too many files in one directory.)
623+
// ref: https://git-annex.branchable.com/internals/hashing/
624+
625+
// keyDir is expected to equal key, but that is not enforced here.
626+
object := path.Join(ctx.Params("hash1"), ctx.Params("hash2"), ctx.Params("keyDir"), ctx.Params("key"))
627+
628+
// Sanitize the input against directory traversals.
629+
//
630+
// This works because at the filesystem root, "/.." = "/";
631+
// so if a path starts rooted ("/"), path.Clean(), which
632+
// path.Join() calls internally, removes all leading ".." elements.
633+
// Afterwards, the path is unrooted unconditionally ([1:]), which
634+
// works because we know the input is never supposed to be rooted.
635+
//
636+
// The router code probably also disallows "..", so this
637+
// should be redundant, but it's defensive to keep it
638+
// whenever touching filesystem paths with user input.
639+
object = path.Join("/", object)[1:]
640+
641+
// NOTE(review): presumably caching forever is safe because an annex key
// embeds the content checksum (see above) — confirm.
h.setHeaderCacheForever()
642+
h.sendFile("application/octet-stream", "annex/objects/"+object)
643+
}
644+
}

routers/web/web.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,13 @@ func registerRoutes(m *web.Route) {
331331
}
332332
}
333333

334+
annexEnabled := func(ctx *context.Context) {
335+
if !setting.Annex.Enabled {
336+
ctx.Error(http.StatusNotFound)
337+
return
338+
}
339+
}
340+
334341
federationEnabled := func(ctx *context.Context) {
335342
if !setting.Federation.Enabled {
336343
ctx.Error(http.StatusNotFound)
@@ -1514,6 +1521,12 @@ func registerRoutes(m *web.Route) {
15141521
})
15151522
}, ignSignInAndCsrf, lfsServerEnabled)
15161523

1524+
m.Group("", func() {
1525+
// for git-annex
1526+
m.GetOptions("/config", repo.GetTextFile("config")) // needed by clients reading annex.uuid during `git annex initremote`
1527+
m.GetOptions("/annex/objects/{hash1}/{hash2}/{keyDir}/{key}", repo.GetAnnexObject)
1528+
}, ignSignInAndCsrf, annexEnabled, context_service.UserAssignmentWeb())
1529+
15171530
gitHTTPRouters(m)
15181531
})
15191532
})

services/auth/auth.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,17 @@ func isGitRawOrAttachOrLFSPath(req *http.Request) bool {
5454
return false
5555
}
5656

57+
// annexPathRe matches URL paths that begin with two segments made of letters,
// digits, '_', '.' or '-' (presumably owner and repository name — confirm),
// followed by "/annex/".
var annexPathRe = regexp.MustCompile(`^/[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+/annex/`)
58+
59+
// isAnnexPath reports whether req's URL path is one that git-annex fetches over HTTP:
// either it ends in "/config" or it matches annexPathRe.
// It always returns false when setting.Annex.Enabled is false.
//
// NOTE(review): the "/config" suffix check is not anchored to a repository path, so it
// matches any URL path ending in "/config" — confirm this breadth is intended.
func isAnnexPath(req *http.Request) bool {
60+
if setting.Annex.Enabled {
61+
// "/config" is git's config, not specifically git-annex's; but the only current
62+
// user of it is when git-annex downloads the annex.uuid during 'git annex init'.
63+
return strings.HasSuffix(req.URL.Path, "/config") || annexPathRe.MatchString(req.URL.Path)
64+
}
65+
return false
66+
}
67+
5768
// handleSignIn clears existing session variables and stores new ones for the specified user object
5869
func handleSignIn(resp http.ResponseWriter, req *http.Request, sess SessionStore, user *user_model.User) {
5970
// We need to regenerate the session...

services/auth/basic.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ func (b *Basic) Name() string {
4242
// name/token on successful validation.
4343
// Returns nil if header is empty or validation fails.
4444
func (b *Basic) Verify(req *http.Request, w http.ResponseWriter, store DataStore, sess SessionStore) (*user_model.User, error) {
45-
// Basic authentication should only fire on API, Download or on Git or LFSPaths
46-
if !middleware.IsAPIPath(req) && !isContainerPath(req) && !isAttachmentDownload(req) && !isGitRawOrAttachOrLFSPath(req) {
45+
// Basic authentication should only fire on API, Download or on Git or LFSPaths or Git-Annex paths
46+
if !middleware.IsAPIPath(req) && !isContainerPath(req) && !isAttachmentDownload(req) && !isGitRawOrAttachOrLFSPath(req) && !isAnnexPath(req) {
4747
return nil, nil
4848
}
4949

0 commit comments

Comments
 (0)