Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(gateway): more explicit IPFSBackend and no multi-range #369

Merged
merged 11 commits into from
Oct 2, 2023
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ The following emojis are used to highlight certain changes:

### Changed

* `boxo/gateway`
* 🛠 The `IPFSBackend` interface was updated to make the responses of the
`Head` method more explicit. It now returns a `HeadResponse` instead of a
`files.Node`.

### Removed

### Fixed
Expand Down
65 changes: 59 additions & 6 deletions gateway/blocks_backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,11 +153,34 @@
return md, nil, err
}

// Only a single range is supported in responses to HTTP Range Requests.
// When more than one is passed in the Range header, this library will
// return a response for the first one and ignore the remaining ones.
var ra *ByteRange
if len(ranges) > 0 {
ra = &ranges[0]
}

Check warning on line 162 in gateway/blocks_backend.go

View check run for this annotation

Codecov / codecov/patch

gateway/blocks_backend.go#L161-L162

Added lines #L161 - L162 were not covered by tests

rootCodec := nd.Cid().Prefix().GetCodec()

// This covers both Raw blocks and terminal IPLD codecs like dag-cbor and dag-json
// Note: while only cbor, json, dag-cbor, and dag-json are currently supported by gateways this could change
// Note: For the raw codec we return just the relevant range rather than the entire block
if rootCodec != uint64(mc.DagPb) {
return md, NewGetResponseFromFile(files.NewBytesFile(nd.RawData())), nil
f := files.NewBytesFile(nd.RawData())

fileSize, err := f.Size()
if err != nil {
return ContentPathMetadata{}, nil, err
}

Check warning on line 175 in gateway/blocks_backend.go

View check run for this annotation

Codecov / codecov/patch

gateway/blocks_backend.go#L174-L175

Added lines #L174 - L175 were not covered by tests

if rootCodec == uint64(mc.Raw) {
if err := seekToRangeStart(f, ra); err != nil {
return ContentPathMetadata{}, nil, err
}

Check warning on line 180 in gateway/blocks_backend.go

View check run for this annotation

Codecov / codecov/patch

gateway/blocks_backend.go#L179-L180

Added lines #L179 - L180 were not covered by tests
}

return md, NewGetResponseFromReader(f, fileSize), nil
}

// This code path covers full graph, single file/directory, and range requests
Expand All @@ -179,10 +202,23 @@
if sz < 0 {
return ContentPathMetadata{}, nil, fmt.Errorf("directory cumulative DAG size cannot be negative")
}
return md, NewGetResponseFromDirectoryListing(uint64(sz), dir.EnumLinksAsync(ctx)), nil
return md, NewGetResponseFromDirectoryListing(uint64(sz), dir.EnumLinksAsync(ctx), nil), nil
}
if file, ok := f.(files.File); ok {
return md, NewGetResponseFromFile(file), nil
fileSize, err := f.Size()
if err != nil {
return ContentPathMetadata{}, nil, err
}

Check warning on line 211 in gateway/blocks_backend.go

View check run for this annotation

Codecov / codecov/patch

gateway/blocks_backend.go#L210-L211

Added lines #L210 - L211 were not covered by tests

if err := seekToRangeStart(file, ra); err != nil {
return ContentPathMetadata{}, nil, err
}

Check warning on line 215 in gateway/blocks_backend.go

View check run for this annotation

Codecov / codecov/patch

gateway/blocks_backend.go#L214-L215

Added lines #L214 - L215 were not covered by tests

if s, ok := f.(*files.Symlink); ok {
return md, NewGetResponseFromSymlink(s, fileSize), nil
}

Check warning on line 219 in gateway/blocks_backend.go

View check run for this annotation

Codecov / codecov/patch

gateway/blocks_backend.go#L218-L219

Added lines #L218 - L219 were not covered by tests

return md, NewGetResponseFromReader(file, fileSize), nil
}

return ContentPathMetadata{}, nil, fmt.Errorf("data was not a valid file or directory: %w", ErrInternalServerError) // TODO: should there be a gateway invalid content type to abstract over the various IPLD error types?
Expand Down Expand Up @@ -211,15 +247,15 @@
return md, files.NewBytesFile(nd.RawData()), nil
}

func (bb *BlocksBackend) Head(ctx context.Context, path ImmutablePath) (ContentPathMetadata, files.Node, error) {
func (bb *BlocksBackend) Head(ctx context.Context, path ImmutablePath) (ContentPathMetadata, *HeadResponse, error) {

Check warning on line 250 in gateway/blocks_backend.go

View check run for this annotation

Codecov / codecov/patch

gateway/blocks_backend.go#L250

Added line #L250 was not covered by tests
md, nd, err := bb.getNode(ctx, path)
if err != nil {
return md, nil, err
}

rootCodec := nd.Cid().Prefix().GetCodec()
if rootCodec != uint64(mc.DagPb) {
return md, files.NewBytesFile(nd.RawData()), nil
return md, NewHeadResponseForFile(files.NewBytesFile(nd.RawData()), int64(len(nd.RawData()))), nil

Check warning on line 258 in gateway/blocks_backend.go

View check run for this annotation

Codecov / codecov/patch

gateway/blocks_backend.go#L258

Added line #L258 was not covered by tests
}

// TODO: We're not handling non-UnixFS dag-pb. There's a bit of a discrepancy
Expand All @@ -229,7 +265,24 @@
return ContentPathMetadata{}, nil, err
}

return md, fileNode, nil
sz, err := fileNode.Size()
if err != nil {
return ContentPathMetadata{}, nil, err
}

Check warning on line 271 in gateway/blocks_backend.go

View check run for this annotation

Codecov / codecov/patch

gateway/blocks_backend.go#L268-L271

Added lines #L268 - L271 were not covered by tests

if _, ok := fileNode.(files.Directory); ok {
return md, NewHeadResponseForDirectory(sz), nil
}

Check warning on line 275 in gateway/blocks_backend.go

View check run for this annotation

Codecov / codecov/patch

gateway/blocks_backend.go#L273-L275

Added lines #L273 - L275 were not covered by tests

if _, ok := fileNode.(*files.Symlink); ok {
return md, NewHeadResponseForSymlink(sz), nil
}

Check warning on line 279 in gateway/blocks_backend.go

View check run for this annotation

Codecov / codecov/patch

gateway/blocks_backend.go#L277-L279

Added lines #L277 - L279 were not covered by tests

if f, ok := fileNode.(files.File); ok {
return md, NewHeadResponseForFile(f, sz), nil
}

Check warning on line 283 in gateway/blocks_backend.go

View check run for this annotation

Codecov / codecov/patch

gateway/blocks_backend.go#L281-L283

Added lines #L281 - L283 were not covered by tests

return ContentPathMetadata{}, nil, fmt.Errorf("unsupported UnixFS file type")

Check warning on line 285 in gateway/blocks_backend.go

View check run for this annotation

Codecov / codecov/patch

gateway/blocks_backend.go#L285

Added line #L285 was not covered by tests
}

// emptyRoot is a CAR root with the empty identity CID. CAR files are recommended
Expand Down
82 changes: 71 additions & 11 deletions gateway/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,21 +260,74 @@
}

type GetResponse struct {
bytes files.File
bytes io.ReadCloser
bytesSize int64
symlink *files.Symlink
directoryMetadata *directoryMetadata
}

func (r *GetResponse) Close() error {
if r.bytes != nil {
return r.bytes.Close()
}
if r.symlink != nil {
return r.symlink.Close()
}

Check warning on line 275 in gateway/gateway.go

View check run for this annotation

Codecov / codecov/patch

gateway/gateway.go#L274-L275

Added lines #L274 - L275 were not covered by tests
if r.directoryMetadata != nil {
if r.directoryMetadata.closeFn == nil {
return nil
}
return r.directoryMetadata.closeFn()

Check warning on line 280 in gateway/gateway.go

View check run for this annotation

Codecov / codecov/patch

gateway/gateway.go#L280

Added line #L280 was not covered by tests
}
// Should be unreachable
return nil

Check warning on line 283 in gateway/gateway.go

View check run for this annotation

Codecov / codecov/patch

gateway/gateway.go#L283

Added line #L283 was not covered by tests
}

var _ io.Closer = (*GetResponse)(nil)

// directoryMetadata is the directory-listing payload carried by a GetResponse
// built with NewGetResponseFromDirectoryListing.
type directoryMetadata struct {
// dagSize is the size value supplied by the backend for the directory.
dagSize uint64
// entries is the channel of directory link results supplied by the backend.
entries <-chan unixfs.LinkResult
// closeFn is invoked by GetResponse.Close when non-nil; it may be nil.
closeFn func() error
}

// NewGetResponseFromReader builds a GetResponse whose byte stream is file and
// whose recorded size is fullFileSize.
func NewGetResponseFromReader(file io.ReadCloser, fullFileSize int64) *GetResponse {
	resp := new(GetResponse)
	resp.bytes = file
	resp.bytesSize = fullFileSize
	return resp
}

func NewGetResponseFromSymlink(symlink *files.Symlink, size int64) *GetResponse {
return &GetResponse{symlink: symlink, bytesSize: size}

Check warning on line 299 in gateway/gateway.go

View check run for this annotation

Codecov / codecov/patch

gateway/gateway.go#L298-L299

Added lines #L298 - L299 were not covered by tests
}

func NewGetResponseFromFile(file files.File) *GetResponse {
return &GetResponse{bytes: file}
func NewGetResponseFromDirectoryListing(dagSize uint64, entries <-chan unixfs.LinkResult, closeFn func() error) *GetResponse {
return &GetResponse{directoryMetadata: &directoryMetadata{dagSize: dagSize, entries: entries, closeFn: closeFn}}
}

func NewGetResponseFromDirectoryListing(dagSize uint64, entries <-chan unixfs.LinkResult) *GetResponse {
return &GetResponse{directoryMetadata: &directoryMetadata{dagSize, entries}}
// HeadResponse is the result type of IPFSBackend.Head. It records the size of
// the requested content, which kind of content it is, and (for files) an
// optional reader.
type HeadResponse struct {
// bytesSize is the size passed to the constructor: the file size, the
// symlink size, or the directory DAG size.
bytesSize int64
// startingBytes is only set by NewHeadResponseForFile; Close closes it when
// it is non-nil.
startingBytes io.ReadCloser
// isFile, isSymLink and isDir record which constructor built the response.
isFile bool
isSymLink bool
isDir bool
}

func (r *HeadResponse) Close() error {
if r.startingBytes != nil {
return r.startingBytes.Close()
}
return nil

Check warning on line 318 in gateway/gateway.go

View check run for this annotation

Codecov / codecov/patch

gateway/gateway.go#L314-L318

Added lines #L314 - L318 were not covered by tests
}

func NewHeadResponseForFile(startingBytes io.ReadCloser, size int64) *HeadResponse {
return &HeadResponse{startingBytes: startingBytes, isFile: true, bytesSize: size}

Check warning on line 322 in gateway/gateway.go

View check run for this annotation

Codecov / codecov/patch

gateway/gateway.go#L321-L322

Added lines #L321 - L322 were not covered by tests
}

func NewHeadResponseForSymlink(symlinkSize int64) *HeadResponse {
return &HeadResponse{isSymLink: true, bytesSize: symlinkSize}

Check warning on line 326 in gateway/gateway.go

View check run for this annotation

Codecov / codecov/patch

gateway/gateway.go#L325-L326

Added lines #L325 - L326 were not covered by tests
}

func NewHeadResponseForDirectory(dagSize int64) *HeadResponse {
return &HeadResponse{isDir: true, bytesSize: dagSize}

Check warning on line 330 in gateway/gateway.go

View check run for this annotation

Codecov / codecov/patch

gateway/gateway.go#L329-L330

Added lines #L329 - L330 were not covered by tests
}

// IPFSBackend is the required set of functionality used to implement the IPFS
Expand Down Expand Up @@ -305,6 +358,9 @@
// file will still need magic bytes from the very beginning for content
// type sniffing).
// - A range request for a directory currently holds no semantic meaning.
// - For non-UnixFS (and non-raw) data, such as terminal IPLD dag-cbor/json blocks, the returned response
// bytes should be the complete block, returned as an [io.ReadSeekCloser] starting at the beginning of the
// block rather than as an [io.ReadCloser] that starts at the beginning of the range request.
//
// [HTTP Byte Ranges]: https://httpwg.org/specs/rfc9110.html#rfc.section.14.1.2
Get(context.Context, ImmutablePath, ...ByteRange) (ContentPathMetadata, *GetResponse, error)
Expand All @@ -316,12 +372,16 @@
// GetBlock returns a single block of data
GetBlock(context.Context, ImmutablePath) (ContentPathMetadata, files.File, error)

// Head returns a file or directory depending on what the path is that has been requested.
// For UnixFS files should return a file which has the correct file size and either returns the ContentType in ContentPathMetadata or
// enough data (e.g. 3kiB) such that the content type can be determined by sniffing.
// For all other data types returning just size information is sufficient
// TODO: give function more explicit return types
Head(context.Context, ImmutablePath) (ContentPathMetadata, files.Node, error)
// Head returns a [HeadResponse] depending on what the path is that has been requested.
// For UnixFS files (and raw blocks) it should return the size of the file and either set the ContentType in
// ContentPathMetadata or send back a reader from the beginning of the file with enough data (e.g. 3kiB) such that
// the content type can be determined by sniffing.
//
// For UnixFS directories and symlinks only setting the size and type are necessary.
//
// For all other data types (e.g. (DAG-)CBOR/JSON blocks) returning the size information as a file while setting
// the content-type is sufficient.
Head(context.Context, ImmutablePath) (ContentPathMetadata, *HeadResponse, error)

// ResolvePath resolves the path using UnixFS resolver. If the path does not
// exist due to a missing link, it should return an error of type:
Expand Down
4 changes: 2 additions & 2 deletions gateway/gateway_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -719,7 +719,7 @@ func (mb *errorMockBackend) GetBlock(ctx context.Context, path ImmutablePath) (C
return ContentPathMetadata{}, nil, mb.err
}

func (mb *errorMockBackend) Head(ctx context.Context, path ImmutablePath) (ContentPathMetadata, files.Node, error) {
func (mb *errorMockBackend) Head(ctx context.Context, path ImmutablePath) (ContentPathMetadata, *HeadResponse, error) {
return ContentPathMetadata{}, nil, mb.err
}

Expand Down Expand Up @@ -803,7 +803,7 @@ func (mb *panicMockBackend) GetBlock(ctx context.Context, immutablePath Immutabl
panic("i am panicking")
}

func (mb *panicMockBackend) Head(ctx context.Context, immutablePath ImmutablePath) (ContentPathMetadata, files.Node, error) {
func (mb *panicMockBackend) Head(ctx context.Context, immutablePath ImmutablePath) (ContentPathMetadata, *HeadResponse, error) {
panic("i am panicking")
}

Expand Down
55 changes: 17 additions & 38 deletions gateway/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,13 +92,19 @@
return newHandlerWithMetrics(&c, backend)
}

// serveContent replies to the request using the content in the provided ReadSeeker
// serveContent replies to the request using the content in the provided Reader
// and returns the status code written and any error encountered during a write.
// It wraps http.serveContent which takes care of If-None-Match+Etag,
// It wraps httpServeContent (a close clone of http.ServeContent) which takes care of If-None-Match+Etag,
// Content-Length and range requests.
func serveContent(w http.ResponseWriter, req *http.Request, name string, modtime time.Time, content io.ReadSeeker) (int, bool, error) {
//
// Notes:
// 1. For HEAD requests the io.Reader may be nil/undefined
// 2. When the io.Reader is needed it must start at the beginning of the first Range Request component if it exists
// 3. Only a single HTTP Range Request is supported; if more than one is requested, only the first will be honored
// 4. The Content-Type header must already be set
func serveContent(w http.ResponseWriter, req *http.Request, modtime time.Time, size int64, content io.Reader) (int, bool, error) {
ew := &errRecordingResponseWriter{ResponseWriter: w}
http.ServeContent(ew, req, name, modtime, content)
httpServeContent(ew, req, modtime, size, content)

// When we calculate some metrics we want a flag that lets us ignore
// errors and 304 Not Modified, and only care when requested data
Expand Down Expand Up @@ -554,40 +560,6 @@
return false
}

// scanETag looks for a syntactically valid ETag at the start of s (after
// trimming surrounding whitespace). On success it returns the ETag, including
// any "W/" weak prefix and both quotes, plus whatever text follows it. If no
// valid ETag is found it returns "", "".
// (This is the same logic as one executed inside of http.ServeContent)
func scanETag(s string) (etag string, remain string) {
	s = textproto.TrimString(s)

	// Skip the optional weak-validator prefix.
	begin := 0
	if strings.HasPrefix(s, "W/") {
		begin = 2
	}

	// An ETag is either W/"text" or "text" (RFC 7232 2.3), so at least an
	// opening quote plus one more byte must be present.
	if len(s)-begin < 2 || s[begin] != '"' {
		return "", ""
	}

	for pos := begin + 1; pos < len(s); pos++ {
		ch := s[pos]
		if ch == '"' {
			// Closing quote: split right after it.
			return s[:pos+1], s[pos+1:]
		}
		// Character values allowed in ETags.
		allowed := ch == 0x21 || (ch >= 0x23 && ch <= 0x7E) || ch >= 0x80
		if !allowed {
			return "", ""
		}
	}

	// Ran out of input without seeing the closing quote.
	return "", ""
}

// etagWeakMatch reports whether a and b are equal under weak ETag comparison,
// i.e. after dropping a leading "W/" from each.
func etagWeakMatch(a, b string) bool {
	opaqueA := strings.TrimPrefix(a, "W/")
	opaqueB := strings.TrimPrefix(b, "W/")
	return opaqueA == opaqueB
}

// getEtag generates an ETag value based on an HTTP Request, a CID and a response
// format. This function DOES NOT generate ETags for CARs or IPNS Records.
func getEtag(r *http.Request, cid cid.Cid, responseFormat string) string {
Expand Down Expand Up @@ -776,6 +748,13 @@
return ImmutablePath{}, false
}

// If the error is not an IPLD traversal error then we should not be looking for _redirects or legacy 404s
if !isErrNotFound(err) {
err = fmt.Errorf("failed to resolve %s: %w", debugStr(contentPath.String()), err)
i.webError(w, r, err, http.StatusInternalServerError)
return ImmutablePath{}, false
}

Check warning on line 756 in gateway/handler.go

View check run for this annotation

Codecov / codecov/patch

gateway/handler.go#L753-L756

Added lines #L753 - L756 were not covered by tests

// If we have origin isolation (subdomain gw, DNSLink website),
// and response type is UnixFS (default for website hosting)
// we can leverage the presence of an _redirects file and apply rules defined there.
Expand Down
12 changes: 11 additions & 1 deletion gateway/handler_block.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,19 @@
w.Header().Set("Content-Type", rawResponseFormat)
w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^)

sz, err := data.Size()
if err != nil {
i.handleRequestErrors(w, r, rq.contentPath, err)
return false
}

Check warning on line 45 in gateway/handler_block.go

View check run for this annotation

Codecov / codecov/patch

gateway/handler_block.go#L43-L45

Added lines #L43 - L45 were not covered by tests

if !i.seekToStartOfFirstRange(w, r, data) {
return false
}

Check warning on line 49 in gateway/handler_block.go

View check run for this annotation

Codecov / codecov/patch

gateway/handler_block.go#L48-L49

Added lines #L48 - L49 were not covered by tests

// ServeContent will take care of
// If-None-Match+Etag, Content-Length and range requests
_, dataSent, _ := serveContent(w, r, name, modtime, data)
_, dataSent, _ := serveContent(w, r, modtime, sz, data)

if dataSent {
// Update metrics
Expand Down
Loading
Loading