diff --git a/transport.go b/transport.go new file mode 100644 index 000000000..38748bed3 --- /dev/null +++ b/transport.go @@ -0,0 +1,333 @@ +// Copyright 2013 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package imageproxy provides an image proxy server. For typical use of +// creating and using a Proxy, see cmd/imageproxy/main.go. +package imageproxy // import "willnorris.com/go/imageproxy" + +import ( + "bufio" + "bytes" + "fmt" + "net/http" + "net/http/httputil" + "strings" + "errors" + "time" +) + + +const ( + stale = iota + fresh + transparent + // XFromCache is the header added to responses that are returned from the cache + XFromCache = "X-From-Cache" +) +// cacheKey returns the cache key for req. +func cacheKey(req *http.Request) string { + return req.URL.String() +} +// CachedResponse returns the cached http.Response for req if present, and nil +// otherwise. +func CachedResponse(c Cache, req *http.Request) (resp *http.Response, err error) { + cachedVal, ok := c.Get(cacheKey(req)) + if !ok { + return + } + + b := bytes.NewBuffer(cachedVal) + return http.ReadResponse(bufio.NewReader(b), req) +} + + +// Transport is an implementation of http.RoundTripper that will return values from a cache +// where possible (avoiding a network request) and will additionally add validators (etag/if-modified-since) +// to repeated requests allowing servers to return 304 / Not Modified +type Transport struct { + // The RoundTripper interface actually used to make requests + // If nil, http.DefaultTransport is used + Transport http.RoundTripper + Cache Cache + // If true, responses returned from the cache will be given an extra header, X-From-Cache + MarkCachedResponses bool +} + +// Client returns an *http.Client that caches responses. +func (t *Transport) Client() *http.Client { + return &http.Client{Transport: t} +} + +// varyMatches will return false unless all of the cached values for the headers listed in Vary +// match the new request +func varyMatches(cachedResp *http.Response, req *http.Request) bool { + for _, header := range headerAllCommaSepValues(cachedResp.Header, "vary") { + header = http.CanonicalHeaderKey(header) + if header != "" && req.Header.Get(header) != cachedResp.Header.Get("X-Varied-"+header) { + return false + } + } + return true +} + +// RoundTrip takes a Request and returns a Response +// +// If there is a fresh Response already in cache, then it will be returned without connecting to +// the server. +// +// If there is a stale Response, then any validators it contains will be set on the new request +// to give the server a chance to respond with NotModified. If this happens, then the cached Response +// will be returned. +func (t *Transport) RoundTrip(req *http.Request) (resp *http.Response, err error) { + req = cloneRequest(req) + cacheKey := cacheKey(req) + cacheableMethod := req.Method == "GET" || req.Method == "HEAD" + var cachedResp *http.Response + if cacheableMethod { + cachedResp, err = CachedResponse(t.Cache, req) + } else { + // Need to invalidate an existing value + t.Cache.Delete(cacheKey) + } + + transport := t.Transport + if transport == nil { + transport = http.DefaultTransport + } + + if cachedResp != nil && err == nil && cacheableMethod && req.Header.Get("range") == "" { + if t.MarkCachedResponses { + cachedResp.Header.Set(XFromCache, "1") + } + + if varyMatches(cachedResp, req) { + // Can only use cached value if the new request doesn't Vary significantly + freshness := getFreshness(cachedResp.Header, req.Header) + if freshness == fresh { + return cachedResp, nil + } + + if freshness == stale { + // Add validators if caller hasn't already done so + etag := cachedResp.Header.Get("etag") + if etag != "" && req.Header.Get("etag") == "" { + req.Header.Set("if-none-match", etag) + } + lastModified := cachedResp.Header.Get("last-modified") + if lastModified != "" && req.Header.Get("last-modified") == "" { + req.Header.Set("if-modified-since", lastModified) + } + } + } + + resp, err = transport.RoundTrip(req) + if err == nil && req.Method == "GET" && resp.StatusCode == http.StatusNotModified { + // Replace the 304 response with the one from cache, but update with some new headers + endToEndHeaders := getEndToEndHeaders(resp.Header) + for _, header := range endToEndHeaders { + cachedResp.Header[header] = resp.Header[header] + } + cachedResp.Status = fmt.Sprintf("%d %s", http.StatusOK, http.StatusText(http.StatusOK)) + cachedResp.StatusCode = http.StatusOK + + resp = cachedResp + } else { + if err != nil || resp.StatusCode != http.StatusOK { + t.Cache.Delete(cacheKey) + } + if err != nil { + return nil, err + } + } + } else { + reqCacheControl := parseCacheControl(req.Header) + if _, ok := reqCacheControl["only-if-cached"]; ok { + resp = newGatewayTimeoutResponse(req) + } else { + resp, err = transport.RoundTrip(req) + if err != nil { + return nil, err + } + } + } + + reqCacheControl := parseCacheControl(req.Header) + respCacheControl := parseCacheControl(resp.Header) + + if canStore(reqCacheControl, respCacheControl) { + for _, varyKey := range headerAllCommaSepValues(resp.Header, "vary") { + varyKey = http.CanonicalHeaderKey(varyKey) + fakeHeader := "X-Varied-" + varyKey + reqValue := req.Header.Get(varyKey) + if reqValue != "" { + resp.Header.Set(fakeHeader, reqValue) + } + } + respBytes, err := httputil.DumpResponse(resp, true) + if err == nil { + t.Cache.Set(cacheKey, respBytes) + } + } else { + t.Cache.Delete(cacheKey) + } + return resp, nil +} + +// ErrNoDateHeader indicates that the HTTP headers contained no Date header. +var ErrNoDateHeader = errors.New("no Date header") + +// Date parses and returns the value of the Date header. +func Date(respHeaders http.Header) (date time.Time, err error) { + dateHeader := respHeaders.Get("date") + if dateHeader == "" { + err = ErrNoDateHeader + return + } + + return time.Parse(time.RFC1123, dateHeader) +} + +type realClock struct{} + +func (c *realClock) since(d time.Time) time.Duration { + return time.Since(d) +} + +type timer interface { + since(d time.Time) time.Duration +} + +var clock timer = &realClock{} + +// getFreshness will return one of fresh/stale/transparent based on the cache-control +// values of the request and the response +// +// fresh indicates the response can be returned +// stale indicates that the response needs validating before it is returned +// transparent indicates the response should not be used to fulfil the request +// +// Because this is only a private cache, 'public' and 'private' in cache-control aren't +// signficant. Similarly, smax-age isn't used. +func getFreshness(respHeaders, reqHeaders http.Header) (freshness int) { + date, err := Date(respHeaders) + if err != nil { + return stale + } + currentAge := clock.since(date) + if currentAge > time.Duration(86400*14) * time.Second { + return stale + } + return fresh +} + +func getEndToEndHeaders(respHeaders http.Header) []string { + // These headers are always hop-by-hop + hopByHopHeaders := map[string]struct{}{ + "Connection": struct{}{}, + "Keep-Alive": struct{}{}, + "Proxy-Authenticate": struct{}{}, + "Proxy-Authorization": struct{}{}, + "Te": struct{}{}, + "Trailers": struct{}{}, + "Transfer-Encoding": struct{}{}, + "Upgrade": struct{}{}, + } + + for _, extra := range strings.Split(respHeaders.Get("connection"), ",") { + // any header listed in connection, if present, is also considered hop-by-hop + if strings.Trim(extra, " ") != "" { + hopByHopHeaders[http.CanonicalHeaderKey(extra)] = struct{}{} + } + } + endToEndHeaders := []string{} + for respHeader, _ := range respHeaders { + if _, ok := hopByHopHeaders[respHeader]; !ok { + endToEndHeaders = append(endToEndHeaders, respHeader) + } + } + return endToEndHeaders +} + +func canStore(reqCacheControl, respCacheControl cacheControl) (canStore bool) { + if _, ok := respCacheControl["no-store"]; ok { + return false + } + if _, ok := reqCacheControl["no-store"]; ok { + return false + } + return true +} + +func newGatewayTimeoutResponse(req *http.Request) *http.Response { + var braw bytes.Buffer + braw.WriteString("HTTP/1.1 504 Gateway Timeout\r\n\r\n") + resp, err := http.ReadResponse(bufio.NewReader(&braw), req) + if err != nil { + panic(err) + } + return resp +} + +// cloneRequest returns a clone of the provided *http.Request. +// The clone is a shallow copy of the struct and its Header map. +// (This function copyright goauth2 authors: https://code.google.com/p/goauth2) +func cloneRequest(r *http.Request) *http.Request { + // shallow copy of the struct + r2 := new(http.Request) + *r2 = *r + // deep copy of the Header + r2.Header = make(http.Header) + for k, s := range r.Header { + r2.Header[k] = s + } + return r2 +} + +type cacheControl map[string]string + +func parseCacheControl(headers http.Header) cacheControl { + cc := cacheControl{} + ccHeader := headers.Get("Cache-Control") + for _, part := range strings.Split(ccHeader, ",") { + part = strings.Trim(part, " ") + if part == "" { + continue + } + if strings.ContainsRune(part, '=') { + keyval := strings.Split(part, "=") + cc[strings.Trim(keyval[0], " ")] = strings.Trim(keyval[1], ",") + } else { + cc[part] = "" + } + } + return cc +} + +// headerAllCommaSepValues returns all comma-separated values (each +// with whitespace trimmed) for header name in headers. According to +// Section 4.2 of the HTTP/1.1 spec +// (http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2), +// values from multiple occurrences of a header should be concatenated, if +// the header's value is a comma-separated list. +func headerAllCommaSepValues(headers http.Header, name string) []string { + var vals []string + for _, val := range headers[http.CanonicalHeaderKey(name)] { + fields := strings.Split(val, ",") + for i, f := range fields { + fields[i] = strings.TrimSpace(f) + } + vals = append(vals, fields...) + } + return vals +}