Skip to content

Commit

Permalink
cache restructured; custom cache-enforcing transport
Browse files Browse the repository at this point in the history
  • Loading branch information
shepik committed May 6, 2015
1 parent 4d85a63 commit 781938a
Showing 1 changed file with 333 additions and 0 deletions.
333 changes: 333 additions & 0 deletions transport.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,333 @@
// Copyright 2013 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package imageproxy provides an image proxy server. For typical use of
// creating and using a Proxy, see cmd/imageproxy/main.go.
package imageproxy // import "willnorris.com/go/imageproxy"

import (
"bufio"
"bytes"
"fmt"
"net/http"
"net/http/httputil"
"strings"
"errors"
"time"
)


// Freshness classifications produced by getFreshness.
const (
	stale = iota
	fresh
	transparent
)

// XFromCache is the header added to responses that are returned from the cache
const XFromCache = "X-From-Cache"
// cacheKey derives the key under which the response to req is cached: the
// request URL rendered as a string.
func cacheKey(req *http.Request) string {
	target := req.URL
	return target.String()
}
// CachedResponse looks up req in c and, when an entry exists, parses the
// stored bytes back into an *http.Response associated with req.
//
// It returns (nil, nil) when c holds nothing for req. A non-nil error is
// whatever http.ReadResponse reports for the stored bytes.
func CachedResponse(c Cache, req *http.Request) (resp *http.Response, err error) {
	raw, found := c.Get(cacheKey(req))
	if !found {
		return nil, nil
	}

	return http.ReadResponse(bufio.NewReader(bytes.NewReader(raw)), req)
}


// Transport is an implementation of http.RoundTripper that will return values from a cache
// where possible (avoiding a network request) and will additionally add validators (etag/if-modified-since)
// to repeated requests allowing servers to return 304 / Not Modified
type Transport struct {
// Transport is the RoundTripper actually used to make requests.
// If nil, http.DefaultTransport is used.
Transport http.RoundTripper
// Cache is the store RoundTrip reads cached responses from and writes
// them to.
// NOTE(review): RoundTrip calls its methods without a nil check, so it
// presumably must be set before the Transport is used — confirm.
Cache Cache
// MarkCachedResponses, if true, makes RoundTrip add the X-From-Cache
// header to responses it loaded from the cache.
MarkCachedResponses bool
}

// Client builds an *http.Client that sends every request through t, so its
// responses go through t's caching RoundTrip.
func (t *Transport) Client() *http.Client {
	client := new(http.Client)
	client.Transport = t
	return client
}

// varyMatches reports whether req agrees with the request that produced
// cachedResp on every header named in cachedResp's Vary header. The values of
// the original request are read from the "X-Varied-<Header>" entries stored
// alongside the cached response. Empty names in the Vary list are skipped.
func varyMatches(cachedResp *http.Response, req *http.Request) bool {
	for _, name := range headerAllCommaSepValues(cachedResp.Header, "vary") {
		name = http.CanonicalHeaderKey(name)
		if name == "" {
			continue
		}
		stored := cachedResp.Header.Get("X-Varied-" + name)
		if req.Header.Get(name) != stored {
			return false
		}
	}
	return true
}

// RoundTrip takes a Request and returns a Response.
//
// Only GET and HEAD requests without a Range header are answered from, and
// written to, the cache. Any other method first invalidates the entry stored
// for the request URL, and its response is never stored.
//
// If there is a fresh Response already in cache, then it will be returned
// without connecting to the server.
//
// If there is a stale Response, then the validators it contains will be set
// on the outgoing request to give the server a chance to respond with
// NotModified. If this happens for a GET, the cached Response, updated with
// the end-to-end headers of the 304, will be returned and stored again.
//
// If no cached Response is available and the request carries
// "Cache-Control: only-if-cached", a synthetic 504 response is returned
// without contacting the server and without touching the cache.
//
// Errors from the underlying RoundTripper are returned unchanged with a nil
// Response.
func (t *Transport) RoundTrip(req *http.Request) (resp *http.Response, err error) {
	// Work on a copy: validators are added to the request below and the
	// caller's request must not be modified.
	req = cloneRequest(req)
	key := cacheKey(req)

	cacheableMethod := req.Method == "GET" || req.Method == "HEAD"
	// The response to a Range request may be partial, so such requests are
	// neither served from the cache nor allowed to overwrite an entry.
	cacheable := cacheableMethod && req.Header.Get("range") == ""

	var cachedResp *http.Response
	if cacheableMethod {
		cachedResp, err = CachedResponse(t.Cache, req)
	} else {
		// Need to invalidate an existing value
		t.Cache.Delete(key)
	}

	transport := t.Transport
	if transport == nil {
		transport = http.DefaultTransport
	}

	if cachedResp != nil && err == nil && cacheable {
		if t.MarkCachedResponses {
			cachedResp.Header.Set(XFromCache, "1")
		}

		if varyMatches(cachedResp, req) {
			// Can only use cached value if the new request doesn't Vary significantly
			switch getFreshness(cachedResp.Header, req.Header) {
			case fresh:
				return cachedResp, nil
			case stale:
				// Revalidate against the cached entity. A 304 is answered
				// with the cached body below, so the validators sent must be
				// the ones that belong to that body.
				if etag := cachedResp.Header.Get("etag"); etag != "" {
					req.Header.Set("if-none-match", etag)
				}
				if lastModified := cachedResp.Header.Get("last-modified"); lastModified != "" {
					req.Header.Set("if-modified-since", lastModified)
				}
			}
		}

		resp, err = transport.RoundTrip(req)
		if err == nil && req.Method == "GET" && resp.StatusCode == http.StatusNotModified {
			// Replace the 304 response with the one from cache, but update with some new headers
			for _, header := range getEndToEndHeaders(resp.Header) {
				cachedResp.Header[header] = resp.Header[header]
			}
			cachedResp.Status = fmt.Sprintf("%d %s", http.StatusOK, http.StatusText(http.StatusOK))
			cachedResp.StatusCode = http.StatusOK

			// The 304 itself is discarded; release its body so the
			// underlying connection is not leaked. A close error is of no
			// use to the caller, who receives the cached response.
			if resp.Body != nil {
				resp.Body.Close()
			}
			resp = cachedResp
		} else {
			if err != nil || resp.StatusCode != http.StatusOK {
				t.Cache.Delete(key)
			}
			if err != nil {
				return nil, err
			}
		}
	} else {
		reqCacheControl := parseCacheControl(req.Header)
		if _, ok := reqCacheControl["only-if-cached"]; ok {
			// Synthetic response: return it directly so it can never end up
			// in the cache and be served later as a real answer.
			return newGatewayTimeoutResponse(req), nil
		}
		resp, err = transport.RoundTrip(req)
		if err != nil {
			return nil, err
		}
	}

	if !cacheable {
		// Responses to non-GET/HEAD or Range requests must not be stored
		// under the URL key, or a later GET would be answered with them.
		return resp, nil
	}

	reqCacheControl := parseCacheControl(req.Header)
	respCacheControl := parseCacheControl(resp.Header)
	if !canStore(reqCacheControl, respCacheControl) {
		t.Cache.Delete(key)
		return resp, nil
	}

	// Remember the request's value of every header named in Vary, so that
	// varyMatches can compare later requests against it.
	for _, varyKey := range headerAllCommaSepValues(resp.Header, "vary") {
		varyKey = http.CanonicalHeaderKey(varyKey)
		if reqValue := req.Header.Get(varyKey); reqValue != "" {
			resp.Header.Set("X-Varied-"+varyKey, reqValue)
		}
	}

	// Storing is best effort: if the response cannot be serialized it is
	// still handed to the caller, just not cached.
	if respBytes, dumpErr := httputil.DumpResponse(resp, true); dumpErr == nil {
		t.Cache.Set(key, respBytes)
	}
	return resp, nil
}

// ErrNoDateHeader indicates that the HTTP headers contained no Date header.
var ErrNoDateHeader = errors.New("no Date header")

// Date parses and returns the value of the Date header.
//
// The value is parsed as RFC 1123 first. If that fails, the other date
// formats HTTP recipients are required to accept (RFC 850 and ANSI C
// asctime) are tried through http.ParseTime.
//
// It returns ErrNoDateHeader when the header is absent or empty, and the
// RFC 1123 parse error when the value matches none of the formats. The
// returned time is the zero time.Time whenever err is non-nil.
func Date(respHeaders http.Header) (date time.Time, err error) {
	dateHeader := respHeaders.Get("date")
	if dateHeader == "" {
		return time.Time{}, ErrNoDateHeader
	}

	date, err = time.Parse(time.RFC1123, dateHeader)
	if err == nil {
		return date, nil
	}
	// Obsolete but still legal formats sent by older servers.
	if alt, altErr := http.ParseTime(dateHeader); altErr == nil {
		return alt, nil
	}
	return time.Time{}, err
}

// timer measures how much time has elapsed since a given instant.
type timer interface {
	since(d time.Time) time.Duration
}

// realClock is the timer implementation backed by the system clock.
type realClock struct{}

// since reports the duration elapsed between d and the current time.
func (*realClock) since(d time.Time) time.Duration {
	return time.Now().Sub(d)
}

// clock is the timer getFreshness uses to compute a response's age.
// NOTE(review): presumably a package variable so tests can substitute a fake
// clock — confirm against the test files.
var clock timer = &realClock{}

// getFreshness classifies a cached response purely by its age.
//
// fresh indicates the response can be returned as is.
// stale indicates that the response needs validating before it is returned.
//
// The age is the time elapsed since the response's Date header. A response
// whose Date header is missing or unparsable, or which is older than 14 days,
// is stale; every other response is fresh. Cache-Control directives are not
// consulted, reqHeaders is currently unused, and transparent is never
// returned.
func getFreshness(respHeaders, reqHeaders http.Header) (freshness int) {
	const maxAge = 14 * 24 * time.Hour

	responseDate, dateErr := Date(respHeaders)
	if dateErr != nil {
		return stale
	}
	if clock.since(responseDate) > maxAge {
		return stale
	}
	return fresh
}

// getEndToEndHeaders returns the names (the keys of respHeaders, in no
// particular order) of all headers that are end-to-end, i.e. not hop-by-hop.
//
// A header is hop-by-hop if it is one of the fixed names below or if it is
// listed in any Connection header line of respHeaders. The result is never
// nil.
func getEndToEndHeaders(respHeaders http.Header) []string {
	// These headers are always hop-by-hop
	hopByHopHeaders := map[string]struct{}{
		"Connection":          {},
		"Keep-Alive":          {},
		"Proxy-Authenticate":  {},
		"Proxy-Authorization": {},
		"Te":                  {},
		"Trailer":             {},
		"Trailers":            {}, // spelling used by RFC 2616 section 13.5.1
		"Transfer-Encoding":   {},
		"Upgrade":             {},
	}

	// any header listed in connection, if present, is also considered hop-by-hop
	for _, line := range respHeaders["Connection"] {
		for _, extra := range strings.Split(line, ",") {
			// Trim before canonicalizing: a name with surrounding whitespace
			// is not a valid token and would be left unchanged, so it would
			// never match the real header key.
			if extra = strings.TrimSpace(extra); extra != "" {
				hopByHopHeaders[http.CanonicalHeaderKey(extra)] = struct{}{}
			}
		}
	}

	endToEndHeaders := make([]string, 0, len(respHeaders))
	for respHeader := range respHeaders {
		if _, ok := hopByHopHeaders[http.CanonicalHeaderKey(respHeader)]; !ok {
			endToEndHeaders = append(endToEndHeaders, respHeader)
		}
	}
	return endToEndHeaders
}

// canStore reports whether a response may be written to the cache: it may
// unless either the request's or the response's Cache-Control directives
// contain "no-store".
func canStore(reqCacheControl, respCacheControl cacheControl) (canStore bool) {
	_, respForbids := respCacheControl["no-store"]
	_, reqForbids := reqCacheControl["no-store"]
	return !respForbids && !reqForbids
}

// newGatewayTimeoutResponse builds an empty "504 Gateway Timeout" response
// associated with req by parsing a fixed status line with http.ReadResponse.
// It panics if that fixed input fails to parse.
func newGatewayTimeoutResponse(req *http.Request) *http.Response {
	const rawResponse = "HTTP/1.1 504 Gateway Timeout\r\n\r\n"

	resp, err := http.ReadResponse(bufio.NewReader(bytes.NewBufferString(rawResponse)), req)
	if err != nil {
		panic(err)
	}
	return resp
}

// cloneRequest returns a clone of the provided *http.Request.
// The clone is a shallow copy of the struct with a deep copy of its Header:
// both the map and every value slice are duplicated, so changing the clone's
// headers in any way leaves r untouched. All other fields (URL, Body, ...)
// are shared with r.
// (Derived from cloneRequest by the goauth2 authors: https://code.google.com/p/goauth2)
func cloneRequest(r *http.Request) *http.Request {
	// shallow copy of the struct
	r2 := new(http.Request)
	*r2 = *r
	// deep copy of the Header
	r2.Header = make(http.Header, len(r.Header))
	for k, s := range r.Header {
		vals := make([]string, len(s))
		copy(vals, s)
		r2.Header[k] = vals
	}
	return r2
}

// cacheControl holds the directives of a Cache-Control header, keyed by
// lower-cased directive name. Directives without an argument map to "".
type cacheControl map[string]string

// parseCacheControl parses every Cache-Control header line in headers into a
// cacheControl map. The result is never nil.
//
// Directive names are case-insensitive and are stored lower-cased. For
// "name=value" directives the text after the first '=' is stored with
// surrounding whitespace removed; any quotes around it are kept. Lines are
// split on every comma, so a quoted argument that itself contains a comma is
// not handled.
func parseCacheControl(headers http.Header) cacheControl {
	cc := cacheControl{}
	for _, line := range headers["Cache-Control"] {
		for _, part := range strings.Split(line, ",") {
			part = strings.TrimSpace(part)
			if part == "" {
				continue
			}
			if i := strings.Index(part, "="); i >= 0 {
				name := strings.ToLower(strings.TrimSpace(part[:i]))
				cc[name] = strings.TrimSpace(part[i+1:])
			} else {
				cc[strings.ToLower(part)] = ""
			}
		}
	}
	return cc
}

// headerAllCommaSepValues collects the comma-separated values of every
// occurrence of header name in headers, in order, with surrounding
// whitespace removed from each value. Empty elements are kept as empty
// strings, and the result is nil when the header is absent.
//
// Section 4.2 of the HTTP/1.1 spec
// (http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2) says that
// multiple occurrences of a header whose value is a comma-separated list are
// equivalent to one occurrence with the values concatenated.
func headerAllCommaSepValues(headers http.Header, name string) []string {
	var vals []string
	for _, line := range headers[http.CanonicalHeaderKey(name)] {
		for _, field := range strings.Split(line, ",") {
			vals = append(vals, strings.TrimSpace(field))
		}
	}
	return vals
}

0 comments on commit 781938a

Please sign in to comment.