From 7d55b49c58506ae32039973880fb114bc4b0f3ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Wed, 29 Mar 2017 15:16:02 -0700 Subject: [PATCH] feat(api): initial implementation -- can make and cache requests BREAKING CHANGE: actual api implemented --- cache.js | 164 +++++++++++++++++++++++++++++++++++++++++++++++++++ index.js | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++ package.json | 3 +- 3 files changed, 327 insertions(+), 1 deletion(-) create mode 100644 cache.js create mode 100644 index.js diff --git a/cache.js b/cache.js new file mode 100644 index 0000000..d7b6c89 --- /dev/null +++ b/cache.js @@ -0,0 +1,164 @@ +'use strict' + +const cacache = require('cacache') +const fetch = require('node-fetch') +const fs = require('fs') +const pipe = require('mississippi').pipe +const through = require('mississippi').through +const to = require('mississippi').to + +const MAX_MEM_SIZE = 5 * 1024 * 1024 // 5MB + +function cacheKey (req) { + return `make-fetch-happen:request-cache:${ + (req.method || 'GET').toUpperCase() + }:${ + req.headers && req.headers.get('accept-encoding') || '*' + }:${ + req.uri + }` +} + +// This is a cacache-based implementation of the Cache standard, +// using node-fetch. +// docs: https://developer.mozilla.org/en-US/docs/Web/API/Cache +// +module.exports = class Cache { + constructor (path, opts) { + this._cachePath = path + this._cacheOpts = opts + this.Promise = opts.Promise || Promise + } + + // Returns a Promise that resolves to the response associated with the first + // matching request in the Cache object. 
+ match (request, opts) { + // TODO - opts.ignoreSearch, opts.ignoreMethod, opts.ignoreVary + request = new fetch.Request(request) + return cacache.get.info( + this._cachePath, + cacheKey(request), + this._cacheOpts + ).then(info => { + if (info) { + // TODO - if it's small enough, slurp into memory + return new this.Promise((resolve, reject) => { + fs.stat(info.path, (err, stat) => { + if (err) { + return reject(err) + } else { + return resolve(stat) + } + }) + }).then(stat => { + // meh + this._cacheOpts.hashAlgorithm = info.hashAlgorithm + + let body + if (stat.size > MAX_MEM_SIZE) { + body = cacache.get.stream.byDigest( + this._cachePath, + info.digest, + this._cacheOpts + ) + } else { + // cacache is much faster at bulk reads + body = through() + cacache.get.byDigest( + this._cachePath, + info.digest, + this._cacheOpts + ).then(data => { + body.write(data, () => { + body.end() + }) + }, err => body.emit('error', err)) + } + return new fetch.Response(body, { + url: request.url, + headers: info.metadata.headers, + status: 200, + size: stat.size + }) + }).catch({code: 'ENOENT'}, () => { + return null + }) + } + }) + } + + // Returns a Promise that resolves to an array of all matching requests in + // the Cache object. + matchAll (request, options) { + return this.Promise.reject(new Error('Cache.matchAll not implemented')) + } + + // Takes a URL, retrieves it and adds the resulting response object to the + // given cache. This is fuctionally equivalent to calling fetch(), then using + // Cache.put() to add the results to the cache. + add (request) { + return this.Promise.reject(new Error('Cache.add not implemented')) + } + + // Takes an array of URLs, retrieves them, and adds the resulting response + // objects to the given cache. + addAll (requests) { + return this.Promise.reject(new Error('Cache.addAll not implemented')) + } + + // Takes both a request and its response and adds it to the given cache. 
+ put (request, response) { + const req = new fetch.Request(request) + const size = response.headers.get('content-length') + this._cacheOpts.metadata = { + headers: response.headers.raw() + } + if (false && size && size < MAX_MEM_SIZE) { + return response.buffer().then(data => { + return cacache.put( + this._cachePath, + cacheKey(req), + data, + this._cacheOpts + ) + }).then(() => response) + } else { + const stream = cacache.put.stream( + this._cachePath, + cacheKey(req.url), + this._cacheOpts + ) + const oldBody = response.body + const newBody = through() + response.body = newBody + oldBody.once('error', err => newBody.emit('error', err)) + newBody.once('error', err => oldBody.emit('error', err)) + stream.once('error', err => newBody.emit('error', err)) + pipe(oldBody, to((chunk, enc, cb) => { + stream.write(chunk, enc, () => { + newBody.write(chunk, enc, cb) + }) + }, done => { + stream.end(() => newBody.end(done)) + })) + return response + } + } + + // Finds the Cache entry whose key is the request, and if found, deletes the + // Cache entry and returns a Promise that resolves to true. If no Cache entry + // is found, it returns false. 
+ ['delete'] (request, options) { + const req = new fetch.Request(request) + return cacache.rm.entry( + this._cachePath, + cacheKey(req.url), + this._cacheOpts + // TODO - true/false + ).then(() => false) + } + + keys (request, options) { + return cacache.ls(this._cachePath).then(entries => Object.keys(entries)) + } +} diff --git a/index.js b/index.js new file mode 100644 index 0000000..da9824d --- /dev/null +++ b/index.js @@ -0,0 +1,161 @@ +'use strict' + +let Cache +const fetch = require('node-fetch') +const http = require('http') +const https = require('https') +let ProxyAgent +const pkg = require('./package.json') +const url = require('url') + +// The "cache mode" options are really confusing, and this module does +// its best to recreate them: +// https://fetch.spec.whatwg.org/#http-network-or-cache-fetch +module.exports = cachingFetch +function cachingFetch (uri, opts) { + opts = opts || {} + opts.cache = opts.cache || 'default' + if (opts.cache === 'default' && isConditional(opts.headers || {})) { + // If header list contains `If-Modified-Since`, `If-None-Match`, + // `If-Unmodified-Since`, `If-Match`, or `If-Range`, fetch will set cache + // mode to "no-store" if it is "default". 
// Reconstructed from mangled git patch 7d55b49c ("feat(api): initial
// implementation -- can make and cache requests"), file: index.js.
'use strict'

let Cache
const fetch = require('node-fetch')
const http = require('http')
const https = require('https')
let ProxyAgent
const pkg = require('./package.json')
const url = require('url')

// The "cache mode" options are really confusing, and this module does
// its best to recreate them:
// https://fetch.spec.whatwg.org/#http-network-or-cache-fetch
module.exports = cachingFetch
function cachingFetch (uri, opts) {
  opts = opts || {}
  opts.cache = opts.cache || 'default'
  if (opts.cache === 'default' && isConditional(opts.headers || {})) {
    // If header list contains `If-Modified-Since`, `If-None-Match`,
    // `If-Unmodified-Since`, `If-Match`, or `If-Range`, fetch will set cache
    // mode to "no-store" if it is "default".
    opts.cache = 'no-store'
  }
  let res
  if (
    opts.cachePath && !(
      opts.cache === 'no-store' ||
      opts.cache === 'reload'
    )
  ) {
    if (!Cache) { Cache = require('./cache') }
    res = new Cache(opts.cachePath, opts).match(uri)
  }
  return fetch.Promise.resolve(res).then(res => {
    if (res && opts.cache === 'default' && !isStale(res)) {
      // Fresh cached response — serve it directly.
      return res
    } else if (res && (opts.cache === 'default' || opts.cache === 'no-cache')) {
      // Precedence fix: the original `res && a || b` sent a null `res`
      // into condFetch whenever mode was 'no-cache' with an empty cache,
      // crashing on res.headers. Revalidate only when we have an entry.
      return condFetch(uri, res, opts)
    } else if (!res && opts.cache === 'only-if-cached') {
      throw new Error(`request to ${uri} failed: cache mode is 'only-if-cached' but no cached response available.`)
    } else {
      // Missing cache entry, stale default, reload, no-store
      return remoteFetch(uri, opts)
    }
  })
}

// Returns true if the cached response's age exceeds its freshness
// lifetime. https://tools.ietf.org/html/rfc7234#section-4.2
function isStale (res) {
  if (!res) { return null }
  const maxAge = freshnessLifetime(res)
  const currentAge = (new Date() - new Date(res.headers.get('Date') || new Date())) / 1000
  return maxAge <= currentAge
}

// Freshness lifetime in seconds: s-maxage/max-age, then Expires-Date,
// then the heuristic fallback.
function freshnessLifetime (res) {
  const cacheControl = res.headers.get('Cache-Control') || ''
  // Cache-Control directives use `=`, not `:` (RFC 7234 §5.2.2.8) —
  // the original pattern `max-age:\s*(\d+)` could never match.
  const maxAgeMatch = cacheControl.match(/(?:s-maxage|max-age)\s*=\s*(\d+)/)
  if (maxAgeMatch) {
    return +maxAgeMatch[1]
  } else if (res.headers.get('Expires')) {
    const expireDate = new Date(res.headers.get('Expires'))
    const resDate = new Date(res.headers.get('Date') || new Date())
    return (expireDate - resDate) / 1000
  } else {
    return heuristicFreshness(res)
  }
}

// https://tools.ietf.org/html/rfc7234#section-4.2.2
// Heuristic: 10% of time since Last-Modified, capped at 300 seconds.
function heuristicFreshness (res) {
  const lastMod = res.headers.get('Last-Modified')
  const date = new Date(res.headers.get('Date') || new Date())
  // RFC 7234 §4.2.2 requires a 113 warning when heuristics are used.
  !res.headers.get('Warning') && res.headers.set('Warning', 113)
  if (lastMod) {
    const age = (date - new Date(lastMod)) / 1000
    return Math.min(age * 0.1, 300)
  } else {
    // Unit fix: every other lifetime in this module is in seconds
    // (isStale divides milliseconds by 1000); the original returned
    // `300 * 1000` here, i.e. a 3.5-day lifetime.
    return 300
  }
}

// Revalidates a stale cached response by issuing a conditional request
// (If-None-Match / If-Modified-Since and friends). On 304, the cached
// response is reused; otherwise the fresh one wins.
function condFetch (uri, res, opts) {
  const newHeaders = {}
  Object.keys(opts.headers || {}).forEach(k => {
    newHeaders[k] = opts.headers[k]
  })
  if (res.headers.get('etag')) {
    const condHeader = opts.method && opts.method.toLowerCase() !== 'get'
      ? 'if-match'
      : 'if-none-match'
    newHeaders[condHeader] = res.headers.get('etag')
  }
  if (res.headers.get('last-modified')) {
    const condHeader = opts.method && opts.method.toLowerCase() !== 'get'
      ? 'if-unmodified-since'
      : 'if-modified-since'
    newHeaders[condHeader] = res.headers.get('last-modified')
  }
  opts.headers = newHeaders
  return remoteFetch(uri, opts).then(condRes => {
    if (condRes.status === 304) {
      // TODO - update cache last-modified?
      return res
    } else {
      return condRes
    }
  })
}

// Performs the actual network fetch (directly or through a proxy) and,
// when caching is enabled, tees the successful response into the cache.
function remoteFetch (uri, opts) {
  const headers = {
    'connection': 'keep-alive',
    'user-agent': opts.userAgent || `${pkg.name}/${pkg.version}`
  }
  if (opts.headers) {
    Object.keys(opts.headers).forEach(k => {
      headers[k] = opts.headers[k]
    })
  }
  const agentOpts = url.parse(opts.proxy || uri)
  agentOpts.ca = opts.ca
  agentOpts.cert = opts.cert
  agentOpts.ciphers = opts.ciphers
  if (opts.proxy && !ProxyAgent) {
    // Lazy-load: proxy-agent is only needed when a proxy is configured.
    ProxyAgent = require('proxy-agent')
  }
  const agent = opts.agent || (opts.proxy
    ? new ProxyAgent(agentOpts)
    : (
      url.parse(uri).protocol === 'https:'
      ? https.globalAgent
      : http.globalAgent
    ))
  const req = new fetch.Request(uri, {
    agent,
    compress: opts.compress == null || opts.compress,
    headers,
    redirect: opts.redirect || 'follow'
  })
  return fetch(req).then(res => {
    if (!opts.cachePath || opts.cache === 'no-store' || res.status > 299) {
      return res
    } else {
      // `Cache` may not have been loaded yet — e.g. cache mode 'reload'
      // skips the lookup in cachingFetch but must still store the result.
      // The original dereferenced the undefined binding here.
      if (!Cache) { Cache = require('./cache') }
      return new Cache(opts.cachePath, opts).put(req, res)
    }
  })
}

// True if the header list already carries a conditional-request header,
// in which case the fetch spec demands "no-store" semantics.
function isConditional (headers) {
  return Object.keys(headers).some(h => {
    h = h.toLowerCase()
    return (
      h === 'if-modified-since' ||
      h === 'if-none-match' ||
      h === 'if-unmodified-since' ||
      h === 'if-match' ||
      h === 'if-range'
    )
  })
}