const { Request, Response } = require('minipass-fetch')
const { Minipass } = require('minipass')
const MinipassFlush = require('minipass-flush')
const cacache = require('cacache')
const url = require('url')

const CachingMinipassPipeline = require('../pipeline.js')
const CachePolicy = require('./policy.js')
const cacheKey = require('./key.js')
const remote = require('../remote.js')

// true when `prop` is an own (non-inherited) property of `obj`. goes through
// Object.prototype so it also works for objects that lack or shadow the method
const hasOwnProperty = (obj, prop) => Object.prototype.hasOwnProperty.call(obj, prop)

// allow list for request headers that will be written to the cache index
// note: we will also store any request headers
// that are named in a response's vary header
const KEEP_REQUEST_HEADERS = [
  'accept-charset',
  'accept-encoding',
  'accept-language',
  'accept',
  'cache-control',
]

// allow list for response headers that will be written to the cache index
// note: we must not store the real response's age header, or when we load
// a cache policy based on the metadata it will think the cached response
// is always stale
const KEEP_RESPONSE_HEADERS = [
  'cache-control',
  'content-encoding',
  'content-language',
  'content-type',
  'date',
  'etag',
  'expires',
  'last-modified',
  'link',
  'location',
  'pragma',
  'vary',
]

// return an object containing all metadata to be written to the index
//
// request  - the request that produced the response (url, headers, compress)
// response - the response being cached (status, headers)
// options  - reads `compress` and `cacheAdditionalHeaders` (an iterable of
//            extra response header names to persist)
//
// the result has the shape { time, url, reqHeaders, resHeaders, options }
// plus `status` when the response status is neither 200 nor 304
const getMetadata = (request, response, options) => {
  const metadata = {
    time: Date.now(),
    url: request.url,
    reqHeaders: {},
    resHeaders: {},

    // options on which we must match the request and vary the response
    options: {
      compress: options.compress != null ? options.compress : request.compress,
    },
  }

  // only save the status if it's not a 200 or 304
  if (response.status !== 200 && response.status !== 304) {
    metadata.status = response.status
  }

  for (const name of KEEP_REQUEST_HEADERS) {
    if (request.headers.has(name)) {
      metadata.reqHeaders[name] = request.headers.get(name)
    }
  }

  // if the request's host header differs from the host in the url
  // we need to keep it, otherwise it's just noise and we ignore it
  const host = request.headers.get('host')
  const parsedUrl = new url.URL(request.url)
  if (host && parsedUrl.host !== host) {
    metadata.reqHeaders.host = host
  }

  // if the response has a vary header, make sure
  // we store the relevant request headers too
  if (response.headers.has('vary')) {
    const vary = response.headers.get('vary')
    // a vary of "*" means every header causes a different response.
    // in that scenario, we do not include any additional headers
    // as the freshness check will always fail anyway and we don't
    // want to bloat the cache indexes
    if (vary !== '*') {
      // copy any other request headers that will vary the response.
      // empty list elements (left behind by a trailing or doubled comma)
      // are dropped: they can never name a header, and an empty string is
      // not a legal header name to pass to headers.has()
      const varyHeaders = vary.trim().toLowerCase().split(/\s*,\s*/).filter(Boolean)
      for (const name of varyHeaders) {
        if (request.headers.has(name)) {
          metadata.reqHeaders[name] = request.headers.get(name)
        }
      }
    }
  }

  for (const name of KEEP_RESPONSE_HEADERS) {
    if (response.headers.has(name)) {
      metadata.resHeaders[name] = response.headers.get(name)
    }
  }

  // caller-requested extra response headers, on top of the allow list
  for (const name of options.cacheAdditionalHeaders) {
    if (response.headers.has(name)) {
      metadata.resHeaders[name] = response.headers.get(name)
    }
  }

  return metadata
}

// symbols used to hide objects that may be lazily evaluated in a getter
const _request = Symbol('request')
const _response = Symbol('response')
const _policy = Symbol('policy')

// a single cache entry. it is built either from an existing index `entry`
// (request/response are then rebuilt lazily from the entry's metadata) or
// from a live `request`/`response` pair that is about to be stored
class CacheEntry {
  // entry    - an existing index entry; when given, the key comes from it
  // request  - the live request; used to derive the key when there is no entry
  // response - the live response, if any
  // options  - options object kept on the instance for later cache operations
  constructor ({ entry, request, response, options }) {
    if (entry) {
      this.key = entry.key
      this.entry = entry
      // previous versions of this module didn't write an explicit timestamp in
      // the metadata, so fall back to the entry's timestamp. we can't use the
      // entry timestamp to determine staleness because cacache will update it
      // when it verifies its data
      this.entry.metadata.time = this.entry.metadata.time || this.entry.time
    } else {
      this.key = cacheKey(request)
    }

    this.options = options

    // these properties are behind getters that lazily evaluate
    this[_request] = request
    this[_response] = response
    this[_policy] = null
  }

  // returns a CacheEntry instance that satisfies the given request
  // or undefined if no existing entry satisfies
  static async find (request, options) {
    let matches
    try {
      // compacts the index and returns an array of unique entries
      matches = await cacache.index.compact(options.cachePath, cacheKey(request), (A, B) => {
        // two index entries count as the same when A's policy
        // is satisfied by the request rebuilt from B
        const entryA = new CacheEntry({ entry: A, options })
        const entryB = new CacheEntry({ entry: B, options })
        return entryA.policy.satisfies(entryB.request)
      }, {
        validateEntry: (entry) => {
          // clean out entries with a buggy content-encoding value
          if (entry.metadata &&
              entry.metadata.resHeaders &&
              entry.metadata.resHeaders['content-encoding'] === null) {
            return false
          }

          // if an integrity is null, it needs to have a status specified
          if (entry.integrity === null) {
            return !!(entry.metadata && entry.metadata.status)
          }

          return true
        },
      })
    } catch (err) {
      // if the compact request fails, ignore the error and return
      return
    }

    // a cache mode of 'reload' means to behave as though we have no cache
    // on the way to the network. return undefined to allow cacheFetch to
    // create a brand new request no matter what.
    if (options.cache === 'reload') {
      return
    }

    // find the specific entry that satisfies the request
    let match
    for (const entry of matches) {
      const _entry = new CacheEntry({
        entry,
        options,
      })

      if (_entry.policy.satisfies(request)) {
        match = _entry
        break
      }
    }

    return match
  }

  // if the user made a PUT/POST/PATCH then we invalidate our
  // cache for the same url by deleting the index entirely.
  // failures to remove the entry are deliberately ignored
  static async invalidate (request, options) {
    const key = cacheKey(request)
    try {
      await cacache.rm.entry(options.cachePath, key, { removeFully: true })
    } catch (err) {
      // ignore errors
    }
  }

  // the request for this entry. when none was passed to the constructor, a
  // GET request is rebuilt (once) from the url, request headers and options
  // stored in the entry's metadata
  get request () {
    if (!this[_request]) {
      this[_request] = new Request(this.entry.metadata.url, {
        method: 'GET',
        headers: this.entry.metadata.reqHeaders,
        ...this.entry.metadata.options,
      })
    }

    return this[_request]
  }

  // the response for this entry. when none was passed to the constructor, a
  // body-less response is rebuilt (once) from the entry's metadata: the stored
  // status (200 when none was stored) and the stored response headers, with
  // content-length taken from the entry's size
  get response () {
    if (!this[_response]) {
      this[_response] = new Response(null, {
        url: this.entry.metadata.url,
        counter: this.options.counter,
        status: this.entry.metadata.status || 200,
        headers: {
          ...this.entry.metadata.resHeaders,
          'content-length': this.entry.size,
        },
      })
    }

    return this[_response]
  }

  // the CachePolicy for this entry, created on first access from the entry,
  // request, response and options
  get policy () {
    if (!this[_policy]) {
      this[_policy] = new CachePolicy({
        entry: this.entry,
        request: this.request,
        response: this.response,
        options: this.options,
      })
    }

    return this[_policy]
  }

  // wraps the response in a pipeline that stores the data
  // in the cache while the user consumes it
  //
  // status - value written to the x-local-cache-status header when the
  //          response is stored
  //
  // resolves to the original response (marked 'skip') when it is not
  // stored, otherwise to a new Response carrying the x-local-cache-* headers
  async store (status) {
    // if the request was not a GET, we got a status other than
    // 200, 301, or 308, or the CachePolicy forbid storage, append
    // the cache status header and return the response untouched
    if (
      this.request.method !== 'GET' ||
      ![200, 301, 308].includes(this.response.status) ||
      !this.policy.storable()
    ) {
      this.response.headers.set('x-local-cache-status', 'skip')
      return this.response
    }

    const size = this.response.headers.get('content-length')
    const cacheOpts = {
      algorithms: this.options.algorithms,
      metadata: getMetadata(this.request, this.response, this.options),
      size,
      integrity: this.options.integrity,
      integrityEmitter: this.response.body.hasIntegrityEmitter && this.response.body,
    }

    let body = null
    // we only set a body if the status is a 200, redirects are
    // stored as metadata only
    if (this.response.status === 200) {
      // settled from inside onResume with the outcome of the cache write
      let cacheWriteResolve, cacheWriteReject
      const cacheWritePromise = new Promise((resolve, reject) => {
        cacheWriteResolve = resolve
        cacheWriteReject = reject
      })

      // flush hands back the cache write promise, presumably so the body
      // does not finish before the write settles - NOTE(review): confirm
      // against minipass-flush
      body = new CachingMinipassPipeline({ events: ['integrity', 'size'] }, new MinipassFlush({
        flush () {
          return cacheWritePromise
        },
      }))
      // stick a flag on here so downstream users will know if they can expect
      // integrity events. this is always true since if we aren't reusing the
      // one from the remote fetch, we are using the one from cacache
      body.hasIntegrityEmitter = true

      // the cache write stream is only created once the body is first resumed
      const onResume = () => {
        const tee = new Minipass()
        const cacheStream = cacache.put.stream(this.options.cachePath, this.key, cacheOpts)
        // re-emit the integrity and size events on our new response body so they can be reused
        cacheStream.on('integrity', i => body.emit('integrity', i))
        cacheStream.on('size', s => body.emit('size', s))
        tee.pipe(cacheStream)
        // TODO if the cache write fails, log a warning but return the response anyway
        // eslint-disable-next-line promise/catch-or-return
        cacheStream.promise().then(cacheWriteResolve, cacheWriteReject)
        body.unshift(tee)
        body.unshift(this.response.body)
      }

      body.once('resume', onResume)
      // if the body ends without ever being resumed, drop the pending listener
      body.once('end', () => body.removeListener('resume', onResume))
    } else {
      await cacache.index.insert(this.options.cachePath, this.key, null, cacheOpts)
    }

    // note: we do not set the x-local-cache-hash header because we do not know
    // the hash value until after the write to the cache completes, which doesn't
    // happen until after the response has been sent and it's too late to write
    // the header anyway
    this.response.headers.set('x-local-cache', encodeURIComponent(this.options.cachePath))
    this.response.headers.set('x-local-cache-key', encodeURIComponent(this.key))
    this.response.headers.set('x-local-cache-mode', 'stream')
    this.response.headers.set('x-local-cache-status', status)
    this.response.headers.set('x-local-cache-time', new Date().toISOString())
    const newResponse = new Response(body, {
      url: this.response.url,
      status: this.response.status,
      headers: this.response.headers,
      counter: this.options.counter,
    })
    return newResponse
  }

  // use the cached data to create a response and return it
  //
  // method  - the http method of the request being answered
  // options - reads `counter` for the new Response
  // status  - value written to the x-local-cache-status header
  async respond (method, options, status) {
    let response
    if (method === 'HEAD' || [301, 308].includes(this.response.status)) {
      // if the request is a HEAD, or the response is a redirect,
      // then the metadata in the entry already includes everything
      // we need to build a response
      response = this.response
    } else {
      // we're responding with a full cached response, so create a body
      // that reads from cacache and attach it to a new Response
      const body = new Minipass()
      const headers = { ...this.policy.responseHeaders() }

      // the cache read stream is only opened once the body is first resumed
      const onResume = () => {
        const cacheStream = cacache.get.stream.byDigest(
          this.options.cachePath, this.entry.integrity, { memoize: this.options.memoize }
        )
        cacheStream.on('error', async (err) => {
          cacheStream.pause()
          if (err.code === 'EINTEGRITY') {
            try {
              await cacache.rm.content(
                this.options.cachePath, this.entry.integrity, { memoize: this.options.memoize }
              )
            } catch (rmErr) {
              // removing the corrupt content is best effort. a failure here
              // must not turn into an unhandled rejection, nor stop the
              // original read error from reaching the consumer below
            }
          }
          if (err.code === 'ENOENT' || err.code === 'EINTEGRITY') {
            await CacheEntry.invalidate(this.request, this.options)
          }
          body.emit('error', err)
          cacheStream.resume()
        })
        // emit the integrity and size events based on our metadata so we're consistent
        body.emit('integrity', this.entry.integrity)
        body.emit('size', Number(headers['content-length']))
        cacheStream.pipe(body)
      }

      body.once('resume', onResume)
      // if the body ends without ever being resumed, drop the pending listener
      body.once('end', () => body.removeListener('resume', onResume))
      response = new Response(body, {
        url: this.entry.metadata.url,
        counter: options.counter,
        status: 200,
        headers,
      })
    }

    response.headers.set('x-local-cache', encodeURIComponent(this.options.cachePath))
    response.headers.set('x-local-cache-hash', encodeURIComponent(this.entry.integrity))
    response.headers.set('x-local-cache-key', encodeURIComponent(this.key))
    response.headers.set('x-local-cache-mode', 'stream')
    response.headers.set('x-local-cache-status', status)
    response.headers.set('x-local-cache-time', new Date(this.entry.metadata.time).toUTCString())
    return response
  }

  // use the provided request along with this cache entry to
  // revalidate the stored response. returns a response, either
  // from the cache or from the update
  //
  // request - the user's request
  // options - the user's options; reads `cachePath` and
  //           `cacheAdditionalHeaders`, and is forwarded to the remote fetch
  //
  // rethrows the network error when the fetch fails and the policy
  // says the entry must be revalidated
  async revalidate (request, options) {
    const revalidateRequest = new Request(request, {
      headers: this.policy.revalidationHeaders(request),
    })

    let response
    try {
      // NOTE: be sure to remove the headers property from the
      // user supplied options, since we have already defined
      // them on the new request object. if they're still in the
      // options then those will overwrite the ones from the policy
      response = await remote(revalidateRequest, {
        ...options,
        headers: undefined,
      })
    } catch (err) {
      // if the network fetch fails, return the stale
      // cached response unless it has a cache-control
      // of 'must-revalidate'
      if (!this.policy.mustRevalidate) {
        return this.respond(request.method, options, 'stale')
      }

      throw err
    }

    if (this.policy.revalidated(revalidateRequest, response)) {
      // we got a 304, write a new index to the cache and respond from cache
      const metadata = getMetadata(request, response, options)
      // 304 responses do not include headers that are specific to the response data
      // since they do not include a body, so we copy values for headers that were
      // in the old cache entry to the new one, if the new metadata does not already
      // include that header
      for (const name of KEEP_RESPONSE_HEADERS) {
        if (
          !hasOwnProperty(metadata.resHeaders, name) &&
          hasOwnProperty(this.entry.metadata.resHeaders, name)
        ) {
          metadata.resHeaders[name] = this.entry.metadata.resHeaders[name]
        }
      }

      for (const name of options.cacheAdditionalHeaders) {
        const inMeta = hasOwnProperty(metadata.resHeaders, name)
        const inEntry = hasOwnProperty(this.entry.metadata.resHeaders, name)
        const inPolicy = hasOwnProperty(this.policy.response.headers, name)

        // if the header is in the existing entry, but it is not in the metadata
        // then we need to write it to the metadata as this will refresh the on-disk cache
        if (!inMeta && inEntry) {
          metadata.resHeaders[name] = this.entry.metadata.resHeaders[name]
        }
        // if the header is in the metadata, but not in the policy, then we need to set
        // it in the policy so that it's included in the immediate response. future
        // responses will load a new cache entry, so we don't need to change that
        if (!inPolicy && inMeta) {
          this.policy.response.headers[name] = metadata.resHeaders[name]
        }
      }

      try {
        await cacache.index.insert(options.cachePath, this.key, this.entry.integrity, {
          size: this.entry.size,
          metadata,
        })
      } catch (err) {
        // if updating the cache index fails, we ignore it and
        // respond anyway
      }
      return this.respond(request.method, options, 'revalidated')
    }

    // if we got a modified response, create a new entry based on it
    const newEntry = new CacheEntry({
      request,
      response,
      options,
    })

    // respond with the new entry while writing it to the cache
    return newEntry.store('updated')
  }
}

module.exports = CacheEntry