'use strict'

const {
  mkdir,
  readFile,
  rm,
  stat,
  truncate,
  writeFile,
} = require('fs/promises')
const pMap = require('p-map')
const contentPath = require('./content/path')
const fsm = require('fs-minipass')
const glob = require('./util/glob.js')
const index = require('./entry-index')
const path = require('path')
const ssri = require('ssri')

// Safe own-property check that works even for objects with a clobbered
// or null prototype.
const hasOwnProperty = (obj, key) =>
  Object.prototype.hasOwnProperty.call(obj, key)

// Fill in defaults: 20-way concurrency and a no-op logger.
const verifyOpts = (opts) => ({
  concurrency: 20,
  log: { silly () {} },
  ...opts,
})

module.exports = verify

// Run the full verification pipeline over the cache at `cache`.
// Each step may return a partial stats object; all of them are merged
// into the final `stats` result, and per-step wall-clock timings are
// recorded under `stats.runTime`.
async function verify (cache, opts) {
  opts = verifyOpts(opts)
  opts.log.silly('verify', 'verifying cache at', cache)

  const steps = [
    markStartTime,
    fixPerms,
    garbageCollect,
    rebuildIndex,
    cleanTmp,
    writeVerifile,
    markEndTime,
  ]

  const stats = { runTime: {} }
  for (const step of steps) {
    const label = step.name
    const start = new Date()
    const s = await step(cache, opts)
    if (s) {
      // Merge this step's partial stats into the aggregate.
      Object.assign(stats, s)
    }
    stats.runTime[label] = new Date() - start
  }
  // markStartTime/markEndTime contribute Date objects; subtracting
  // them yields the total elapsed milliseconds.
  stats.runTime.total = stats.endTime - stats.startTime
  opts.log.silly(
    'verify',
    'verification finished for',
    cache,
    'in',
    `${stats.runTime.total}ms`
  )
  return stats
}

async function markStartTime (cache, opts) {
  return { startTime: new Date() }
}

async function markEndTime (cache, opts) {
  return { endTime: new Date() }
}

// Ensure the cache root exists (mkdir -p); permissions come from the
// process umask.
async function fixPerms (cache, opts) {
  opts.log.silly('verify', 'fixing cache permissions')
  await mkdir(cache, { recursive: true })
  return null
}

// Implements a naive mark-and-sweep tracing garbage collector.
//
// The algorithm is basically as follows:
// 1. Read (and filter) all index entries ("pointers")
// 2. Mark each integrity value as "live"
// 3. Read entire filesystem tree in `content-vX/` dir
// 4. If content is live, verify its checksum and delete it if it fails
// 5. If content is not marked as live, rm it.
//
async function garbageCollect (cache, opts) {
  opts.log.silly('verify', 'garbage collecting content')
  const indexStream = index.lsStream(cache)
  const liveContent = new Set()
  indexStream.on('data', (entry) => {
    // Entries rejected by the caller's filter are not marked live, so
    // their content becomes eligible for sweeping below.
    if (opts.filter && !opts.filter(entry)) {
      return
    }

    // integrity is stringified, re-parse it so we can get each hash
    const integrity = ssri.parse(entry.integrity)
    for (const algo in integrity) {
      liveContent.add(integrity[algo].toString())
    }
  })
  await new Promise((resolve, reject) => {
    indexStream.on('end', resolve).on('error', reject)
  })
  const contentDir = contentPath.contentDir(cache)
  const files = await glob(path.join(contentDir, '**'), {
    follow: false,
    nodir: true,
    nosort: true,
  })
  const stats = {
    verifiedContent: 0,
    reclaimedCount: 0,
    reclaimedSize: 0,
    badContentCount: 0,
    keptSize: 0,
  }
  await pMap(
    files,
    async (f) => {
      // Content paths look like .../content-vX/<algo>/<aa>/<bb>/<rest>;
      // the last three segments re-joined are the hex digest, and the
      // fourth-from-last is the hash algorithm.
      const split = f.split(/[/\\]/)
      const digest = split.slice(split.length - 3).join('')
      const algo = split[split.length - 4]
      const integrity = ssri.fromHex(digest, algo)
      if (liveContent.has(integrity.toString())) {
        // Live content: re-verify its checksum; verifyContent deletes
        // it on integrity failure.
        const info = await verifyContent(f, integrity)
        if (!info.valid) {
          stats.reclaimedCount++
          stats.badContentCount++
          stats.reclaimedSize += info.size
        } else {
          stats.verifiedContent++
          stats.keptSize += info.size
        }
      } else {
        // No entries refer to this content. We can delete.
        stats.reclaimedCount++
        const s = await stat(f)
        await rm(f, { recursive: true, force: true })
        stats.reclaimedSize += s.size
      }
    },
    { concurrency: opts.concurrency }
  )
  return stats
}

// Stat `filepath` and stream-check it against `sri`. Returns
// { size, valid }. Content that fails the integrity check is removed;
// content that has vanished (ENOENT) reports size 0 and invalid.
// Any other error is rethrown.
async function verifyContent (filepath, sri) {
  const contentInfo = {}
  try {
    const { size } = await stat(filepath)
    contentInfo.size = size
    contentInfo.valid = true
    await ssri.checkStream(new fsm.ReadStream(filepath), sri)
  } catch (err) {
    if (err.code === 'ENOENT') {
      return { size: 0, valid: false }
    }
    if (err.code !== 'EINTEGRITY') {
      throw err
    }

    await rm(filepath, { recursive: true, force: true })
    contentInfo.valid = false
  }
  return contentInfo
}

// Group all index entries by their hashed bucket and rewrite each
// bucket file from scratch, dropping entries rejected by the caller's
// filter. Returns counts of kept, rejected, and missing-content entries.
async function rebuildIndex (cache, opts) {
  opts.log.silly('verify', 'rebuilding index')
  const entries = await index.ls(cache)
  const stats = {
    missingContent: 0,
    rejectedEntries: 0,
    totalEntries: 0,
  }
  const buckets = {}
  for (const k in entries) {
    /* istanbul ignore else */
    if (hasOwnProperty(entries, k)) {
      const hashed = index.hashKey(k)
      const entry = entries[k]
      const excluded = opts.filter && !opts.filter(entry)
      if (excluded) {
        stats.rejectedEntries++
      }
      // Create the bucket on first sight of this hash (even when the
      // entry is excluded — the bucket file still gets truncated), but
      // only keep entries that passed the filter.
      if (!buckets[hashed]) {
        buckets[hashed] = []
        buckets[hashed]._path = index.bucketPath(cache, k)
      }
      if (!excluded) {
        buckets[hashed].push(entry)
      }
    }
  }
  await pMap(
    Object.keys(buckets),
    (key) => {
      return rebuildBucket(cache, buckets[key], stats, opts)
    },
    { concurrency: opts.concurrency }
  )
  return stats
}

async function rebuildBucket (cache, bucket, stats, opts) {
  await truncate(bucket._path)
  // This needs to be serialized because cacache explicitly
  // lets very racy bucket conflicts clobber each other.
  for (const entry of bucket) {
    const content = contentPath(cache, entry.integrity)
    try {
      // Only re-insert entries whose content actually exists on disk.
      await stat(content)
      await index.insert(cache, entry.key, entry.integrity, {
        metadata: entry.metadata,
        size: entry.size,
        time: entry.time,
      })
      stats.totalEntries++
    } catch (err) {
      if (err.code === 'ENOENT') {
        stats.rejectedEntries++
        stats.missingContent++
      } else {
        throw err
      }
    }
  }
}

// Remove the cache's tmp directory entirely; `force` makes this a
// no-op when it does not exist.
function cleanTmp (cache, opts) {
  opts.log.silly('verify', 'cleaning tmp directory')
  return rm(path.join(cache, 'tmp'), { recursive: true, force: true })
}

// Record the timestamp of this verification run.
async function writeVerifile (cache, opts) {
  const verifile = path.join(cache, '_lastverified')
  opts.log.silly('verify', 'writing verifile to ' + verifile)
  return writeFile(verifile, `${Date.now()}`)
}

module.exports.lastRun = lastRun

// Read back the timestamp written by writeVerifile as a Date.
async function lastRun (cache) {
  const data = await readFile(path.join(cache, '_lastverified'), { encoding: 'utf8' })
  return new Date(+data)
}