'use strict'

const {
  mkdir,
  readFile,
  rm,
  stat,
  truncate,
  writeFile,
} = require('fs/promises')
const pMap = require('p-map')
const contentPath = require('./content/path')
const fsm = require('fs-minipass')
const glob = require('./util/glob.js')
const index = require('./entry-index')
const path = require('path')
const ssri = require('ssri')

const hasOwnProperty = (obj, key) =>
  Object.prototype.hasOwnProperty.call(obj, key)

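// Default options: up to 20 concurrent filesystem operations and a no-op
// "silly" logger. Anything the caller passes in overrides these defaults.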
const verifyOpts = (opts) => ({
  concurrency: 20,
  log: { silly () {} },
  ...opts,
})

module.exports = verify

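// Run every verification step in order, folding the stats object each step
// returns into a single result and recording per-step runtimes under
// stats.runTime. For example:
//
//   const stats = await verify(cachePath, { concurrency: 10 })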
async function verify (cache, opts) {
  opts = verifyOpts(opts)
  opts.log.silly('verify', 'verifying cache at', cache)

  const steps = [
    markStartTime,
    fixPerms,
    garbageCollect,
    rebuildIndex,
    cleanTmp,
    writeVerifile,
    markEndTime,
  ]

  const stats = {}
  for (const step of steps) {
    const label = step.name
    const start = new Date()
    const s = await step(cache, opts)
    if (s) {
      Object.keys(s).forEach((k) => {
        stats[k] = s[k]
      })
    }
    const end = new Date()
    if (!stats.runTime) {
      stats.runTime = {}
    }
    stats.runTime[label] = end - start
  }
  stats.runTime.total = stats.endTime - stats.startTime
  opts.log.silly(
    'verify',
    'verification finished for',
    cache,
    'in',
    `${stats.runTime.total}ms`
  )
  return stats
}

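// Bookkeeping steps: record wall-clock start and end times so verify() can
// compute stats.runTime.total once all steps have finished.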
async function markStartTime (cache, opts) {
  return { startTime: new Date() }
}

async function markEndTime (cache, opts) {
  return { endTime: new Date() }
}

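// Make sure the cache root exists before later steps touch it; mkdir with
// recursive: true is a no-op when the directory is already present.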
async function fixPerms (cache, opts) {
  opts.log.silly('verify', 'fixing cache permissions')
  await mkdir(cache, { recursive: true })
  return null
}

// Implements a naive mark-and-sweep tracing garbage collector.
//
// The algorithm is basically as follows:
// 1. Read (and filter) all index entries ("pointers")
// 2. Mark each integrity value as "live"
// 3. Read entire filesystem tree in `content-vX/` dir
// 4. If content is live, verify its checksum and delete it if it fails
// 5. If content is not marked as live, rm it.
//
async function garbageCollect (cache, opts) {
  opts.log.silly('verify', 'garbage collecting content')
  const indexStream = index.lsStream(cache)
  const liveContent = new Set()
  indexStream.on('data', (entry) => {
    if (opts.filter && !opts.filter(entry)) {
      return
    }

    // integrity is stringified, re-parse it so we can get each hash
    const integrity = ssri.parse(entry.integrity)
    for (const algo in integrity) {
      liveContent.add(integrity[algo].toString())
    }
  })
  await new Promise((resolve, reject) => {
    indexStream.on('end', resolve).on('error', reject)
  })
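  // Every digest referenced by a surviving index entry is now marked live.
  // Next, walk every file actually present under the content directory.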
  const contentDir = contentPath.contentDir(cache)
  const files = await glob(path.join(contentDir, '**'), {
    follow: false,
    nodir: true,
    nosort: true,
  })
  const stats = {
    verifiedContent: 0,
    reclaimedCount: 0,
    reclaimedSize: 0,
    badContentCount: 0,
    keptSize: 0,
  }
  await pMap(
    files,
    async (f) => {
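      // Content files live at <contentDir>/<algorithm>/<aa>/<bb>/<rest-of-hex>,
      // so the last three path segments joined together form the hex digest
      // and the fourth-from-last segment names the hash algorithm.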
      const split = f.split(/[/\\]/)
      const digest = split.slice(split.length - 3).join('')
      const algo = split[split.length - 4]
      const integrity = ssri.fromHex(digest, algo)
      if (liveContent.has(integrity.toString())) {
        const info = await verifyContent(f, integrity)
        if (!info.valid) {
          stats.reclaimedCount++
          stats.badContentCount++
          stats.reclaimedSize += info.size
        } else {
          stats.verifiedContent++
          stats.keptSize += info.size
        }
      } else {
        // No entries refer to this content. We can delete.
        stats.reclaimedCount++
        const s = await stat(f)
        await rm(f, { recursive: true, force: true })
        stats.reclaimedSize += s.size
      }
      return stats
    },
    { concurrency: opts.concurrency }
  )
  return stats
}

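// Stat a content file and stream it through ssri to confirm it still matches
// its integrity value. Missing files report { size: 0, valid: false }; files
// that fail the integrity check are removed and reported as invalid.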
async function verifyContent (filepath, sri) {
  const contentInfo = {}
  try {
    const { size } = await stat(filepath)
    contentInfo.size = size
    contentInfo.valid = true
    await ssri.checkStream(new fsm.ReadStream(filepath), sri)
  } catch (err) {
    if (err.code === 'ENOENT') {
      return { size: 0, valid: false }
    }
    if (err.code !== 'EINTEGRITY') {
      throw err
    }

    await rm(filepath, { recursive: true, force: true })
    contentInfo.valid = false
  }
  return contentInfo
}

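// Re-read the whole index, group surviving entries by their bucket file
// (applying opts.filter), then rewrite each bucket from scratch so that
// filtered-out and stale entries disappear from disk.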
async function rebuildIndex (cache, opts) {
  opts.log.silly('verify', 'rebuilding index')
  const entries = await index.ls(cache)
  const stats = {
    missingContent: 0,
    rejectedEntries: 0,
    totalEntries: 0,
  }
  const buckets = {}
  for (const k in entries) {
    /* istanbul ignore else */
    if (hasOwnProperty(entries, k)) {
      const hashed = index.hashKey(k)
      const entry = entries[k]
      const excluded = opts.filter && !opts.filter(entry)
      excluded && stats.rejectedEntries++
      if (buckets[hashed] && !excluded) {
        buckets[hashed].push(entry)
      } else if (buckets[hashed] && excluded) {
        // skip
      } else if (excluded) {
        buckets[hashed] = []
        buckets[hashed]._path = index.bucketPath(cache, k)
      } else {
        buckets[hashed] = [entry]
        buckets[hashed]._path = index.bucketPath(cache, k)
      }
    }
  }
  await pMap(
    Object.keys(buckets),
    (key) => {
      return rebuildBucket(cache, buckets[key], stats, opts)
    },
    { concurrency: opts.concurrency }
  )
  return stats
}

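// Empty a bucket file and re-insert each of its entries one at a time,
// counting and skipping any entry whose content file no longer exists.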
async function rebuildBucket (cache, bucket, stats, opts) {
  await truncate(bucket._path)
  // This needs to be serialized because cacache explicitly
  // lets very racy bucket conflicts clobber each other.
  for (const entry of bucket) {
    const content = contentPath(cache, entry.integrity)
    try {
      await stat(content)
      await index.insert(cache, entry.key, entry.integrity, {
        metadata: entry.metadata,
        size: entry.size,
        time: entry.time,
      })
      stats.totalEntries++
    } catch (err) {
      if (err.code === 'ENOENT') {
        stats.rejectedEntries++
        stats.missingContent++
      } else {
        throw err
      }
    }
  }
}

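// Remove the cache's tmp directory along with anything left inside it.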
function cleanTmp (cache, opts) {
  opts.log.silly('verify', 'cleaning tmp directory')
  return rm(path.join(cache, 'tmp'), { recursive: true, force: true })
}

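// Record when this verification ran by writing the current epoch-millisecond
// timestamp to _lastverified; lastRun() below reads it back.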
async function writeVerifile (cache, opts) {
  const verifile = path.join(cache, '_lastverified')
  opts.log.silly('verify', 'writing verifile to ' + verifile)
  return writeFile(verifile, `${Date.now()}`)
}

module.exports.lastRun = lastRun

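// Read _lastverified and return the time of the last completed verification
// as a Date.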
async function lastRun (cache) {
  const data = await readFile(path.join(cache, '_lastverified'), { encoding: 'utf8' })
  return new Date(+data)
}