/*
 * libwebsockets - small server side websockets and web server implementation
 *
 * Copyright (C) 2010 - 2019 Andy Green <andy@warmcat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#if !defined(_GNU_SOURCE)
#define _GNU_SOURCE
#endif
#include <pthread.h>

#include <libwebsockets.h>
#include "private-lib-core.h"

#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <dirent.h>
#include <time.h>
#include <errno.h>
#include <stdarg.h>

#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>

#if defined(__APPLE__)
#include <sys/dirent.h>
/* Travis OSX does not have DT_REG... */
#if !defined(DT_REG)
#define DT_REG 8
#endif
#endif

struct file_entry {
	lws_list_ptr sorted;
	lws_list_ptr prev;
	char name[64];
	time_t modified;
	size_t size;
};

struct lws_diskcache_scan {
	struct file_entry *batch;
	const char *cache_dir_base;
	lws_list_ptr head;
	time_t last_scan_completed;
	uint64_t agg_size;
	uint64_t cache_size_limit;
	uint64_t avg_size;
	uint64_t cache_tries;
	uint64_t cache_hits;
	int cache_subdir;
	int batch_in_use;
	int agg_file_count;
	int secs_waiting;
};

#define KIB (1024)
#define MIB (KIB * KIB)

#define lp_to_fe(p, _n) lws_list_ptr_container(p, struct file_entry, _n)

static const char *hex = "0123456789abcdef";

#define BATCH_COUNT 128

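/*
 * Sort comparator used with lws_list_ptr_insert(): it keeps the candidate
 * list ordered most-recently-modified first, so the list head is always the
 * newest entry in the batch (see the trim notes further down).
 */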
static int
fe_modified_sort(lws_list_ptr a, lws_list_ptr b)
{
	struct file_entry *p1 = lp_to_fe(a, sorted), *p2 = lp_to_fe(b, sorted);

	return (int)((long)p2->modified - (long)p1->modified);
}

struct lws_diskcache_scan *
lws_diskcache_create(const char *cache_dir_base, uint64_t cache_size_limit)
{
	struct lws_diskcache_scan *lds = lws_malloc(sizeof(*lds), "cachescan");

	if (!lds)
		return NULL;

	memset(lds, 0, sizeof(*lds));

	lds->cache_dir_base = cache_dir_base;
	lds->cache_size_limit = cache_size_limit;

	return lds;
}

void
lws_diskcache_destroy(struct lws_diskcache_scan **lds)
{
	if ((*lds)->batch)
		lws_free((*lds)->batch);
	lws_free(*lds);
	*lds = NULL;
}

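/*
 * Create the cache directory layout: the base dir itself plus a 16 x 16
 * fan-out of two-level hex subdirs (base/0/0 ... base/f/f), each chowned to
 * the given uid.  A cached object whose hex hash starts with "ab" will then
 * live under base/a/b/.
 */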
int
lws_diskcache_prepare(const char *cache_base_dir, int mode, uid_t uid)
{
	char dir[256];
	int n, m;

	(void)mkdir(cache_base_dir, (mode_t)mode);
	if (chown(cache_base_dir, uid, (gid_t)-1))
		lwsl_err("%s: %s: unable to chown %d\n", __func__,
			 cache_base_dir, uid);

	for (n = 0; n < 16; n++) {
		lws_snprintf(dir, sizeof(dir), "%s/%c", cache_base_dir, hex[n]);
		(void)mkdir(dir, (mode_t)mode);
		if (chown(dir, uid, (gid_t)-1))
			lwsl_err("%s: %s: unable to chown %d\n", __func__,
				 dir, uid);
		for (m = 0; m < 16; m++) {
			lws_snprintf(dir, sizeof(dir), "%s/%c/%c",
				     cache_base_dir, hex[n], hex[m]);
			(void)mkdir(dir, (mode_t)mode);
			if (chown(dir, uid, (gid_t)-1))
				lwsl_err("%s: %s: unable to chown %d\n",
					 __func__, dir, uid);
		}
	}

	return 0;
}

/* Truncate the incoming temp filename in place at the '~' suffix that
 * lws_diskcache_query() appended, and rename the file on disk from the full
 * temp path to the truncated, final cache name */

int
lws_diskcache_finalize_name(char *cache)
{
	char ren[256], *p;

	strncpy(ren, cache, sizeof(ren) - 1);
	ren[sizeof(ren) - 1] = '\0';
	p = strchr(cache, '~');
	if (p) {
		*p = '\0';
		if (rename(ren, cache)) {
			lwsl_err("%s: problem renaming %s to %s\n", __func__,
				 ren, cache);
			return 1;
		}

		return 0;
	}

	return 1;
}

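/*
 * Look up hash_hex in the cache.  Returns LWS_DISKCACHE_QUERY_EXISTS with
 * *_fd open readonly on the cached file and *extant_cache_len set to its
 * size; LWS_DISKCACHE_QUERY_CREATING with *_fd open on a freshly-created,
 * uniquely-named temp file (the path written into cache gains a "~..."
 * suffix which lws_diskcache_finalize_name() later strips); or
 * LWS_DISKCACHE_QUERY_NO_CACHE if caching is disabled, the miss came from a
 * bot, or the file could not be opened or created.
 */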
int
lws_diskcache_query(struct lws_diskcache_scan *lds, int is_bot,
		    const char *hash_hex, int *_fd, char *cache, int cache_len,
		    size_t *extant_cache_len)
{
	struct stat s;
	int n;

	/* caching is disabled? */
	if (!lds->cache_dir_base)
		return LWS_DISKCACHE_QUERY_NO_CACHE;

	if (!is_bot)
		lds->cache_tries++;

	n = lws_snprintf(cache, (size_t)cache_len, "%s/%c/%c/%s",
			 lds->cache_dir_base, hash_hex[0], hash_hex[1],
			 hash_hex);

	lwsl_info("%s: job cache %s\n", __func__, cache);

	*_fd = open(cache, O_RDONLY);
	if (*_fd >= 0) {
		if (!is_bot)
			lds->cache_hits++;

		if (fstat(*_fd, &s)) {
			close(*_fd);

			return LWS_DISKCACHE_QUERY_NO_CACHE;
		}

		*extant_cache_len = (size_t)s.st_size;

		/* "touch" the hit cache file so it's last for LRU now */
		(void)utimes(cache, NULL);

		return LWS_DISKCACHE_QUERY_EXISTS;
	}

	/* bots are too random to pollute the cache with their antics */
	if (is_bot)
		return LWS_DISKCACHE_QUERY_NO_CACHE;

	/* let's create it first with a unique temp name */

	lws_snprintf(cache + n, (size_t)cache_len - (unsigned int)n,
		     "~%d-%p", (int)getpid(), extant_cache_len);

	*_fd = open(cache, O_RDWR | O_CREAT | O_TRUNC, 0600);
	if (*_fd < 0) {
		/* well... ok... we will proceed without cache then... */
		lwsl_notice("%s: Problem creating cache %s: errno %d\n",
			    __func__, cache, errno);
		return LWS_DISKCACHE_QUERY_NO_CACHE;
	}

	return LWS_DISKCACHE_QUERY_CREATING;
}
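
/*
 * Illustrative caller flow (a sketch only; generate_object() and the buffer
 * names are assumptions, not part of this api):
 *
 *	char path[256];
 *	size_t extant;
 *	int fd;
 *
 *	switch (lws_diskcache_query(lds, 0, hash_hex, &fd, path,
 *				    sizeof(path), &extant)) {
 *	case LWS_DISKCACHE_QUERY_EXISTS:
 *		... read the extant bytes from fd, then close(fd) ...
 *		break;
 *	case LWS_DISKCACHE_QUERY_CREATING:
 *		... generate_object() writing its output to fd, close(fd),
 *		    then lws_diskcache_finalize_name(path) to publish it ...
 *		break;
 *	default:
 *		... LWS_DISKCACHE_QUERY_NO_CACHE: generate_object() without
 *		    caching the result ...
 *	}
 */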

int
lws_diskcache_secs_to_idle(struct lws_diskcache_scan *lds)
{
	return lds->secs_waiting;
}

/*
 * The goal is to collect the oldest BATCH_COUNT filepaths and filesizes from
 * the dirs under the cache dir.  Since we don't need or want a full list of
 * files in there in memory at once, we restrict the linked-list size to
 * BATCH_COUNT entries, and once it is full, simply ignore any further files
 * that are newer than the newest one on that list.  A file older than the
 * newest entry already on the list evicts that newest entry and is sorted
 * into its correct position.  In this way, no matter the number of files to
 * be processed, the memory requirement is fixed at BATCH_COUNT
 * struct file_entry-s.
 *
 * The oldest subset of BATCH_COUNT files is sorted into the lds->batch
 * allocation in more recent -> least recent order.
 *
 * We want to track the total size of all files we saw as well, so we know if
 * we need to actually do anything yet to restrict how much space it's taking
 * up.
 *
 * And we want to do those things statefully and incrementally instead of one
 * big atomic operation, since the user may want a huge cache, so we look in
 * one cache dir at a time and track the state in the lws_diskcache_scan
 * struct.
 *
 * When we have seen everything, we add the doubly-linked prev pointers and then
 * if we are over the limit, start deleting up to BATCH_COUNT files working back
 * from the end.
 */
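
/*
 * Illustrative way to drive the incremental scan (a sketch only: the cache
 * path, size limit, stop flag and polling cadence are assumptions, not
 * requirements):
 *
 *	lws_diskcache_prepare("/var/cache/example", 0700, uid);
 *	lds = lws_diskcache_create("/var/cache/example", 256 * MIB);
 *
 *	while (!stopping) {
 *		lws_diskcache_trim(lds);
 *		sleep(1);
 *	}
 *
 * Each call that is due walks at most one of the 256 hash subdirs; after the
 * last one it decides what, if anything, to delete, and
 * lws_diskcache_secs_to_idle() then reports how long the next full sweep can
 * be deferred.
 */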

int
lws_diskcache_trim(struct lws_diskcache_scan *lds)
{
	size_t cache_size_limit = (size_t)lds->cache_size_limit;
	char dirpath[132], filepath[132 + 32];
	lws_list_ptr lp, op = NULL;
	int files_trimmed = 0;
	struct file_entry *p;
	int fd, n, ret = -1;
	size_t trimmed = 0;
	struct dirent *de;
	struct stat s;
	DIR *dir;

	if (!lds->cache_subdir) {

		if (lds->last_scan_completed + lds->secs_waiting > time(NULL))
			return 0;

		lds->batch = lws_malloc(sizeof(struct file_entry) *
				BATCH_COUNT, "cache_trim");
		if (!lds->batch) {
			lwsl_err("%s: OOM\n", __func__);

			return 1;
		}
		lds->agg_size = 0;
		lds->head = NULL;
		lds->batch_in_use = 0;
		lds->agg_file_count = 0;
	}

	lws_snprintf(dirpath, sizeof(dirpath), "%s/%c/%c",
		     lds->cache_dir_base, hex[(lds->cache_subdir >> 4) & 15],
		     hex[lds->cache_subdir & 15]);

	dir = opendir(dirpath);
	if (!dir) {
		lwsl_err("Unable to walk cache dir '%s'\n",
			 dirpath);
		return -1;
	}

	do {
		de = readdir(dir);
		if (!de)
			break;

		if (de->d_type != DT_REG)
			continue;

		lds->agg_file_count++;

		lws_snprintf(filepath, sizeof(filepath), "%s/%s", dirpath,
			     de->d_name);

		fd = open(filepath, O_RDONLY);
		if (fd < 0) {
			lwsl_err("%s: cannot open %s\n", __func__, filepath);

			continue;
		}

		n = fstat(fd, &s);
		close(fd);
		if (n) {
			lwsl_notice("%s: cannot stat %s\n", __func__, filepath);
			continue;
		}

		lds->agg_size += (uint64_t)s.st_size;

		if (lds->batch_in_use == BATCH_COUNT) {
			/*
			 * once we filled up the batch with candidates, we don't
			 * need to consider any files newer than the newest guy
			 * on the list...
			 */
			if (lp_to_fe(lds->head, sorted)->modified < s.st_mtime)
				continue;

			/*
			 * ... and if we find an older file later, we know it
			 * will be replacing the newest guy on the list, so use
			 * that directly...
			 */
			p = lds->head;
			lds->head = p->sorted;
		} else
			/* we are still accepting anything to fill the batch */

			p = &lds->batch[lds->batch_in_use++];

		p->sorted = NULL;
		strncpy(p->name, de->d_name, sizeof(p->name) - 1);
		p->name[sizeof(p->name) - 1] = '\0';
		p->modified = s.st_mtime;
		p->size = (size_t)s.st_size;

		lws_list_ptr_insert(&lds->head, &p->sorted, fe_modified_sort);
	} while (de);

	ret = 0;

	lds->cache_subdir++;
	if (lds->cache_subdir != 0x100)
		goto done;

	/* we completed the whole scan... */

	/* if really no guidance, then 256MiB */
	if (!cache_size_limit)
		cache_size_limit = 256 * 1024 * 1024;

	if (lds->agg_size > cache_size_limit) {

		/* apply prev pointers to make the list doubly-linked */

		lp = lds->head;
		while (lp) {
			p = lp_to_fe(lp, sorted);

			p->prev = op;
			op = &p->prev;
			lp = p->sorted;
		}

		/*
		 * reverse the list (start from tail, now traverse using
		 * .prev)... it's oldest-first now...
		 */

		lp = op;

		while (lp && lds->agg_size > cache_size_limit) {
			p = lp_to_fe(lp, prev);

			lws_snprintf(filepath, sizeof(filepath), "%s/%c/%c/%s",
				     lds->cache_dir_base, p->name[0],
				     p->name[1], p->name);

			if (!unlink(filepath)) {
				lds->agg_size -= p->size;
				trimmed += p->size;
				files_trimmed++;
			} else
				lwsl_notice("%s: Failed to unlink %s\n",
					    __func__, filepath);

			lp = p->prev;
		}

		if (files_trimmed)
			lwsl_notice("%s: %s: trimmed %d files totalling "
				    "%lldKiB, leaving %lldMiB\n", __func__,
				    lds->cache_dir_base, files_trimmed,
				    ((unsigned long long)trimmed) / KIB,
				    ((unsigned long long)lds->agg_size) / MIB);
	}

	if (lds->agg_size && lds->agg_file_count)
		lds->avg_size = lds->agg_size / (uint64_t)lds->agg_file_count;

	/*
	 * estimate how long we can go before scanning again... default we need
	 * to start again immediately
	 */

	lds->last_scan_completed = time(NULL);
	lds->secs_waiting = 1;

	if (lds->agg_size < cache_size_limit) {
		uint64_t avg = 4096, capacity, projected;

		/* let's use 80% of the real average for margin */
		if (lds->agg_size && lds->agg_file_count)
			avg = ((lds->agg_size * 8) /
			       (uint64_t)lds->agg_file_count) / 10;

		/*
		 * if we collected BATCH_COUNT files of the average size,
		 * how much can we clean up in 256s?
		 */

		capacity = avg * BATCH_COUNT;

		/*
		 * if the cache grew by 10%, would we hit the limit even then?
		 */
		projected = (lds->agg_size * 11) / 10;
		if (projected < cache_size_limit)
			/* no... */
			lds->secs_waiting = (int)((256 / 2) *
						  ((cache_size_limit -
						    projected) / capacity));
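
		/*
		 * For example (illustrative numbers only): if avg works out
		 * to 64KiB, capacity is 64KiB * 128 = 8MiB per sweep; with a
		 * 256MiB limit and a projected size of 200MiB, the wait is
		 * 128 * ((256 - 200) / 8) = 896s.
		 */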

		/*
		 * large waits imply we may not have enough info yet, so
		 * check once an hour at least.
		 */

		if (lds->secs_waiting > 3600)
			lds->secs_waiting = 3600;
	} else
		lds->secs_waiting = 0;

	lwsl_info("%s: cache %s: %lldKiB / %lldKiB, next scan %ds\n", __func__,
		  lds->cache_dir_base,
		  (unsigned long long)lds->agg_size / KIB,
		  (unsigned long long)cache_size_limit / KIB,
		  lds->secs_waiting);

	lws_free(lds->batch);
	lds->batch = NULL;

	lds->cache_subdir = 0;

done:
	closedir(dir);

	return ret;
}