1/* 2 * libwebsockets - small server side websockets and web server implementation 3 * 4 * Copyright (C) 2010 - 2019 Andy Green <andy@warmcat.com> 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 */ 24 25#if !defined(_GNU_SOURCE) 26#define _GNU_SOURCE 27#endif 28#include <pthread.h> 29 30#include <libwebsockets.h> 31#include "private-lib-core.h" 32 33#include <string.h> 34#include <stdio.h> 35#include <unistd.h> 36#include <fcntl.h> 37#include <dirent.h> 38#include <time.h> 39#include <errno.h> 40#include <stdarg.h> 41 42#include <sys/stat.h> 43#include <sys/time.h> 44#include <sys/types.h> 45 46#if defined(__APPLE__) 47#include <sys/dirent.h> 48/* Travis OSX does not have DT_REG... 
*/ 49#if !defined(DT_REG) 50#define DT_REG 8 51#endif 52#endif 53 54struct file_entry { 55 lws_list_ptr sorted; 56 lws_list_ptr prev; 57 char name[64]; 58 time_t modified; 59 size_t size; 60}; 61 62struct lws_diskcache_scan { 63 struct file_entry *batch; 64 const char *cache_dir_base; 65 lws_list_ptr head; 66 time_t last_scan_completed; 67 uint64_t agg_size; 68 uint64_t cache_size_limit; 69 uint64_t avg_size; 70 uint64_t cache_tries; 71 uint64_t cache_hits; 72 int cache_subdir; 73 int batch_in_use; 74 int agg_file_count; 75 int secs_waiting; 76}; 77 78#define KIB (1024) 79#define MIB (KIB * KIB) 80 81#define lp_to_fe(p, _n) lws_list_ptr_container(p, struct file_entry, _n) 82 83static const char *hex = "0123456789abcdef"; 84 85#define BATCH_COUNT 128 86 87static int 88fe_modified_sort(lws_list_ptr a, lws_list_ptr b) 89{ 90 struct file_entry *p1 = lp_to_fe(a, sorted), *p2 = lp_to_fe(b, sorted); 91 92 return (int)((long)p2->modified - (long)p1->modified); 93} 94 95struct lws_diskcache_scan * 96lws_diskcache_create(const char *cache_dir_base, uint64_t cache_size_limit) 97{ 98 struct lws_diskcache_scan *lds = lws_malloc(sizeof(*lds), "cachescan"); 99 100 if (!lds) 101 return NULL; 102 103 memset(lds, 0, sizeof(*lds)); 104 105 lds->cache_dir_base = cache_dir_base; 106 lds->cache_size_limit = cache_size_limit; 107 108 return lds; 109} 110 111void 112lws_diskcache_destroy(struct lws_diskcache_scan **lds) 113{ 114 if ((*lds)->batch) 115 lws_free((*lds)->batch); 116 lws_free(*lds); 117 *lds = NULL; 118} 119 120int 121lws_diskcache_prepare(const char *cache_base_dir, int mode, uid_t uid) 122{ 123 char dir[256]; 124 int n, m; 125 126 (void)mkdir(cache_base_dir, (unsigned short)mode); 127 if (chown(cache_base_dir, uid, (gid_t)-1)) 128 lwsl_err("%s: %s: unable to chown %d\n", __func__, 129 cache_base_dir, uid); 130 131 for (n = 0; n < 16; n++) { 132 lws_snprintf(dir, sizeof(dir), "%s/%c", cache_base_dir, hex[n]); 133 (void)mkdir(dir, (mode_t)mode); 134 if (chown(dir, uid, 
(uid_t)-1)) 135 lwsl_err("%s: %s: unable to chown %d\n", __func__, 136 dir, uid); 137 for (m = 0; m < 16; m++) { 138 lws_snprintf(dir, sizeof(dir), "%s/%c/%c", 139 cache_base_dir, hex[n], hex[m]); 140 (void)mkdir(dir, (mode_t)mode); 141 if (chown(dir, uid, (uid_t)-1)) 142 lwsl_err("%s: %s: unable to chown %d\n", 143 __func__, dir, uid); 144 } 145 } 146 147 return 0; 148} 149 150/* copies and then truncates the incoming name, and renames the file at the 151 * untruncated path to have the new truncated name */ 152 153int 154lws_diskcache_finalize_name(char *cache) 155{ 156 char ren[256], *p; 157 158 strncpy(ren, cache, sizeof(ren) - 1); 159 ren[sizeof(ren) - 1] = '\0'; 160 p = strchr(cache, '~'); 161 if (p) { 162 *p = '\0'; 163 if (rename(ren, cache)) { 164 lwsl_err("%s: problem renaming %s to %s\n", __func__, 165 ren, cache); 166 return 1; 167 } 168 169 return 0; 170 } 171 172 return 1; 173} 174 175int 176lws_diskcache_query(struct lws_diskcache_scan *lds, int is_bot, 177 const char *hash_hex, int *_fd, char *cache, int cache_len, 178 size_t *extant_cache_len) 179{ 180 struct stat s; 181 int n; 182 183 /* caching is disabled? 
*/ 184 if (!lds->cache_dir_base) 185 return LWS_DISKCACHE_QUERY_NO_CACHE; 186 187 if (!is_bot) 188 lds->cache_tries++; 189 190 n = lws_snprintf(cache, (size_t)cache_len, "%s/%c/%c/%s", lds->cache_dir_base, 191 hash_hex[0], hash_hex[1], hash_hex); 192 193 lwsl_info("%s: job cache %s\n", __func__, cache); 194 195 *_fd = open(cache, O_RDONLY); 196 if (*_fd >= 0) { 197 int fd; 198 199 if (!is_bot) 200 lds->cache_hits++; 201 202 if (fstat(*_fd, &s)) { 203 close(*_fd); 204 205 return LWS_DISKCACHE_QUERY_NO_CACHE; 206 } 207 208 *extant_cache_len = (size_t)s.st_size; 209 210 /* "touch" the hit cache file so it's last for LRU now */ 211 fd = open(cache, O_RDWR); 212 if (fd >= 0) 213 close(fd); 214 215 return LWS_DISKCACHE_QUERY_EXISTS; 216 } 217 218 /* bots are too random to pollute the cache with their antics */ 219 if (is_bot) 220 return LWS_DISKCACHE_QUERY_NO_CACHE; 221 222 /* let's create it first with a unique temp name */ 223 224 lws_snprintf(cache + n, (size_t)cache_len - (unsigned int)n, "~%d-%p", (int)getpid(), 225 extant_cache_len); 226 227 *_fd = open(cache, O_RDWR | O_CREAT | O_TRUNC, 0600); 228 if (*_fd < 0) { 229 /* well... ok... we will proceed without cache then... */ 230 lwsl_notice("%s: Problem creating cache %s: errno %d\n", 231 __func__, cache, errno); 232 return LWS_DISKCACHE_QUERY_NO_CACHE; 233 } 234 235 return LWS_DISKCACHE_QUERY_CREATING; 236} 237 238int 239lws_diskcache_secs_to_idle(struct lws_diskcache_scan *lds) 240{ 241 return lds->secs_waiting; 242} 243 244/* 245 * The goal is to collect the oldest BATCH_COUNT filepaths and filesizes from 246 * the dirs under the cache dir. Since we don't need or want a full list of 247 * files in there in memory at once, we restrict the linked-list size to 248 * BATCH_COUNT entries, and once it is full, simply ignore any further files 249 * that are newer than the newest one on that list. 
Files older than the 250 * newest guy already on the list evict the newest guy already on the list 251 * and are sorted into the correct order. In this way no matter the number 252 * of files to be processed the memory requirement is fixed at BATCH_COUNT 253 * struct file_entry-s. 254 * 255 * The oldest subset of BATCH_COUNT files are sorted into the cd->batch 256 * allocation in more recent -> least recent order. 257 * 258 * We want to track the total size of all files we saw as well, so we know if 259 * we need to actually do anything yet to restrict how much space it's taking 260 * up. 261 * 262 * And we want to do those things statefully and incrementally instead of one 263 * big atomic operation, since the user may want a huge cache, so we look in 264 * one cache dir at a time and track state in the repodir struct. 265 * 266 * When we have seen everything, we add the doubly-linked prev pointers and then 267 * if we are over the limit, start deleting up to BATCH_COUNT files working back 268 * from the end. 
 */

/*
 * Incremental LRU trim step.  Each call scans ONE of the 256 cache subdirs,
 * accumulating the oldest-BATCH_COUNT candidate list described above; on the
 * call that completes subdir 0xff, any over-limit files are unlinked oldest-
 * first and the next-scan delay (secs_waiting) is estimated.
 *
 * Returns 0 on success (including "nothing to do yet"), 1 on OOM, -1 if a
 * subdir cannot be opened.
 */
int
lws_diskcache_trim(struct lws_diskcache_scan *lds)
{
	size_t cache_size_limit = (size_t)lds->cache_size_limit;
	char dirpath[132], filepath[132 + 32];
	lws_list_ptr lp, op = NULL;
	int files_trimmed = 0;
	struct file_entry *p;
	int fd, n, ret = -1;
	size_t trimmed = 0;
	struct dirent *de;
	struct stat s;
	DIR *dir;

	/* cache_subdir == 0 means we are (re)starting a full scan */
	if (!lds->cache_subdir) {

		/* too soon since the last completed scan? */
		if (lds->last_scan_completed + lds->secs_waiting > time(NULL))
			return 0;

		lds->batch = lws_malloc(sizeof(struct file_entry) *
					BATCH_COUNT, "cache_trim");
		if (!lds->batch) {
			lwsl_err("%s: OOM\n", __func__);

			return 1;
		}
		lds->agg_size = 0;
		lds->head = NULL;
		lds->batch_in_use = 0;
		lds->agg_file_count = 0;
	}

	/* this call handles base/<hi nibble>/<lo nibble> only */
	lws_snprintf(dirpath, sizeof(dirpath), "%s/%c/%c",
		     lds->cache_dir_base, hex[(lds->cache_subdir >> 4) & 15],
		     hex[lds->cache_subdir & 15]);

	dir = opendir(dirpath);
	if (!dir) {
		lwsl_err("Unable to walk repo dir '%s'\n",
			 lds->cache_dir_base);
		return -1;
	}

	do {
		de = readdir(dir);
		if (!de)
			break;

		/* only regular files can be cache entries */
		if (de->d_type != DT_REG)
			continue;

		lds->agg_file_count++;

		lws_snprintf(filepath, sizeof(filepath), "%s/%s", dirpath,
			     de->d_name);

		/* open + fstat rather than stat: we already have the path */
		fd = open(filepath, O_RDONLY);
		if (fd < 0) {
			lwsl_err("%s: cannot open %s\n", __func__, filepath);

			continue;
		}

		n = fstat(fd, &s);
		close(fd);
		if (n) {
			lwsl_notice("%s: cannot stat %s\n", __func__, filepath);
			continue;
		}

		lds->agg_size += (uint64_t)s.st_size;

		if (lds->batch_in_use == BATCH_COUNT) {
			/*
			 * once we filled up the batch with candidates, we don't
			 * need to consider any files newer than the newest guy
			 * on the list...
			 */
			if (lp_to_fe(lds->head, sorted)->modified < s.st_mtime)
				continue;

			/*
			 * ... and if we find an older file later, we know it
			 * will be replacing the newest guy on the list, so use
			 * that directly...
			 */
			p = lds->head;
			lds->head = p->sorted;
		} else
			/* we are still accepting anything to fill the batch */

			p = &lds->batch[lds->batch_in_use++];

		p->sorted = NULL;
		strncpy(p->name, de->d_name, sizeof(p->name) - 1);
		p->name[sizeof(p->name) - 1] = '\0';
		p->modified = s.st_mtime;
		p->size = (size_t)s.st_size;

		/* (re)insert in newest-first order */
		lws_list_ptr_insert(&lds->head, &p->sorted, fe_modified_sort);
	} while (de);

	ret = 0;

	lds->cache_subdir++;
	if (lds->cache_subdir != 0x100)
		goto done;

	/* we completed the whole scan... */

	/* if really no guidance given, then 256MiB */
	if (!cache_size_limit)
		cache_size_limit = 256 * 1024 * 1024;

	if (lds->agg_size > cache_size_limit) {

		/* apply prev pointers to make the list doubly-linked */

		lp = lds->head;
		while (lp) {
			p = lp_to_fe(lp, sorted);

			/*
			 * op holds the ADDRESS of the previous entry's prev
			 * member (NULL for the newest entry), which is what
			 * lp_to_fe(..., prev) expects below
			 */
			p->prev = op;
			op = &p->prev;
			lp = p->sorted;
		}

		/*
		 * reverse the list (start from tail, now traverse using
		 * .prev)... it's oldest-first now...
		 */

		lp = op;

		while (lp && lds->agg_size > cache_size_limit) {
			p = lp_to_fe(lp, prev);

			/* first two chars of the hashed name select the dir */
			lws_snprintf(filepath, sizeof(filepath), "%s/%c/%c/%s",
				     lds->cache_dir_base, p->name[0],
				     p->name[1], p->name);

			if (!unlink(filepath)) {
				lds->agg_size -= p->size;
				trimmed += p->size;
				files_trimmed++;
			} else
				lwsl_notice("%s: Failed to unlink %s\n",
					    __func__, filepath);

			lp = p->prev;
		}

		if (files_trimmed)
			lwsl_notice("%s: %s: trimmed %d files totalling "
				    "%lldKib, leaving %lldMiB\n", __func__,
				    lds->cache_dir_base, files_trimmed,
				    ((unsigned long long)trimmed) / KIB,
				    ((unsigned long long)lds->agg_size) / MIB);
	}

	if (lds->agg_size && lds->agg_file_count)
		lds->avg_size = lds->agg_size / (uint64_t)lds->agg_file_count;

	/*
	 * estimate how long we can go before scanning again... default we need
	 * to start again immediately
	 */

	lds->last_scan_completed = time(NULL);
	lds->secs_waiting = 1;

	if (lds->agg_size < cache_size_limit) {
		uint64_t avg = 4096, capacity, projected;

		/* let's use 80% of the real average for margin */
		if (lds->agg_size && lds->agg_file_count)
			avg = ((lds->agg_size * 8) / (uint64_t)lds->agg_file_count) / 10;

		/*
		 * if we collected BATCH_COUNT files of the average size,
		 * how much can we clean up in 256s?
		 */

		/*
		 * NOTE(review): if the 80%-average computes to 0 (many tiny
		 * files), capacity is 0 and the division below would fault --
		 * confirm whether that can occur in practice
		 */
		capacity = avg * BATCH_COUNT;

		/*
		 * if the cache grew by 10%, would we hit the limit even then?
		 */
		projected = (lds->agg_size * 11) / 10;
		if (projected < cache_size_limit)
			/* no... */
			lds->secs_waiting = (int)((256 / 2) * ((cache_size_limit -
						  projected) / capacity));

		/*
		 * large waits imply we may not have enough info yet, so
		 * check once an hour at least.
		 */

		if (lds->secs_waiting > 3600)
			lds->secs_waiting = 3600;
	} else
		/* at / over the limit: scan again as soon as we are called */
		lds->secs_waiting = 0;

	lwsl_info("%s: cache %s: %lldKiB / %lldKiB, next scan %ds\n", __func__,
		  lds->cache_dir_base,
		  (unsigned long long)lds->agg_size / KIB,
		  (unsigned long long)cache_size_limit / KIB,
		  lds->secs_waiting);

	/* batch memory only lives for the duration of one full scan */
	lws_free(lds->batch);
	lds->batch = NULL;

	lds->cache_subdir = 0;

done:
	closedir(dir);

	return ret;
}