17c2aad20Sopenharmony_ci// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 27c2aad20Sopenharmony_ci/* 37c2aad20Sopenharmony_ci * Routines for dealing with .zip archives. 47c2aad20Sopenharmony_ci * 57c2aad20Sopenharmony_ci * Copyright (c) Meta Platforms, Inc. and affiliates. 67c2aad20Sopenharmony_ci */ 77c2aad20Sopenharmony_ci 87c2aad20Sopenharmony_ci#include <errno.h> 97c2aad20Sopenharmony_ci#include <fcntl.h> 107c2aad20Sopenharmony_ci#include <stdint.h> 117c2aad20Sopenharmony_ci#include <stdlib.h> 127c2aad20Sopenharmony_ci#include <string.h> 137c2aad20Sopenharmony_ci#include <sys/mman.h> 147c2aad20Sopenharmony_ci#include <unistd.h> 157c2aad20Sopenharmony_ci 167c2aad20Sopenharmony_ci#include "libbpf_internal.h" 177c2aad20Sopenharmony_ci#include "zip.h" 187c2aad20Sopenharmony_ci 197c2aad20Sopenharmony_ci#pragma GCC diagnostic push 207c2aad20Sopenharmony_ci#pragma GCC diagnostic ignored "-Wpacked" 217c2aad20Sopenharmony_ci#pragma GCC diagnostic ignored "-Wattributes" 227c2aad20Sopenharmony_ci 237c2aad20Sopenharmony_ci/* Specification of ZIP file format can be found here: 247c2aad20Sopenharmony_ci * https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT 257c2aad20Sopenharmony_ci * For a high level overview of the structure of a ZIP file see 267c2aad20Sopenharmony_ci * sections 4.3.1 - 4.3.6. 277c2aad20Sopenharmony_ci * 287c2aad20Sopenharmony_ci * Data structures appearing in ZIP files do not contain any 297c2aad20Sopenharmony_ci * padding and they might be misaligned. To allow us to safely 307c2aad20Sopenharmony_ci * operate on pointers to such structures and their members, we 317c2aad20Sopenharmony_ci * declare the types as packed. 327c2aad20Sopenharmony_ci */ 337c2aad20Sopenharmony_ci 347c2aad20Sopenharmony_ci#define END_OF_CD_RECORD_MAGIC 0x06054b50 357c2aad20Sopenharmony_ci 367c2aad20Sopenharmony_ci/* See section 4.3.16 of the spec. */ 377c2aad20Sopenharmony_cistruct end_of_cd_record { 387c2aad20Sopenharmony_ci /* Magic value equal to END_OF_CD_RECORD_MAGIC */ 397c2aad20Sopenharmony_ci __u32 magic; 407c2aad20Sopenharmony_ci 417c2aad20Sopenharmony_ci /* Number of the file containing this structure or 0xFFFF if ZIP64 archive. 427c2aad20Sopenharmony_ci * Zip archive might span multiple files (disks). 437c2aad20Sopenharmony_ci */ 447c2aad20Sopenharmony_ci __u16 this_disk; 457c2aad20Sopenharmony_ci 467c2aad20Sopenharmony_ci /* Number of the file containing the beginning of the central directory or 477c2aad20Sopenharmony_ci * 0xFFFF if ZIP64 archive. 487c2aad20Sopenharmony_ci */ 497c2aad20Sopenharmony_ci __u16 cd_disk; 507c2aad20Sopenharmony_ci 517c2aad20Sopenharmony_ci /* Number of central directory records on this disk or 0xFFFF if ZIP64 527c2aad20Sopenharmony_ci * archive. 537c2aad20Sopenharmony_ci */ 547c2aad20Sopenharmony_ci __u16 cd_records; 557c2aad20Sopenharmony_ci 567c2aad20Sopenharmony_ci /* Number of central directory records on all disks or 0xFFFF if ZIP64 577c2aad20Sopenharmony_ci * archive. 587c2aad20Sopenharmony_ci */ 597c2aad20Sopenharmony_ci __u16 cd_records_total; 607c2aad20Sopenharmony_ci 617c2aad20Sopenharmony_ci /* Size of the central directory record or 0xFFFFFFFF if ZIP64 archive. */ 627c2aad20Sopenharmony_ci __u32 cd_size; 637c2aad20Sopenharmony_ci 647c2aad20Sopenharmony_ci /* Offset of the central directory from the beginning of the archive or 657c2aad20Sopenharmony_ci * 0xFFFFFFFF if ZIP64 archive. 667c2aad20Sopenharmony_ci */ 677c2aad20Sopenharmony_ci __u32 cd_offset; 687c2aad20Sopenharmony_ci 697c2aad20Sopenharmony_ci /* Length of comment data following end of central directory record. */ 707c2aad20Sopenharmony_ci __u16 comment_length; 717c2aad20Sopenharmony_ci 727c2aad20Sopenharmony_ci /* Up to 64k of arbitrary bytes. */ 737c2aad20Sopenharmony_ci /* uint8_t comment[comment_length] */ 747c2aad20Sopenharmony_ci} __attribute__((packed)); 757c2aad20Sopenharmony_ci 767c2aad20Sopenharmony_ci#define CD_FILE_HEADER_MAGIC 0x02014b50 777c2aad20Sopenharmony_ci#define FLAG_ENCRYPTED (1 << 0) 787c2aad20Sopenharmony_ci#define FLAG_HAS_DATA_DESCRIPTOR (1 << 3) 797c2aad20Sopenharmony_ci 807c2aad20Sopenharmony_ci/* See section 4.3.12 of the spec. */ 817c2aad20Sopenharmony_cistruct cd_file_header { 827c2aad20Sopenharmony_ci /* Magic value equal to CD_FILE_HEADER_MAGIC. */ 837c2aad20Sopenharmony_ci __u32 magic; 847c2aad20Sopenharmony_ci __u16 version; 857c2aad20Sopenharmony_ci /* Minimum zip version needed to extract the file. */ 867c2aad20Sopenharmony_ci __u16 min_version; 877c2aad20Sopenharmony_ci __u16 flags; 887c2aad20Sopenharmony_ci __u16 compression; 897c2aad20Sopenharmony_ci __u16 last_modified_time; 907c2aad20Sopenharmony_ci __u16 last_modified_date; 917c2aad20Sopenharmony_ci __u32 crc; 927c2aad20Sopenharmony_ci __u32 compressed_size; 937c2aad20Sopenharmony_ci __u32 uncompressed_size; 947c2aad20Sopenharmony_ci __u16 file_name_length; 957c2aad20Sopenharmony_ci __u16 extra_field_length; 967c2aad20Sopenharmony_ci __u16 file_comment_length; 977c2aad20Sopenharmony_ci /* Number of the disk where the file starts or 0xFFFF if ZIP64 archive. */ 987c2aad20Sopenharmony_ci __u16 disk; 997c2aad20Sopenharmony_ci __u16 internal_attributes; 1007c2aad20Sopenharmony_ci __u32 external_attributes; 1017c2aad20Sopenharmony_ci /* Offset from the start of the disk containing the local file header to the 1027c2aad20Sopenharmony_ci * start of the local file header. 1037c2aad20Sopenharmony_ci */ 1047c2aad20Sopenharmony_ci __u32 offset; 1057c2aad20Sopenharmony_ci} __attribute__((packed)); 1067c2aad20Sopenharmony_ci 1077c2aad20Sopenharmony_ci#define LOCAL_FILE_HEADER_MAGIC 0x04034b50 1087c2aad20Sopenharmony_ci 1097c2aad20Sopenharmony_ci/* See section 4.3.7 of the spec. */ 1107c2aad20Sopenharmony_cistruct local_file_header { 1117c2aad20Sopenharmony_ci /* Magic value equal to LOCAL_FILE_HEADER_MAGIC. */ 1127c2aad20Sopenharmony_ci __u32 magic; 1137c2aad20Sopenharmony_ci /* Minimum zip version needed to extract the file. */ 1147c2aad20Sopenharmony_ci __u16 min_version; 1157c2aad20Sopenharmony_ci __u16 flags; 1167c2aad20Sopenharmony_ci __u16 compression; 1177c2aad20Sopenharmony_ci __u16 last_modified_time; 1187c2aad20Sopenharmony_ci __u16 last_modified_date; 1197c2aad20Sopenharmony_ci __u32 crc; 1207c2aad20Sopenharmony_ci __u32 compressed_size; 1217c2aad20Sopenharmony_ci __u32 uncompressed_size; 1227c2aad20Sopenharmony_ci __u16 file_name_length; 1237c2aad20Sopenharmony_ci __u16 extra_field_length; 1247c2aad20Sopenharmony_ci} __attribute__((packed)); 1257c2aad20Sopenharmony_ci 1267c2aad20Sopenharmony_ci#pragma GCC diagnostic pop 1277c2aad20Sopenharmony_ci 1287c2aad20Sopenharmony_cistruct zip_archive { 1297c2aad20Sopenharmony_ci void *data; 1307c2aad20Sopenharmony_ci __u32 size; 1317c2aad20Sopenharmony_ci __u32 cd_offset; 1327c2aad20Sopenharmony_ci __u32 cd_records; 1337c2aad20Sopenharmony_ci}; 1347c2aad20Sopenharmony_ci 1357c2aad20Sopenharmony_cistatic void *check_access(struct zip_archive *archive, __u32 offset, __u32 size) 1367c2aad20Sopenharmony_ci{ 1377c2aad20Sopenharmony_ci if (offset + size > archive->size || offset > offset + size) 1387c2aad20Sopenharmony_ci return NULL; 1397c2aad20Sopenharmony_ci 1407c2aad20Sopenharmony_ci return archive->data + offset; 1417c2aad20Sopenharmony_ci} 1427c2aad20Sopenharmony_ci 1437c2aad20Sopenharmony_ci/* Returns 0 on success, -EINVAL on error and -ENOTSUP if the eocd indicates the 1447c2aad20Sopenharmony_ci * archive uses features which are not supported. 1457c2aad20Sopenharmony_ci */ 1467c2aad20Sopenharmony_cistatic int try_parse_end_of_cd(struct zip_archive *archive, __u32 offset) 1477c2aad20Sopenharmony_ci{ 1487c2aad20Sopenharmony_ci __u16 comment_length, cd_records; 1497c2aad20Sopenharmony_ci struct end_of_cd_record *eocd; 1507c2aad20Sopenharmony_ci __u32 cd_offset, cd_size; 1517c2aad20Sopenharmony_ci 1527c2aad20Sopenharmony_ci eocd = check_access(archive, offset, sizeof(*eocd)); 1537c2aad20Sopenharmony_ci if (!eocd || eocd->magic != END_OF_CD_RECORD_MAGIC) 1547c2aad20Sopenharmony_ci return -EINVAL; 1557c2aad20Sopenharmony_ci 1567c2aad20Sopenharmony_ci comment_length = eocd->comment_length; 1577c2aad20Sopenharmony_ci if (offset + sizeof(*eocd) + comment_length != archive->size) 1587c2aad20Sopenharmony_ci return -EINVAL; 1597c2aad20Sopenharmony_ci 1607c2aad20Sopenharmony_ci cd_records = eocd->cd_records; 1617c2aad20Sopenharmony_ci if (eocd->this_disk != 0 || eocd->cd_disk != 0 || eocd->cd_records_total != cd_records) 1627c2aad20Sopenharmony_ci /* This is a valid eocd, but we only support single-file non-ZIP64 archives. */ 1637c2aad20Sopenharmony_ci return -ENOTSUP; 1647c2aad20Sopenharmony_ci 1657c2aad20Sopenharmony_ci cd_offset = eocd->cd_offset; 1667c2aad20Sopenharmony_ci cd_size = eocd->cd_size; 1677c2aad20Sopenharmony_ci if (!check_access(archive, cd_offset, cd_size)) 1687c2aad20Sopenharmony_ci return -EINVAL; 1697c2aad20Sopenharmony_ci 1707c2aad20Sopenharmony_ci archive->cd_offset = cd_offset; 1717c2aad20Sopenharmony_ci archive->cd_records = cd_records; 1727c2aad20Sopenharmony_ci return 0; 1737c2aad20Sopenharmony_ci} 1747c2aad20Sopenharmony_ci 1757c2aad20Sopenharmony_cistatic int find_cd(struct zip_archive *archive) 1767c2aad20Sopenharmony_ci{ 1777c2aad20Sopenharmony_ci int64_t limit, offset; 1787c2aad20Sopenharmony_ci int rc = -EINVAL; 1797c2aad20Sopenharmony_ci 1807c2aad20Sopenharmony_ci if (archive->size <= sizeof(struct end_of_cd_record)) 1817c2aad20Sopenharmony_ci return -EINVAL; 1827c2aad20Sopenharmony_ci 1837c2aad20Sopenharmony_ci /* Because the end of central directory ends with a variable length array of 1847c2aad20Sopenharmony_ci * up to 0xFFFF bytes we can't know exactly where it starts and need to 1857c2aad20Sopenharmony_ci * search for it at the end of the file, scanning the (limit, offset] range. 1867c2aad20Sopenharmony_ci */ 1877c2aad20Sopenharmony_ci offset = archive->size - sizeof(struct end_of_cd_record); 1887c2aad20Sopenharmony_ci limit = (int64_t)offset - (1 << 16); 1897c2aad20Sopenharmony_ci 1907c2aad20Sopenharmony_ci for (; offset >= 0 && offset > limit && rc != 0; offset--) { 1917c2aad20Sopenharmony_ci rc = try_parse_end_of_cd(archive, offset); 1927c2aad20Sopenharmony_ci if (rc == -ENOTSUP) 1937c2aad20Sopenharmony_ci break; 1947c2aad20Sopenharmony_ci } 1957c2aad20Sopenharmony_ci return rc; 1967c2aad20Sopenharmony_ci} 1977c2aad20Sopenharmony_ci 1987c2aad20Sopenharmony_cistruct zip_archive *zip_archive_open(const char *path) 1997c2aad20Sopenharmony_ci{ 2007c2aad20Sopenharmony_ci struct zip_archive *archive; 2017c2aad20Sopenharmony_ci int err, fd; 2027c2aad20Sopenharmony_ci off_t size; 2037c2aad20Sopenharmony_ci void *data; 2047c2aad20Sopenharmony_ci 2057c2aad20Sopenharmony_ci fd = open(path, O_RDONLY | O_CLOEXEC); 2067c2aad20Sopenharmony_ci if (fd < 0) 2077c2aad20Sopenharmony_ci return ERR_PTR(-errno); 2087c2aad20Sopenharmony_ci 2097c2aad20Sopenharmony_ci size = lseek(fd, 0, SEEK_END); 2107c2aad20Sopenharmony_ci if (size == (off_t)-1 || size > UINT32_MAX) { 2117c2aad20Sopenharmony_ci close(fd); 2127c2aad20Sopenharmony_ci return ERR_PTR(-EINVAL); 2137c2aad20Sopenharmony_ci } 2147c2aad20Sopenharmony_ci 2157c2aad20Sopenharmony_ci data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); 2167c2aad20Sopenharmony_ci err = -errno; 2177c2aad20Sopenharmony_ci close(fd); 2187c2aad20Sopenharmony_ci 2197c2aad20Sopenharmony_ci if (data == MAP_FAILED) 2207c2aad20Sopenharmony_ci return ERR_PTR(err); 2217c2aad20Sopenharmony_ci 2227c2aad20Sopenharmony_ci archive = malloc(sizeof(*archive)); 2237c2aad20Sopenharmony_ci if (!archive) { 2247c2aad20Sopenharmony_ci munmap(data, size); 2257c2aad20Sopenharmony_ci return ERR_PTR(-ENOMEM); 2267c2aad20Sopenharmony_ci }; 2277c2aad20Sopenharmony_ci 2287c2aad20Sopenharmony_ci archive->data = data; 2297c2aad20Sopenharmony_ci archive->size = size; 2307c2aad20Sopenharmony_ci 2317c2aad20Sopenharmony_ci err = find_cd(archive); 2327c2aad20Sopenharmony_ci if (err) { 2337c2aad20Sopenharmony_ci munmap(data, size); 2347c2aad20Sopenharmony_ci free(archive); 2357c2aad20Sopenharmony_ci return ERR_PTR(err); 2367c2aad20Sopenharmony_ci } 2377c2aad20Sopenharmony_ci 2387c2aad20Sopenharmony_ci return archive; 2397c2aad20Sopenharmony_ci} 2407c2aad20Sopenharmony_ci 2417c2aad20Sopenharmony_civoid zip_archive_close(struct zip_archive *archive) 2427c2aad20Sopenharmony_ci{ 2437c2aad20Sopenharmony_ci munmap(archive->data, archive->size); 2447c2aad20Sopenharmony_ci free(archive); 2457c2aad20Sopenharmony_ci} 2467c2aad20Sopenharmony_ci 2477c2aad20Sopenharmony_cistatic struct local_file_header *local_file_header_at_offset(struct zip_archive *archive, 2487c2aad20Sopenharmony_ci __u32 offset) 2497c2aad20Sopenharmony_ci{ 2507c2aad20Sopenharmony_ci struct local_file_header *lfh; 2517c2aad20Sopenharmony_ci 2527c2aad20Sopenharmony_ci lfh = check_access(archive, offset, sizeof(*lfh)); 2537c2aad20Sopenharmony_ci if (!lfh || lfh->magic != LOCAL_FILE_HEADER_MAGIC) 2547c2aad20Sopenharmony_ci return NULL; 2557c2aad20Sopenharmony_ci 2567c2aad20Sopenharmony_ci return lfh; 2577c2aad20Sopenharmony_ci} 2587c2aad20Sopenharmony_ci 2597c2aad20Sopenharmony_cistatic int get_entry_at_offset(struct zip_archive *archive, __u32 offset, struct zip_entry *out) 2607c2aad20Sopenharmony_ci{ 2617c2aad20Sopenharmony_ci struct local_file_header *lfh; 2627c2aad20Sopenharmony_ci __u32 compressed_size; 2637c2aad20Sopenharmony_ci const char *name; 2647c2aad20Sopenharmony_ci void *data; 2657c2aad20Sopenharmony_ci 2667c2aad20Sopenharmony_ci lfh = local_file_header_at_offset(archive, offset); 2677c2aad20Sopenharmony_ci if (!lfh) 2687c2aad20Sopenharmony_ci return -EINVAL; 2697c2aad20Sopenharmony_ci 2707c2aad20Sopenharmony_ci offset += sizeof(*lfh); 2717c2aad20Sopenharmony_ci if ((lfh->flags & FLAG_ENCRYPTED) || (lfh->flags & FLAG_HAS_DATA_DESCRIPTOR)) 2727c2aad20Sopenharmony_ci return -EINVAL; 2737c2aad20Sopenharmony_ci 2747c2aad20Sopenharmony_ci name = check_access(archive, offset, lfh->file_name_length); 2757c2aad20Sopenharmony_ci if (!name) 2767c2aad20Sopenharmony_ci return -EINVAL; 2777c2aad20Sopenharmony_ci 2787c2aad20Sopenharmony_ci offset += lfh->file_name_length; 2797c2aad20Sopenharmony_ci if (!check_access(archive, offset, lfh->extra_field_length)) 2807c2aad20Sopenharmony_ci return -EINVAL; 2817c2aad20Sopenharmony_ci 2827c2aad20Sopenharmony_ci offset += lfh->extra_field_length; 2837c2aad20Sopenharmony_ci compressed_size = lfh->compressed_size; 2847c2aad20Sopenharmony_ci data = check_access(archive, offset, compressed_size); 2857c2aad20Sopenharmony_ci if (!data) 2867c2aad20Sopenharmony_ci return -EINVAL; 2877c2aad20Sopenharmony_ci 2887c2aad20Sopenharmony_ci out->compression = lfh->compression; 2897c2aad20Sopenharmony_ci out->name_length = lfh->file_name_length; 2907c2aad20Sopenharmony_ci out->name = name; 2917c2aad20Sopenharmony_ci out->data = data; 2927c2aad20Sopenharmony_ci out->data_length = compressed_size; 2937c2aad20Sopenharmony_ci out->data_offset = offset; 2947c2aad20Sopenharmony_ci 2957c2aad20Sopenharmony_ci return 0; 2967c2aad20Sopenharmony_ci} 2977c2aad20Sopenharmony_ci 2987c2aad20Sopenharmony_ciint zip_archive_find_entry(struct zip_archive *archive, const char *file_name, 2997c2aad20Sopenharmony_ci struct zip_entry *out) 3007c2aad20Sopenharmony_ci{ 3017c2aad20Sopenharmony_ci size_t file_name_length = strlen(file_name); 3027c2aad20Sopenharmony_ci __u32 i, offset = archive->cd_offset; 3037c2aad20Sopenharmony_ci 3047c2aad20Sopenharmony_ci for (i = 0; i < archive->cd_records; ++i) { 3057c2aad20Sopenharmony_ci __u16 cdfh_name_length, cdfh_flags; 3067c2aad20Sopenharmony_ci struct cd_file_header *cdfh; 3077c2aad20Sopenharmony_ci const char *cdfh_name; 3087c2aad20Sopenharmony_ci 3097c2aad20Sopenharmony_ci cdfh = check_access(archive, offset, sizeof(*cdfh)); 3107c2aad20Sopenharmony_ci if (!cdfh || cdfh->magic != CD_FILE_HEADER_MAGIC) 3117c2aad20Sopenharmony_ci return -EINVAL; 3127c2aad20Sopenharmony_ci 3137c2aad20Sopenharmony_ci offset += sizeof(*cdfh); 3147c2aad20Sopenharmony_ci cdfh_name_length = cdfh->file_name_length; 3157c2aad20Sopenharmony_ci cdfh_name = check_access(archive, offset, cdfh_name_length); 3167c2aad20Sopenharmony_ci if (!cdfh_name) 3177c2aad20Sopenharmony_ci return -EINVAL; 3187c2aad20Sopenharmony_ci 3197c2aad20Sopenharmony_ci cdfh_flags = cdfh->flags; 3207c2aad20Sopenharmony_ci if ((cdfh_flags & FLAG_ENCRYPTED) == 0 && 3217c2aad20Sopenharmony_ci (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0 && 3227c2aad20Sopenharmony_ci file_name_length == cdfh_name_length && 3237c2aad20Sopenharmony_ci memcmp(file_name, archive->data + offset, file_name_length) == 0) { 3247c2aad20Sopenharmony_ci return get_entry_at_offset(archive, cdfh->offset, out); 3257c2aad20Sopenharmony_ci } 3267c2aad20Sopenharmony_ci 3277c2aad20Sopenharmony_ci offset += cdfh_name_length; 3287c2aad20Sopenharmony_ci offset += cdfh->extra_field_length; 3297c2aad20Sopenharmony_ci offset += cdfh->file_comment_length; 3307c2aad20Sopenharmony_ci } 3317c2aad20Sopenharmony_ci 3327c2aad20Sopenharmony_ci return -ENOENT; 3337c2aad20Sopenharmony_ci} 334