// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/*
 * Routines for dealing with .zip archives.
 *
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 */
77c2aad20Sopenharmony_ci
87c2aad20Sopenharmony_ci#include <errno.h>
97c2aad20Sopenharmony_ci#include <fcntl.h>
107c2aad20Sopenharmony_ci#include <stdint.h>
117c2aad20Sopenharmony_ci#include <stdlib.h>
127c2aad20Sopenharmony_ci#include <string.h>
137c2aad20Sopenharmony_ci#include <sys/mman.h>
147c2aad20Sopenharmony_ci#include <unistd.h>
157c2aad20Sopenharmony_ci
167c2aad20Sopenharmony_ci#include "libbpf_internal.h"
177c2aad20Sopenharmony_ci#include "zip.h"
187c2aad20Sopenharmony_ci
197c2aad20Sopenharmony_ci#pragma GCC diagnostic push
207c2aad20Sopenharmony_ci#pragma GCC diagnostic ignored "-Wpacked"
217c2aad20Sopenharmony_ci#pragma GCC diagnostic ignored "-Wattributes"
227c2aad20Sopenharmony_ci
/* Specification of ZIP file format can be found here:
 * https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
 * For a high level overview of the structure of a ZIP file see
 * sections 4.3.1 - 4.3.6.
 *
 * Data structures appearing in ZIP files do not contain any
 * padding and they might be misaligned. To allow us to safely
 * operate on pointers to such structures and their members, we
 * declare the types as packed.
 */
337c2aad20Sopenharmony_ci
347c2aad20Sopenharmony_ci#define END_OF_CD_RECORD_MAGIC 0x06054b50
357c2aad20Sopenharmony_ci
367c2aad20Sopenharmony_ci/* See section 4.3.16 of the spec. */
377c2aad20Sopenharmony_cistruct end_of_cd_record {
387c2aad20Sopenharmony_ci	/* Magic value equal to END_OF_CD_RECORD_MAGIC */
397c2aad20Sopenharmony_ci	__u32 magic;
407c2aad20Sopenharmony_ci
417c2aad20Sopenharmony_ci	/* Number of the file containing this structure or 0xFFFF if ZIP64 archive.
427c2aad20Sopenharmony_ci	 * Zip archive might span multiple files (disks).
437c2aad20Sopenharmony_ci	 */
447c2aad20Sopenharmony_ci	__u16 this_disk;
457c2aad20Sopenharmony_ci
467c2aad20Sopenharmony_ci	/* Number of the file containing the beginning of the central directory or
477c2aad20Sopenharmony_ci	 * 0xFFFF if ZIP64 archive.
487c2aad20Sopenharmony_ci	 */
497c2aad20Sopenharmony_ci	__u16 cd_disk;
507c2aad20Sopenharmony_ci
517c2aad20Sopenharmony_ci	/* Number of central directory records on this disk or 0xFFFF if ZIP64
527c2aad20Sopenharmony_ci	 * archive.
537c2aad20Sopenharmony_ci	 */
547c2aad20Sopenharmony_ci	__u16 cd_records;
557c2aad20Sopenharmony_ci
567c2aad20Sopenharmony_ci	/* Number of central directory records on all disks or 0xFFFF if ZIP64
577c2aad20Sopenharmony_ci	 * archive.
587c2aad20Sopenharmony_ci	 */
597c2aad20Sopenharmony_ci	__u16 cd_records_total;
607c2aad20Sopenharmony_ci
617c2aad20Sopenharmony_ci	/* Size of the central directory record or 0xFFFFFFFF if ZIP64 archive. */
627c2aad20Sopenharmony_ci	__u32 cd_size;
637c2aad20Sopenharmony_ci
647c2aad20Sopenharmony_ci	/* Offset of the central directory from the beginning of the archive or
657c2aad20Sopenharmony_ci	 * 0xFFFFFFFF if ZIP64 archive.
667c2aad20Sopenharmony_ci	 */
677c2aad20Sopenharmony_ci	__u32 cd_offset;
687c2aad20Sopenharmony_ci
697c2aad20Sopenharmony_ci	/* Length of comment data following end of central directory record. */
707c2aad20Sopenharmony_ci	__u16 comment_length;
717c2aad20Sopenharmony_ci
727c2aad20Sopenharmony_ci	/* Up to 64k of arbitrary bytes. */
737c2aad20Sopenharmony_ci	/* uint8_t comment[comment_length] */
747c2aad20Sopenharmony_ci} __attribute__((packed));
757c2aad20Sopenharmony_ci
767c2aad20Sopenharmony_ci#define CD_FILE_HEADER_MAGIC 0x02014b50
777c2aad20Sopenharmony_ci#define FLAG_ENCRYPTED (1 << 0)
787c2aad20Sopenharmony_ci#define FLAG_HAS_DATA_DESCRIPTOR (1 << 3)
797c2aad20Sopenharmony_ci
807c2aad20Sopenharmony_ci/* See section 4.3.12 of the spec. */
817c2aad20Sopenharmony_cistruct cd_file_header {
827c2aad20Sopenharmony_ci	/* Magic value equal to CD_FILE_HEADER_MAGIC. */
837c2aad20Sopenharmony_ci	__u32 magic;
847c2aad20Sopenharmony_ci	__u16 version;
857c2aad20Sopenharmony_ci	/* Minimum zip version needed to extract the file. */
867c2aad20Sopenharmony_ci	__u16 min_version;
877c2aad20Sopenharmony_ci	__u16 flags;
887c2aad20Sopenharmony_ci	__u16 compression;
897c2aad20Sopenharmony_ci	__u16 last_modified_time;
907c2aad20Sopenharmony_ci	__u16 last_modified_date;
917c2aad20Sopenharmony_ci	__u32 crc;
927c2aad20Sopenharmony_ci	__u32 compressed_size;
937c2aad20Sopenharmony_ci	__u32 uncompressed_size;
947c2aad20Sopenharmony_ci	__u16 file_name_length;
957c2aad20Sopenharmony_ci	__u16 extra_field_length;
967c2aad20Sopenharmony_ci	__u16 file_comment_length;
977c2aad20Sopenharmony_ci	/* Number of the disk where the file starts or 0xFFFF if ZIP64 archive. */
987c2aad20Sopenharmony_ci	__u16 disk;
997c2aad20Sopenharmony_ci	__u16 internal_attributes;
1007c2aad20Sopenharmony_ci	__u32 external_attributes;
1017c2aad20Sopenharmony_ci	/* Offset from the start of the disk containing the local file header to the
1027c2aad20Sopenharmony_ci	 * start of the local file header.
1037c2aad20Sopenharmony_ci	 */
1047c2aad20Sopenharmony_ci	__u32 offset;
1057c2aad20Sopenharmony_ci} __attribute__((packed));
1067c2aad20Sopenharmony_ci
1077c2aad20Sopenharmony_ci#define LOCAL_FILE_HEADER_MAGIC 0x04034b50
1087c2aad20Sopenharmony_ci
1097c2aad20Sopenharmony_ci/* See section 4.3.7 of the spec. */
1107c2aad20Sopenharmony_cistruct local_file_header {
1117c2aad20Sopenharmony_ci	/* Magic value equal to LOCAL_FILE_HEADER_MAGIC. */
1127c2aad20Sopenharmony_ci	__u32 magic;
1137c2aad20Sopenharmony_ci	/* Minimum zip version needed to extract the file. */
1147c2aad20Sopenharmony_ci	__u16 min_version;
1157c2aad20Sopenharmony_ci	__u16 flags;
1167c2aad20Sopenharmony_ci	__u16 compression;
1177c2aad20Sopenharmony_ci	__u16 last_modified_time;
1187c2aad20Sopenharmony_ci	__u16 last_modified_date;
1197c2aad20Sopenharmony_ci	__u32 crc;
1207c2aad20Sopenharmony_ci	__u32 compressed_size;
1217c2aad20Sopenharmony_ci	__u32 uncompressed_size;
1227c2aad20Sopenharmony_ci	__u16 file_name_length;
1237c2aad20Sopenharmony_ci	__u16 extra_field_length;
1247c2aad20Sopenharmony_ci} __attribute__((packed));
1257c2aad20Sopenharmony_ci
1267c2aad20Sopenharmony_ci#pragma GCC diagnostic pop
1277c2aad20Sopenharmony_ci
1287c2aad20Sopenharmony_cistruct zip_archive {
1297c2aad20Sopenharmony_ci	void *data;
1307c2aad20Sopenharmony_ci	__u32 size;
1317c2aad20Sopenharmony_ci	__u32 cd_offset;
1327c2aad20Sopenharmony_ci	__u32 cd_records;
1337c2aad20Sopenharmony_ci};
1347c2aad20Sopenharmony_ci
1357c2aad20Sopenharmony_cistatic void *check_access(struct zip_archive *archive, __u32 offset, __u32 size)
1367c2aad20Sopenharmony_ci{
1377c2aad20Sopenharmony_ci	if (offset + size > archive->size || offset > offset + size)
1387c2aad20Sopenharmony_ci		return NULL;
1397c2aad20Sopenharmony_ci
1407c2aad20Sopenharmony_ci	return archive->data + offset;
1417c2aad20Sopenharmony_ci}
1427c2aad20Sopenharmony_ci
1437c2aad20Sopenharmony_ci/* Returns 0 on success, -EINVAL on error and -ENOTSUP if the eocd indicates the
1447c2aad20Sopenharmony_ci * archive uses features which are not supported.
1457c2aad20Sopenharmony_ci */
1467c2aad20Sopenharmony_cistatic int try_parse_end_of_cd(struct zip_archive *archive, __u32 offset)
1477c2aad20Sopenharmony_ci{
1487c2aad20Sopenharmony_ci	__u16 comment_length, cd_records;
1497c2aad20Sopenharmony_ci	struct end_of_cd_record *eocd;
1507c2aad20Sopenharmony_ci	__u32 cd_offset, cd_size;
1517c2aad20Sopenharmony_ci
1527c2aad20Sopenharmony_ci	eocd = check_access(archive, offset, sizeof(*eocd));
1537c2aad20Sopenharmony_ci	if (!eocd || eocd->magic != END_OF_CD_RECORD_MAGIC)
1547c2aad20Sopenharmony_ci		return -EINVAL;
1557c2aad20Sopenharmony_ci
1567c2aad20Sopenharmony_ci	comment_length = eocd->comment_length;
1577c2aad20Sopenharmony_ci	if (offset + sizeof(*eocd) + comment_length != archive->size)
1587c2aad20Sopenharmony_ci		return -EINVAL;
1597c2aad20Sopenharmony_ci
1607c2aad20Sopenharmony_ci	cd_records = eocd->cd_records;
1617c2aad20Sopenharmony_ci	if (eocd->this_disk != 0 || eocd->cd_disk != 0 || eocd->cd_records_total != cd_records)
1627c2aad20Sopenharmony_ci		/* This is a valid eocd, but we only support single-file non-ZIP64 archives. */
1637c2aad20Sopenharmony_ci		return -ENOTSUP;
1647c2aad20Sopenharmony_ci
1657c2aad20Sopenharmony_ci	cd_offset = eocd->cd_offset;
1667c2aad20Sopenharmony_ci	cd_size = eocd->cd_size;
1677c2aad20Sopenharmony_ci	if (!check_access(archive, cd_offset, cd_size))
1687c2aad20Sopenharmony_ci		return -EINVAL;
1697c2aad20Sopenharmony_ci
1707c2aad20Sopenharmony_ci	archive->cd_offset = cd_offset;
1717c2aad20Sopenharmony_ci	archive->cd_records = cd_records;
1727c2aad20Sopenharmony_ci	return 0;
1737c2aad20Sopenharmony_ci}
1747c2aad20Sopenharmony_ci
1757c2aad20Sopenharmony_cistatic int find_cd(struct zip_archive *archive)
1767c2aad20Sopenharmony_ci{
1777c2aad20Sopenharmony_ci	int64_t limit, offset;
1787c2aad20Sopenharmony_ci	int rc = -EINVAL;
1797c2aad20Sopenharmony_ci
1807c2aad20Sopenharmony_ci	if (archive->size <= sizeof(struct end_of_cd_record))
1817c2aad20Sopenharmony_ci		return -EINVAL;
1827c2aad20Sopenharmony_ci
1837c2aad20Sopenharmony_ci	/* Because the end of central directory ends with a variable length array of
1847c2aad20Sopenharmony_ci	 * up to 0xFFFF bytes we can't know exactly where it starts and need to
1857c2aad20Sopenharmony_ci	 * search for it at the end of the file, scanning the (limit, offset] range.
1867c2aad20Sopenharmony_ci	 */
1877c2aad20Sopenharmony_ci	offset = archive->size - sizeof(struct end_of_cd_record);
1887c2aad20Sopenharmony_ci	limit = (int64_t)offset - (1 << 16);
1897c2aad20Sopenharmony_ci
1907c2aad20Sopenharmony_ci	for (; offset >= 0 && offset > limit && rc != 0; offset--) {
1917c2aad20Sopenharmony_ci		rc = try_parse_end_of_cd(archive, offset);
1927c2aad20Sopenharmony_ci		if (rc == -ENOTSUP)
1937c2aad20Sopenharmony_ci			break;
1947c2aad20Sopenharmony_ci	}
1957c2aad20Sopenharmony_ci	return rc;
1967c2aad20Sopenharmony_ci}
1977c2aad20Sopenharmony_ci
1987c2aad20Sopenharmony_cistruct zip_archive *zip_archive_open(const char *path)
1997c2aad20Sopenharmony_ci{
2007c2aad20Sopenharmony_ci	struct zip_archive *archive;
2017c2aad20Sopenharmony_ci	int err, fd;
2027c2aad20Sopenharmony_ci	off_t size;
2037c2aad20Sopenharmony_ci	void *data;
2047c2aad20Sopenharmony_ci
2057c2aad20Sopenharmony_ci	fd = open(path, O_RDONLY | O_CLOEXEC);
2067c2aad20Sopenharmony_ci	if (fd < 0)
2077c2aad20Sopenharmony_ci		return ERR_PTR(-errno);
2087c2aad20Sopenharmony_ci
2097c2aad20Sopenharmony_ci	size = lseek(fd, 0, SEEK_END);
2107c2aad20Sopenharmony_ci	if (size == (off_t)-1 || size > UINT32_MAX) {
2117c2aad20Sopenharmony_ci		close(fd);
2127c2aad20Sopenharmony_ci		return ERR_PTR(-EINVAL);
2137c2aad20Sopenharmony_ci	}
2147c2aad20Sopenharmony_ci
2157c2aad20Sopenharmony_ci	data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
2167c2aad20Sopenharmony_ci	err = -errno;
2177c2aad20Sopenharmony_ci	close(fd);
2187c2aad20Sopenharmony_ci
2197c2aad20Sopenharmony_ci	if (data == MAP_FAILED)
2207c2aad20Sopenharmony_ci		return ERR_PTR(err);
2217c2aad20Sopenharmony_ci
2227c2aad20Sopenharmony_ci	archive = malloc(sizeof(*archive));
2237c2aad20Sopenharmony_ci	if (!archive) {
2247c2aad20Sopenharmony_ci		munmap(data, size);
2257c2aad20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
2267c2aad20Sopenharmony_ci	};
2277c2aad20Sopenharmony_ci
2287c2aad20Sopenharmony_ci	archive->data = data;
2297c2aad20Sopenharmony_ci	archive->size = size;
2307c2aad20Sopenharmony_ci
2317c2aad20Sopenharmony_ci	err = find_cd(archive);
2327c2aad20Sopenharmony_ci	if (err) {
2337c2aad20Sopenharmony_ci		munmap(data, size);
2347c2aad20Sopenharmony_ci		free(archive);
2357c2aad20Sopenharmony_ci		return ERR_PTR(err);
2367c2aad20Sopenharmony_ci	}
2377c2aad20Sopenharmony_ci
2387c2aad20Sopenharmony_ci	return archive;
2397c2aad20Sopenharmony_ci}
2407c2aad20Sopenharmony_ci
2417c2aad20Sopenharmony_civoid zip_archive_close(struct zip_archive *archive)
2427c2aad20Sopenharmony_ci{
2437c2aad20Sopenharmony_ci	munmap(archive->data, archive->size);
2447c2aad20Sopenharmony_ci	free(archive);
2457c2aad20Sopenharmony_ci}
2467c2aad20Sopenharmony_ci
2477c2aad20Sopenharmony_cistatic struct local_file_header *local_file_header_at_offset(struct zip_archive *archive,
2487c2aad20Sopenharmony_ci							     __u32 offset)
2497c2aad20Sopenharmony_ci{
2507c2aad20Sopenharmony_ci	struct local_file_header *lfh;
2517c2aad20Sopenharmony_ci
2527c2aad20Sopenharmony_ci	lfh = check_access(archive, offset, sizeof(*lfh));
2537c2aad20Sopenharmony_ci	if (!lfh || lfh->magic != LOCAL_FILE_HEADER_MAGIC)
2547c2aad20Sopenharmony_ci		return NULL;
2557c2aad20Sopenharmony_ci
2567c2aad20Sopenharmony_ci	return lfh;
2577c2aad20Sopenharmony_ci}
2587c2aad20Sopenharmony_ci
2597c2aad20Sopenharmony_cistatic int get_entry_at_offset(struct zip_archive *archive, __u32 offset, struct zip_entry *out)
2607c2aad20Sopenharmony_ci{
2617c2aad20Sopenharmony_ci	struct local_file_header *lfh;
2627c2aad20Sopenharmony_ci	__u32 compressed_size;
2637c2aad20Sopenharmony_ci	const char *name;
2647c2aad20Sopenharmony_ci	void *data;
2657c2aad20Sopenharmony_ci
2667c2aad20Sopenharmony_ci	lfh = local_file_header_at_offset(archive, offset);
2677c2aad20Sopenharmony_ci	if (!lfh)
2687c2aad20Sopenharmony_ci		return -EINVAL;
2697c2aad20Sopenharmony_ci
2707c2aad20Sopenharmony_ci	offset += sizeof(*lfh);
2717c2aad20Sopenharmony_ci	if ((lfh->flags & FLAG_ENCRYPTED) || (lfh->flags & FLAG_HAS_DATA_DESCRIPTOR))
2727c2aad20Sopenharmony_ci		return -EINVAL;
2737c2aad20Sopenharmony_ci
2747c2aad20Sopenharmony_ci	name = check_access(archive, offset, lfh->file_name_length);
2757c2aad20Sopenharmony_ci	if (!name)
2767c2aad20Sopenharmony_ci		return -EINVAL;
2777c2aad20Sopenharmony_ci
2787c2aad20Sopenharmony_ci	offset += lfh->file_name_length;
2797c2aad20Sopenharmony_ci	if (!check_access(archive, offset, lfh->extra_field_length))
2807c2aad20Sopenharmony_ci		return -EINVAL;
2817c2aad20Sopenharmony_ci
2827c2aad20Sopenharmony_ci	offset += lfh->extra_field_length;
2837c2aad20Sopenharmony_ci	compressed_size = lfh->compressed_size;
2847c2aad20Sopenharmony_ci	data = check_access(archive, offset, compressed_size);
2857c2aad20Sopenharmony_ci	if (!data)
2867c2aad20Sopenharmony_ci		return -EINVAL;
2877c2aad20Sopenharmony_ci
2887c2aad20Sopenharmony_ci	out->compression = lfh->compression;
2897c2aad20Sopenharmony_ci	out->name_length = lfh->file_name_length;
2907c2aad20Sopenharmony_ci	out->name = name;
2917c2aad20Sopenharmony_ci	out->data = data;
2927c2aad20Sopenharmony_ci	out->data_length = compressed_size;
2937c2aad20Sopenharmony_ci	out->data_offset = offset;
2947c2aad20Sopenharmony_ci
2957c2aad20Sopenharmony_ci	return 0;
2967c2aad20Sopenharmony_ci}
2977c2aad20Sopenharmony_ci
2987c2aad20Sopenharmony_ciint zip_archive_find_entry(struct zip_archive *archive, const char *file_name,
2997c2aad20Sopenharmony_ci			   struct zip_entry *out)
3007c2aad20Sopenharmony_ci{
3017c2aad20Sopenharmony_ci	size_t file_name_length = strlen(file_name);
3027c2aad20Sopenharmony_ci	__u32 i, offset = archive->cd_offset;
3037c2aad20Sopenharmony_ci
3047c2aad20Sopenharmony_ci	for (i = 0; i < archive->cd_records; ++i) {
3057c2aad20Sopenharmony_ci		__u16 cdfh_name_length, cdfh_flags;
3067c2aad20Sopenharmony_ci		struct cd_file_header *cdfh;
3077c2aad20Sopenharmony_ci		const char *cdfh_name;
3087c2aad20Sopenharmony_ci
3097c2aad20Sopenharmony_ci		cdfh = check_access(archive, offset, sizeof(*cdfh));
3107c2aad20Sopenharmony_ci		if (!cdfh || cdfh->magic != CD_FILE_HEADER_MAGIC)
3117c2aad20Sopenharmony_ci			return -EINVAL;
3127c2aad20Sopenharmony_ci
3137c2aad20Sopenharmony_ci		offset += sizeof(*cdfh);
3147c2aad20Sopenharmony_ci		cdfh_name_length = cdfh->file_name_length;
3157c2aad20Sopenharmony_ci		cdfh_name = check_access(archive, offset, cdfh_name_length);
3167c2aad20Sopenharmony_ci		if (!cdfh_name)
3177c2aad20Sopenharmony_ci			return -EINVAL;
3187c2aad20Sopenharmony_ci
3197c2aad20Sopenharmony_ci		cdfh_flags = cdfh->flags;
3207c2aad20Sopenharmony_ci		if ((cdfh_flags & FLAG_ENCRYPTED) == 0 &&
3217c2aad20Sopenharmony_ci		    (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0 &&
3227c2aad20Sopenharmony_ci		    file_name_length == cdfh_name_length &&
3237c2aad20Sopenharmony_ci		    memcmp(file_name, archive->data + offset, file_name_length) == 0) {
3247c2aad20Sopenharmony_ci			return get_entry_at_offset(archive, cdfh->offset, out);
3257c2aad20Sopenharmony_ci		}
3267c2aad20Sopenharmony_ci
3277c2aad20Sopenharmony_ci		offset += cdfh_name_length;
3287c2aad20Sopenharmony_ci		offset += cdfh->extra_field_length;
3297c2aad20Sopenharmony_ci		offset += cdfh->file_comment_length;
3307c2aad20Sopenharmony_ci	}
3317c2aad20Sopenharmony_ci
3327c2aad20Sopenharmony_ci	return -ENOENT;
3337c2aad20Sopenharmony_ci}
334