162306a36Sopenharmony_ci// SPDX-License-Identifier: LGPL-2.1
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * rseq.c
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (C) 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci * This library is free software; you can redistribute it and/or
862306a36Sopenharmony_ci * modify it under the terms of the GNU Lesser General Public
962306a36Sopenharmony_ci * License as published by the Free Software Foundation; only
1062306a36Sopenharmony_ci * version 2.1 of the License.
1162306a36Sopenharmony_ci *
1262306a36Sopenharmony_ci * This library is distributed in the hope that it will be useful,
1362306a36Sopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
1462306a36Sopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1562306a36Sopenharmony_ci * Lesser General Public License for more details.
1662306a36Sopenharmony_ci */
1762306a36Sopenharmony_ci
1862306a36Sopenharmony_ci#define _GNU_SOURCE
1962306a36Sopenharmony_ci#include <errno.h>
2062306a36Sopenharmony_ci#include <sched.h>
2162306a36Sopenharmony_ci#include <stdio.h>
2262306a36Sopenharmony_ci#include <stdlib.h>
2362306a36Sopenharmony_ci#include <string.h>
2462306a36Sopenharmony_ci#include <unistd.h>
2562306a36Sopenharmony_ci#include <syscall.h>
2662306a36Sopenharmony_ci#include <assert.h>
2762306a36Sopenharmony_ci#include <signal.h>
2862306a36Sopenharmony_ci#include <limits.h>
2962306a36Sopenharmony_ci#include <dlfcn.h>
3062306a36Sopenharmony_ci#include <stddef.h>
3162306a36Sopenharmony_ci#include <sys/auxv.h>
3262306a36Sopenharmony_ci#include <linux/auxvec.h>
3362306a36Sopenharmony_ci
3462306a36Sopenharmony_ci#include <linux/compiler.h>
3562306a36Sopenharmony_ci
3662306a36Sopenharmony_ci#include "../kselftest.h"
3762306a36Sopenharmony_ci#include "rseq.h"
3862306a36Sopenharmony_ci
3962306a36Sopenharmony_ci/*
4062306a36Sopenharmony_ci * Define weak versions to play nice with binaries that are statically linked
4162306a36Sopenharmony_ci * against a libc that doesn't support registering its own rseq.
4262306a36Sopenharmony_ci */
4362306a36Sopenharmony_ci__weak ptrdiff_t __rseq_offset;
4462306a36Sopenharmony_ci__weak unsigned int __rseq_size;
4562306a36Sopenharmony_ci__weak unsigned int __rseq_flags;
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_cistatic const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset;
4862306a36Sopenharmony_cistatic const unsigned int *libc_rseq_size_p = &__rseq_size;
4962306a36Sopenharmony_cistatic const unsigned int *libc_rseq_flags_p = &__rseq_flags;
5062306a36Sopenharmony_ci
/* Distance from the thread pointer to the rseq area. */
ptrdiff_t rseq_offset;

/*
 * Size of the registered rseq area. Holds UINT_MAX (i.e. -1U) until
 * rseq_init() assigns it; 0 if the registration was unsuccessful.
 */
unsigned int rseq_size = UINT_MAX;

/* Flags used during rseq registration. */
unsigned int rseq_flags;

/*
 * rseq feature size supported by the kernel. Holds UINT_MAX (i.e. -1U)
 * until rseq_init() assigns it; 0 if the registration was unsuccessful.
 */
unsigned int rseq_feature_size = UINT_MAX;

/* Non-zero when this file, rather than libc, owns the rseq registration. */
static int rseq_ownership;

/* Non-zero once at least one rseq registration has succeeded. */
static int rseq_reg_success;
7162306a36Sopenharmony_ci
7262306a36Sopenharmony_ci/* Allocate a large area for the TLS. */
7362306a36Sopenharmony_ci#define RSEQ_THREAD_AREA_ALLOC_SIZE	1024
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci/* Original struct rseq feature size is 20 bytes. */
7662306a36Sopenharmony_ci#define ORIG_RSEQ_FEATURE_SIZE		20
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_ci/* Original struct rseq allocation size is 32 bytes. */
7962306a36Sopenharmony_ci#define ORIG_RSEQ_ALLOC_SIZE		32
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_cistatic
8262306a36Sopenharmony_ci__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = {
8362306a36Sopenharmony_ci	.cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
8462306a36Sopenharmony_ci};
8562306a36Sopenharmony_ci
/*
 * Invoke the raw rseq system call with the given arguments.
 *
 * Returns the value of syscall() converted to int. Callers in this file
 * treat a non-zero result as failure and inspect errno afterwards.
 */
static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
		    int flags, uint32_t sig)
{
	long ret = syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);

	return (int) ret;
}
9162306a36Sopenharmony_ci
/*
 * Invoke the raw getcpu system call, storing the results through @cpu and
 * @node. The third syscall argument is always passed as NULL.
 *
 * Returns the value of syscall() converted to int.
 */
static int sys_getcpu(unsigned *cpu, unsigned *node)
{
	long ret = syscall(__NR_getcpu, cpu, node, NULL);

	return (int) ret;
}
9662306a36Sopenharmony_ci
/*
 * Probe for the rseq system call by issuing it with a NULL area, zero
 * length, zero flags and zero signature. That call is expected to fail.
 *
 * Returns 0 when errno is ENOSYS and 1 when errno is EINVAL. Aborts if the
 * call does not return -1 or if errno holds any other value.
 */
int rseq_available(void)
{
	if (sys_rseq(NULL, 0, 0, 0) != -1)
		abort();

	if (errno == ENOSYS)
		return 0;
	if (errno == EINVAL)
		return 1;

	/* Any other error is unexpected for this probe. */
	abort();
}
11362306a36Sopenharmony_ci
11462306a36Sopenharmony_ciint rseq_register_current_thread(void)
11562306a36Sopenharmony_ci{
11662306a36Sopenharmony_ci	int rc;
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_ci	if (!rseq_ownership) {
11962306a36Sopenharmony_ci		/* Treat libc's ownership as a successful registration. */
12062306a36Sopenharmony_ci		return 0;
12162306a36Sopenharmony_ci	}
12262306a36Sopenharmony_ci	rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG);
12362306a36Sopenharmony_ci	if (rc) {
12462306a36Sopenharmony_ci		if (RSEQ_READ_ONCE(rseq_reg_success)) {
12562306a36Sopenharmony_ci			/* Incoherent success/failure within process. */
12662306a36Sopenharmony_ci			abort();
12762306a36Sopenharmony_ci		}
12862306a36Sopenharmony_ci		return -1;
12962306a36Sopenharmony_ci	}
13062306a36Sopenharmony_ci	assert(rseq_current_cpu_raw() >= 0);
13162306a36Sopenharmony_ci	RSEQ_WRITE_ONCE(rseq_reg_success, 1);
13262306a36Sopenharmony_ci	return 0;
13362306a36Sopenharmony_ci}
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ciint rseq_unregister_current_thread(void)
13662306a36Sopenharmony_ci{
13762306a36Sopenharmony_ci	int rc;
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_ci	if (!rseq_ownership) {
14062306a36Sopenharmony_ci		/* Treat libc's ownership as a successful unregistration. */
14162306a36Sopenharmony_ci		return 0;
14262306a36Sopenharmony_ci	}
14362306a36Sopenharmony_ci	rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
14462306a36Sopenharmony_ci	if (rc)
14562306a36Sopenharmony_ci		return -1;
14662306a36Sopenharmony_ci	return 0;
14762306a36Sopenharmony_ci}
14862306a36Sopenharmony_ci
14962306a36Sopenharmony_cistatic
15062306a36Sopenharmony_ciunsigned int get_rseq_feature_size(void)
15162306a36Sopenharmony_ci{
15262306a36Sopenharmony_ci	unsigned long auxv_rseq_feature_size, auxv_rseq_align;
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_ci	auxv_rseq_align = getauxval(AT_RSEQ_ALIGN);
15562306a36Sopenharmony_ci	assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE);
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_ci	auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE);
15862306a36Sopenharmony_ci	assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE);
15962306a36Sopenharmony_ci	if (auxv_rseq_feature_size)
16062306a36Sopenharmony_ci		return auxv_rseq_feature_size;
16162306a36Sopenharmony_ci	else
16262306a36Sopenharmony_ci		return ORIG_RSEQ_FEATURE_SIZE;
16362306a36Sopenharmony_ci}
16462306a36Sopenharmony_ci
16562306a36Sopenharmony_cistatic __attribute__((constructor))
16662306a36Sopenharmony_civoid rseq_init(void)
16762306a36Sopenharmony_ci{
16862306a36Sopenharmony_ci	/*
16962306a36Sopenharmony_ci	 * If the libc's registered rseq size isn't already valid, it may be
17062306a36Sopenharmony_ci	 * because the binary is dynamically linked and not necessarily due to
17162306a36Sopenharmony_ci	 * libc not having registered a restartable sequence.  Try to find the
17262306a36Sopenharmony_ci	 * symbols if that's the case.
17362306a36Sopenharmony_ci	 */
17462306a36Sopenharmony_ci	if (!*libc_rseq_size_p) {
17562306a36Sopenharmony_ci		libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
17662306a36Sopenharmony_ci		libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
17762306a36Sopenharmony_ci		libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
17862306a36Sopenharmony_ci	}
17962306a36Sopenharmony_ci	if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
18062306a36Sopenharmony_ci			*libc_rseq_size_p != 0) {
18162306a36Sopenharmony_ci		/* rseq registration owned by glibc */
18262306a36Sopenharmony_ci		rseq_offset = *libc_rseq_offset_p;
18362306a36Sopenharmony_ci		rseq_size = *libc_rseq_size_p;
18462306a36Sopenharmony_ci		rseq_flags = *libc_rseq_flags_p;
18562306a36Sopenharmony_ci		rseq_feature_size = get_rseq_feature_size();
18662306a36Sopenharmony_ci		if (rseq_feature_size > rseq_size)
18762306a36Sopenharmony_ci			rseq_feature_size = rseq_size;
18862306a36Sopenharmony_ci		return;
18962306a36Sopenharmony_ci	}
19062306a36Sopenharmony_ci	rseq_ownership = 1;
19162306a36Sopenharmony_ci	if (!rseq_available()) {
19262306a36Sopenharmony_ci		rseq_size = 0;
19362306a36Sopenharmony_ci		rseq_feature_size = 0;
19462306a36Sopenharmony_ci		return;
19562306a36Sopenharmony_ci	}
19662306a36Sopenharmony_ci	rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer();
19762306a36Sopenharmony_ci	rseq_flags = 0;
19862306a36Sopenharmony_ci	rseq_feature_size = get_rseq_feature_size();
19962306a36Sopenharmony_ci	if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE)
20062306a36Sopenharmony_ci		rseq_size = ORIG_RSEQ_ALLOC_SIZE;
20162306a36Sopenharmony_ci	else
20262306a36Sopenharmony_ci		rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE;
20362306a36Sopenharmony_ci}
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_cistatic __attribute__((destructor))
20662306a36Sopenharmony_civoid rseq_exit(void)
20762306a36Sopenharmony_ci{
20862306a36Sopenharmony_ci	if (!rseq_ownership)
20962306a36Sopenharmony_ci		return;
21062306a36Sopenharmony_ci	rseq_offset = 0;
21162306a36Sopenharmony_ci	rseq_size = -1U;
21262306a36Sopenharmony_ci	rseq_feature_size = -1U;
21362306a36Sopenharmony_ci	rseq_ownership = 0;
21462306a36Sopenharmony_ci}
21562306a36Sopenharmony_ci
/*
 * Return the current CPU number as reported by sched_getcpu().
 *
 * If sched_getcpu() returns a negative value, the error is reported with
 * perror() and the process aborts.
 */
int32_t rseq_fallback_current_cpu(void)
{
	int32_t cpu_nr = sched_getcpu();

	if (cpu_nr >= 0)
		return cpu_nr;

	perror("sched_getcpu()");
	abort();
}
22762306a36Sopenharmony_ci
/*
 * Return the current node id as reported by the getcpu system call.
 *
 * If the call returns non-zero, the error is reported with perror() and
 * that return value is passed back to the caller unchanged.
 */
int32_t rseq_fallback_current_node(void)
{
	uint32_t cpu, node;
	int rc = sys_getcpu(&cpu, &node);

	if (rc == 0)
		return (int32_t) node;

	perror("sys_getcpu()");
	return rc;
}
240