1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Memory bandwidth monitoring and allocation library
4 *
5 * Copyright (C) 2018 Intel Corporation
6 *
7 * Authors:
8 *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
9 *    Fenghua Yu <fenghua.yu@intel.com>
10 */
11#include "resctrl.h"
12
/* sysfs directory holding the dynamic PMUs, and the iMC names/files in it */
#define DYN_PMU_PATH		"/sys/bus/event_source/devices"
#define UNCORE_IMC		"uncore_imc"
#define READ_FILE_NAME		"events/cas_count_read"
#define WRITE_FILE_NAME		"events/cas_count_write"

/*
 * Multiplier applied to raw iMC counter values. Numerically 2^-14, i.e.
 * 64 / 2^20 -- presumably 64 bytes per CAS event converted to MB; confirm.
 */
#define SCALE			0.00006103515625

/* Capacity of imc_counters_config[] and of the config token array */
#define MAX_IMCS		20
#define MAX_TOKENS		5

/* Second index of imc_counters_config[][] */
#define READ			0
#define WRITE			1

/*
 * printf() templates for the mbm_local_bytes file. Arguments, in order:
 * resctrl root, [control group], [monitor group], L3 resource id.
 */
#define CON_MON_MBM_LOCAL_BYTES_PATH	"%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes"
#define CON_MBM_LOCAL_BYTES_PATH	"%s/%s/mon_data/mon_L3_%02d/mbm_local_bytes"
#define MON_MBM_LOCAL_BYTES_PATH	"%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes"
#define MBM_LOCAL_BYTES_PATH		"%s/mon_data/mon_L3_%02d/mbm_local_bytes"

/*
 * printf() templates for the llc_occupancy file; same argument order as
 * the mbm_local_bytes templates.
 */
#define CON_MON_LCC_OCCUP_PATH		"%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy"
#define CON_LCC_OCCUP_PATH		"%s/%s/mon_data/mon_L3_%02d/llc_occupancy"
#define MON_LCC_OCCUP_PATH		"%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy"
#define LCC_OCCUP_PATH			"%s/mon_data/mon_L3_%02d/llc_occupancy"
45
/*
 * Buffer that get_mem_bw_imc() passes to read() on a perf event fd.
 *
 * membw_initialize_perf_event_attr() requests only
 * PERF_FORMAT_TOTAL_TIME_ENABLED and PERF_FORMAT_TOTAL_TIME_RUNNING, so
 * presumably @id is never filled in by the kernel -- confirm.
 */
struct membw_read_format {
	__u64 value;         /* The value of the event */
	__u64 time_enabled;  /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
	__u64 time_running;  /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
	__u64 id;            /* if PERF_FORMAT_ID */
};
52
/* Everything needed to open and read one iMC perf counter (read or write) */
struct imc_counter_config {
	/* PMU type, read from the iMC's "type" file by read_from_imc_dir() */
	__u32 type;
	/* "event" value parsed by get_event_and_umask() */
	__u64 event;
	/* "umask" value parsed by get_event_and_umask() */
	__u64 umask;
	/* Attributes built by membw_initialize_perf_event_attr() */
	struct perf_event_attr pe;
	/* Last result read from @fd by get_mem_bw_imc() */
	struct membw_read_format return_value;
	/* perf event fd set by open_perf_event(); -1 if the open failed */
	int fd;
};
61
/*
 * Path of the resctrl file read by get_mem_bw_resctrl(). Despite the name,
 * every template written into it ends in "mbm_local_bytes".
 */
static char mbm_total_path[1024];
/* Number of iMCs found by num_of_imcs(); set in initialize_mem_bw_imc() */
static int imcs;
/* Indexed as [iMC number][READ or WRITE] */
static struct imc_counter_config imc_counters_config[MAX_IMCS][2];
65
66void membw_initialize_perf_event_attr(int i, int j)
67{
68	memset(&imc_counters_config[i][j].pe, 0,
69	       sizeof(struct perf_event_attr));
70	imc_counters_config[i][j].pe.type = imc_counters_config[i][j].type;
71	imc_counters_config[i][j].pe.size = sizeof(struct perf_event_attr);
72	imc_counters_config[i][j].pe.disabled = 1;
73	imc_counters_config[i][j].pe.inherit = 1;
74	imc_counters_config[i][j].pe.exclude_guest = 0;
75	imc_counters_config[i][j].pe.config =
76		imc_counters_config[i][j].umask << 8 |
77		imc_counters_config[i][j].event;
78	imc_counters_config[i][j].pe.sample_type = PERF_SAMPLE_IDENTIFIER;
79	imc_counters_config[i][j].pe.read_format =
80		PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
81}
82
83void membw_ioctl_perf_event_ioc_reset_enable(int i, int j)
84{
85	ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_RESET, 0);
86	ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_ENABLE, 0);
87}
88
89void membw_ioctl_perf_event_ioc_disable(int i, int j)
90{
91	ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_DISABLE, 0);
92}
93
94/*
95 * get_event_and_umask:	Parse config into event and umask
96 * @cas_count_cfg:	Config
97 * @count:		iMC number
98 * @op:			Operation (read/write)
99 */
100void get_event_and_umask(char *cas_count_cfg, int count, bool op)
101{
102	char *token[MAX_TOKENS];
103	int i = 0;
104
105	strcat(cas_count_cfg, ",");
106	token[0] = strtok(cas_count_cfg, "=,");
107
108	for (i = 1; i < MAX_TOKENS; i++)
109		token[i] = strtok(NULL, "=,");
110
111	for (i = 0; i < MAX_TOKENS; i++) {
112		if (!token[i])
113			break;
114		if (strcmp(token[i], "event") == 0) {
115			if (op == READ)
116				imc_counters_config[count][READ].event =
117				strtol(token[i + 1], NULL, 16);
118			else
119				imc_counters_config[count][WRITE].event =
120				strtol(token[i + 1], NULL, 16);
121		}
122		if (strcmp(token[i], "umask") == 0) {
123			if (op == READ)
124				imc_counters_config[count][READ].umask =
125				strtol(token[i + 1], NULL, 16);
126			else
127				imc_counters_config[count][WRITE].umask =
128				strtol(token[i + 1], NULL, 16);
129		}
130	}
131}
132
133static int open_perf_event(int i, int cpu_no, int j)
134{
135	imc_counters_config[i][j].fd =
136		perf_event_open(&imc_counters_config[i][j].pe, -1, cpu_no, -1,
137				PERF_FLAG_FD_CLOEXEC);
138
139	if (imc_counters_config[i][j].fd == -1) {
140		fprintf(stderr, "Error opening leader %llx\n",
141			imc_counters_config[i][j].pe.config);
142
143		return -1;
144	}
145
146	return 0;
147}
148
149/* Get type and config (read and write) of an iMC counter */
150static int read_from_imc_dir(char *imc_dir, int count)
151{
152	char cas_count_cfg[1024], imc_counter_cfg[1024], imc_counter_type[1024];
153	FILE *fp;
154
155	/* Get type of iMC counter */
156	sprintf(imc_counter_type, "%s%s", imc_dir, "type");
157	fp = fopen(imc_counter_type, "r");
158	if (!fp) {
159		perror("Failed to open imc counter type file");
160
161		return -1;
162	}
163	if (fscanf(fp, "%u", &imc_counters_config[count][READ].type) <= 0) {
164		perror("Could not get imc type");
165		fclose(fp);
166
167		return -1;
168	}
169	fclose(fp);
170
171	imc_counters_config[count][WRITE].type =
172				imc_counters_config[count][READ].type;
173
174	/* Get read config */
175	sprintf(imc_counter_cfg, "%s%s", imc_dir, READ_FILE_NAME);
176	fp = fopen(imc_counter_cfg, "r");
177	if (!fp) {
178		perror("Failed to open imc config file");
179
180		return -1;
181	}
182	if (fscanf(fp, "%s", cas_count_cfg) <= 0) {
183		perror("Could not get imc cas count read");
184		fclose(fp);
185
186		return -1;
187	}
188	fclose(fp);
189
190	get_event_and_umask(cas_count_cfg, count, READ);
191
192	/* Get write config */
193	sprintf(imc_counter_cfg, "%s%s", imc_dir, WRITE_FILE_NAME);
194	fp = fopen(imc_counter_cfg, "r");
195	if (!fp) {
196		perror("Failed to open imc config file");
197
198		return -1;
199	}
200	if  (fscanf(fp, "%s", cas_count_cfg) <= 0) {
201		perror("Could not get imc cas count write");
202		fclose(fp);
203
204		return -1;
205	}
206	fclose(fp);
207
208	get_event_and_umask(cas_count_cfg, count, WRITE);
209
210	return 0;
211}
212
213/*
214 * A system can have 'n' number of iMC (Integrated Memory Controller)
215 * counters, get that 'n'. For each iMC counter get it's type and config.
216 * Also, each counter has two configs, one for read and the other for write.
217 * A config again has two parts, event and umask.
218 * Enumerate all these details into an array of structures.
219 *
220 * Return: >= 0 on success. < 0 on failure.
221 */
222static int num_of_imcs(void)
223{
224	char imc_dir[512], *temp;
225	unsigned int count = 0;
226	struct dirent *ep;
227	int ret;
228	DIR *dp;
229
230	dp = opendir(DYN_PMU_PATH);
231	if (dp) {
232		while ((ep = readdir(dp))) {
233			temp = strstr(ep->d_name, UNCORE_IMC);
234			if (!temp)
235				continue;
236
237			/*
238			 * imc counters are named as "uncore_imc_<n>", hence
239			 * increment the pointer to point to <n>. Note that
240			 * sizeof(UNCORE_IMC) would count for null character as
241			 * well and hence the last underscore character in
242			 * uncore_imc'_' need not be counted.
243			 */
244			temp = temp + sizeof(UNCORE_IMC);
245
246			/*
247			 * Some directories under "DYN_PMU_PATH" could have
248			 * names like "uncore_imc_free_running", hence, check if
249			 * first character is a numerical digit or not.
250			 */
251			if (temp[0] >= '0' && temp[0] <= '9') {
252				sprintf(imc_dir, "%s/%s/", DYN_PMU_PATH,
253					ep->d_name);
254				ret = read_from_imc_dir(imc_dir, count);
255				if (ret) {
256					closedir(dp);
257
258					return ret;
259				}
260				count++;
261			}
262		}
263		closedir(dp);
264		if (count == 0) {
265			perror("Unable find iMC counters!\n");
266
267			return -1;
268		}
269	} else {
270		perror("Unable to open PMU directory!\n");
271
272		return -1;
273	}
274
275	return count;
276}
277
278static int initialize_mem_bw_imc(void)
279{
280	int imc, j;
281
282	imcs = num_of_imcs();
283	if (imcs <= 0)
284		return imcs;
285
286	/* Initialize perf_event_attr structures for all iMC's */
287	for (imc = 0; imc < imcs; imc++) {
288		for (j = 0; j < 2; j++)
289			membw_initialize_perf_event_attr(imc, j);
290	}
291
292	return 0;
293}
294
295/*
296 * get_mem_bw_imc:	Memory band width as reported by iMC counters
297 * @cpu_no:		CPU number that the benchmark PID is binded to
298 * @bw_report:		Bandwidth report type (reads, writes)
299 *
300 * Memory B/W utilized by a process on a socket can be calculated using
301 * iMC counters. Perf events are used to read these counters.
302 *
303 * Return: = 0 on success. < 0 on failure.
304 */
305static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc)
306{
307	float reads, writes, of_mul_read, of_mul_write;
308	int imc, j, ret;
309
310	/* Start all iMC counters to log values (both read and write) */
311	reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1;
312	for (imc = 0; imc < imcs; imc++) {
313		for (j = 0; j < 2; j++) {
314			ret = open_perf_event(imc, cpu_no, j);
315			if (ret)
316				return -1;
317		}
318		for (j = 0; j < 2; j++)
319			membw_ioctl_perf_event_ioc_reset_enable(imc, j);
320	}
321
322	sleep(1);
323
324	/* Stop counters after a second to get results (both read and write) */
325	for (imc = 0; imc < imcs; imc++) {
326		for (j = 0; j < 2; j++)
327			membw_ioctl_perf_event_ioc_disable(imc, j);
328	}
329
330	/*
331	 * Get results which are stored in struct type imc_counter_config
332	 * Take over flow into consideration before calculating total b/w
333	 */
334	for (imc = 0; imc < imcs; imc++) {
335		struct imc_counter_config *r =
336			&imc_counters_config[imc][READ];
337		struct imc_counter_config *w =
338			&imc_counters_config[imc][WRITE];
339
340		if (read(r->fd, &r->return_value,
341			 sizeof(struct membw_read_format)) == -1) {
342			perror("Couldn't get read b/w through iMC");
343
344			return -1;
345		}
346
347		if (read(w->fd, &w->return_value,
348			 sizeof(struct membw_read_format)) == -1) {
349			perror("Couldn't get write bw through iMC");
350
351			return -1;
352		}
353
354		__u64 r_time_enabled = r->return_value.time_enabled;
355		__u64 r_time_running = r->return_value.time_running;
356
357		if (r_time_enabled != r_time_running)
358			of_mul_read = (float)r_time_enabled /
359					(float)r_time_running;
360
361		__u64 w_time_enabled = w->return_value.time_enabled;
362		__u64 w_time_running = w->return_value.time_running;
363
364		if (w_time_enabled != w_time_running)
365			of_mul_write = (float)w_time_enabled /
366					(float)w_time_running;
367		reads += r->return_value.value * of_mul_read * SCALE;
368		writes += w->return_value.value * of_mul_write * SCALE;
369	}
370
371	for (imc = 0; imc < imcs; imc++) {
372		close(imc_counters_config[imc][READ].fd);
373		close(imc_counters_config[imc][WRITE].fd);
374	}
375
376	if (strcmp(bw_report, "reads") == 0) {
377		*bw_imc = reads;
378		return 0;
379	}
380
381	if (strcmp(bw_report, "writes") == 0) {
382		*bw_imc = writes;
383		return 0;
384	}
385
386	*bw_imc = reads + writes;
387	return 0;
388}
389
390void set_mbm_path(const char *ctrlgrp, const char *mongrp, int resource_id)
391{
392	if (ctrlgrp && mongrp)
393		sprintf(mbm_total_path, CON_MON_MBM_LOCAL_BYTES_PATH,
394			RESCTRL_PATH, ctrlgrp, mongrp, resource_id);
395	else if (!ctrlgrp && mongrp)
396		sprintf(mbm_total_path, MON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
397			mongrp, resource_id);
398	else if (ctrlgrp && !mongrp)
399		sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
400			ctrlgrp, resource_id);
401	else if (!ctrlgrp && !mongrp)
402		sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
403			resource_id);
404}
405
406/*
407 * initialize_mem_bw_resctrl:	Appropriately populate "mbm_total_path"
408 * @ctrlgrp:			Name of the control monitor group (con_mon grp)
409 * @mongrp:			Name of the monitor group (mon grp)
410 * @cpu_no:			CPU number that the benchmark PID is binded to
411 * @resctrl_val:		Resctrl feature (Eg: mbm, mba.. etc)
412 */
413static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp,
414				      int cpu_no, char *resctrl_val)
415{
416	int resource_id;
417
418	if (get_resource_id(cpu_no, &resource_id) < 0) {
419		perror("Could not get resource_id");
420		return;
421	}
422
423	if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)))
424		set_mbm_path(ctrlgrp, mongrp, resource_id);
425
426	if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
427		if (ctrlgrp)
428			sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH,
429				RESCTRL_PATH, ctrlgrp, resource_id);
430		else
431			sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH,
432				RESCTRL_PATH, resource_id);
433	}
434}
435
436/*
437 * Get MBM Local bytes as reported by resctrl FS
438 * For MBM,
439 * 1. If con_mon grp and mon grp are given, then read from con_mon grp's mon grp
440 * 2. If only con_mon grp is given, then read from con_mon grp
441 * 3. If both are not given, then read from root con_mon grp
442 * For MBA,
443 * 1. If con_mon grp is given, then read from it
444 * 2. If con_mon grp is not given, then read from root con_mon grp
445 */
446static int get_mem_bw_resctrl(unsigned long *mbm_total)
447{
448	FILE *fp;
449
450	fp = fopen(mbm_total_path, "r");
451	if (!fp) {
452		perror("Failed to open total bw file");
453
454		return -1;
455	}
456	if (fscanf(fp, "%lu", mbm_total) <= 0) {
457		perror("Could not get mbm local bytes");
458		fclose(fp);
459
460		return -1;
461	}
462	fclose(fp);
463
464	return 0;
465}
466
467pid_t bm_pid, ppid;
468
469void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
470{
471	/* Only kill child after bm_pid is set after fork() */
472	if (bm_pid)
473		kill(bm_pid, SIGKILL);
474	umount_resctrlfs();
475	tests_cleanup();
476	ksft_print_msg("Ending\n\n");
477
478	exit(EXIT_SUCCESS);
479}
480
481/*
482 * Register CTRL-C handler for parent, as it has to kill
483 * child process before exiting.
484 */
485int signal_handler_register(void)
486{
487	struct sigaction sigact = {};
488	int ret = 0;
489
490	bm_pid = 0;
491
492	sigact.sa_sigaction = ctrlc_handler;
493	sigemptyset(&sigact.sa_mask);
494	sigact.sa_flags = SA_SIGINFO;
495	if (sigaction(SIGINT, &sigact, NULL) ||
496	    sigaction(SIGTERM, &sigact, NULL) ||
497	    sigaction(SIGHUP, &sigact, NULL)) {
498		perror("# sigaction");
499		ret = -1;
500	}
501	return ret;
502}
503
504/*
505 * Reset signal handler to SIG_DFL.
506 * Non-Value return because the caller should keep
507 * the error code of other path even if sigaction fails.
508 */
509void signal_handler_unregister(void)
510{
511	struct sigaction sigact = {};
512
513	sigact.sa_handler = SIG_DFL;
514	sigemptyset(&sigact.sa_mask);
515	if (sigaction(SIGINT, &sigact, NULL) ||
516	    sigaction(SIGTERM, &sigact, NULL) ||
517	    sigaction(SIGHUP, &sigact, NULL)) {
518		perror("# sigaction");
519	}
520}
521
/*
 * print_results_bw:	the memory bandwidth results are stored in a file
 * @filename:		file that stores the results; "stdio" and "stderr"
 *			both mean "print to standard output"
 * @bm_pid:		child pid that runs benchmark
 * @bw_imc:		perf imc counter value
 * @bw_resc:		memory bandwidth value
 *
 * Results are appended to @filename together with the absolute difference
 * of the two values.
 *
 * Return:		0 on success. non-zero on failure: the errno of the
 *			failed call, or EIO if that call did not set errno.
 */
static int print_results_bw(char *filename,  int bm_pid, float bw_imc,
			    unsigned long bw_resc)
{
	unsigned long diff = fabs(bw_imc - bw_resc);
	FILE *fp;
	int err;

	if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) {
		printf("Pid: %d \t Mem_BW_iMC: %f \t ", bm_pid, bw_imc);
		printf("Mem_BW_resc: %lu \t Difference: %lu\n", bw_resc, diff);

		return 0;
	}

	fp = fopen(filename, "a");
	if (!fp) {
		/* Save errno: perror() is allowed to change it */
		err = errno;
		perror("Cannot open results file");

		return err ? err : EIO;
	}

	if (fprintf(fp, "Pid: %d \t Mem_BW_iMC: %f \t Mem_BW_resc: %lu \t Difference: %lu\n",
		    bm_pid, bw_imc, bw_resc, diff) <= 0) {
		/* fclose() may overwrite the errno of the failed write */
		err = errno;
		fclose(fp);
		errno = err;
		perror("Could not log results.");

		return err ? err : EIO;
	}

	/* Buffered data is written out here, so a late write error shows up */
	if (fclose(fp)) {
		err = errno;
		perror("Could not log results.");

		return err ? err : EIO;
	}

	return 0;
}
559
560static void set_cmt_path(const char *ctrlgrp, const char *mongrp, char sock_num)
561{
562	if (strlen(ctrlgrp) && strlen(mongrp))
563		sprintf(llc_occup_path,	CON_MON_LCC_OCCUP_PATH,	RESCTRL_PATH,
564			ctrlgrp, mongrp, sock_num);
565	else if (!strlen(ctrlgrp) && strlen(mongrp))
566		sprintf(llc_occup_path,	MON_LCC_OCCUP_PATH, RESCTRL_PATH,
567			mongrp, sock_num);
568	else if (strlen(ctrlgrp) && !strlen(mongrp))
569		sprintf(llc_occup_path,	CON_LCC_OCCUP_PATH, RESCTRL_PATH,
570			ctrlgrp, sock_num);
571	else if (!strlen(ctrlgrp) && !strlen(mongrp))
572		sprintf(llc_occup_path, LCC_OCCUP_PATH,	RESCTRL_PATH, sock_num);
573}
574
575/*
576 * initialize_llc_occu_resctrl:	Appropriately populate "llc_occup_path"
577 * @ctrlgrp:			Name of the control monitor group (con_mon grp)
578 * @mongrp:			Name of the monitor group (mon grp)
579 * @cpu_no:			CPU number that the benchmark PID is binded to
580 * @resctrl_val:		Resctrl feature (Eg: cat, cmt.. etc)
581 */
582static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp,
583					int cpu_no, char *resctrl_val)
584{
585	int resource_id;
586
587	if (get_resource_id(cpu_no, &resource_id) < 0) {
588		perror("# Unable to resource_id");
589		return;
590	}
591
592	if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
593		set_cmt_path(ctrlgrp, mongrp, resource_id);
594}
595
596static int
597measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start)
598{
599	unsigned long bw_resc, bw_resc_end;
600	float bw_imc;
601	int ret;
602
603	/*
604	 * Measure memory bandwidth from resctrl and from
605	 * another source which is perf imc value or could
606	 * be something else if perf imc event is not available.
607	 * Compare the two values to validate resctrl value.
608	 * It takes 1sec to measure the data.
609	 */
610	ret = get_mem_bw_imc(param->cpu_no, param->bw_report, &bw_imc);
611	if (ret < 0)
612		return ret;
613
614	ret = get_mem_bw_resctrl(&bw_resc_end);
615	if (ret < 0)
616		return ret;
617
618	bw_resc = (bw_resc_end - *bw_resc_start) / MB;
619	ret = print_results_bw(param->filename, bm_pid, bw_imc, bw_resc);
620	if (ret)
621		return ret;
622
623	*bw_resc_start = bw_resc_end;
624
625	return 0;
626}
627
628/*
629 * resctrl_val:	execute benchmark and measure memory bandwidth on
630 *			the benchmark
631 * @benchmark_cmd:	benchmark command and its arguments
632 * @param:		parameters passed to resctrl_val()
633 *
634 * Return:		0 on success. non-zero on failure.
635 */
636int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *param)
637{
638	char *resctrl_val = param->resctrl_val;
639	unsigned long bw_resc_start = 0;
640	struct sigaction sigact;
641	int ret = 0, pipefd[2];
642	char pipe_message = 0;
643	union sigval value;
644
645	if (strcmp(param->filename, "") == 0)
646		sprintf(param->filename, "stdio");
647
648	if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) ||
649	    !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) {
650		ret = validate_bw_report_request(param->bw_report);
651		if (ret)
652			return ret;
653	}
654
655	/*
656	 * If benchmark wasn't successfully started by child, then child should
657	 * kill parent, so save parent's pid
658	 */
659	ppid = getpid();
660
661	if (pipe(pipefd)) {
662		perror("# Unable to create pipe");
663
664		return -1;
665	}
666
667	/*
668	 * Fork to start benchmark, save child's pid so that it can be killed
669	 * when needed
670	 */
671	fflush(stdout);
672	bm_pid = fork();
673	if (bm_pid == -1) {
674		perror("# Unable to fork");
675
676		return -1;
677	}
678
679	if (bm_pid == 0) {
680		/*
681		 * Mask all signals except SIGUSR1, parent uses SIGUSR1 to
682		 * start benchmark
683		 */
684		sigfillset(&sigact.sa_mask);
685		sigdelset(&sigact.sa_mask, SIGUSR1);
686
687		sigact.sa_sigaction = run_benchmark;
688		sigact.sa_flags = SA_SIGINFO;
689
690		/* Register for "SIGUSR1" signal from parent */
691		if (sigaction(SIGUSR1, &sigact, NULL))
692			PARENT_EXIT("Can't register child for signal");
693
694		/* Tell parent that child is ready */
695		close(pipefd[0]);
696		pipe_message = 1;
697		if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) <
698		    sizeof(pipe_message)) {
699			perror("# failed signaling parent process");
700			close(pipefd[1]);
701			return -1;
702		}
703		close(pipefd[1]);
704
705		/* Suspend child until delivery of "SIGUSR1" from parent */
706		sigsuspend(&sigact.sa_mask);
707
708		PARENT_EXIT("Child is done");
709	}
710
711	ksft_print_msg("Benchmark PID: %d\n", bm_pid);
712
713	/*
714	 * The cast removes constness but nothing mutates benchmark_cmd within
715	 * the context of this process. At the receiving process, it becomes
716	 * argv, which is mutable, on exec() but that's after fork() so it
717	 * doesn't matter for the process running the tests.
718	 */
719	value.sival_ptr = (void *)benchmark_cmd;
720
721	/* Taskset benchmark to specified cpu */
722	ret = taskset_benchmark(bm_pid, param->cpu_no);
723	if (ret)
724		goto out;
725
726	/* Write benchmark to specified control&monitoring grp in resctrl FS */
727	ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp,
728				      resctrl_val);
729	if (ret)
730		goto out;
731
732	if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
733	    !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
734		ret = initialize_mem_bw_imc();
735		if (ret)
736			goto out;
737
738		initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp,
739					  param->cpu_no, resctrl_val);
740	} else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
741		initialize_llc_occu_resctrl(param->ctrlgrp, param->mongrp,
742					    param->cpu_no, resctrl_val);
743
744	/* Parent waits for child to be ready. */
745	close(pipefd[1]);
746	while (pipe_message != 1) {
747		if (read(pipefd[0], &pipe_message, sizeof(pipe_message)) <
748		    sizeof(pipe_message)) {
749			perror("# failed reading message from child process");
750			close(pipefd[0]);
751			goto out;
752		}
753	}
754	close(pipefd[0]);
755
756	/* Signal child to start benchmark */
757	if (sigqueue(bm_pid, SIGUSR1, value) == -1) {
758		perror("# sigqueue SIGUSR1 to child");
759		ret = errno;
760		goto out;
761	}
762
763	/* Give benchmark enough time to fully run */
764	sleep(1);
765
766	/* Test runs until the callback setup() tells the test to stop. */
767	while (1) {
768		ret = param->setup(param);
769		if (ret == END_OF_TESTS) {
770			ret = 0;
771			break;
772		}
773		if (ret < 0)
774			break;
775
776		if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
777		    !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
778			ret = measure_vals(param, &bw_resc_start);
779			if (ret)
780				break;
781		} else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {
782			sleep(1);
783			ret = measure_cache_vals(param, bm_pid);
784			if (ret)
785				break;
786		}
787	}
788
789out:
790	kill(bm_pid, SIGKILL);
791
792	return ret;
793}
794