162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Benchmark find_next_bit and related bit operations.
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright 2020 Google LLC.
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci#include <stdlib.h>
862306a36Sopenharmony_ci#include "bench.h"
962306a36Sopenharmony_ci#include "../util/stat.h"
1062306a36Sopenharmony_ci#include <linux/bitmap.h>
1162306a36Sopenharmony_ci#include <linux/bitops.h>
1262306a36Sopenharmony_ci#include <linux/time64.h>
1362306a36Sopenharmony_ci#include <subcmd/parse-options.h>
1462306a36Sopenharmony_ci
1562306a36Sopenharmony_cistatic unsigned int outer_iterations = 5;
1662306a36Sopenharmony_cistatic unsigned int inner_iterations = 100000;
1762306a36Sopenharmony_ci
1862306a36Sopenharmony_cistatic const struct option options[] = {
1962306a36Sopenharmony_ci	OPT_UINTEGER('i', "outer-iterations", &outer_iterations,
2062306a36Sopenharmony_ci		"Number of outer iterations used"),
2162306a36Sopenharmony_ci	OPT_UINTEGER('j', "inner-iterations", &inner_iterations,
2262306a36Sopenharmony_ci		"Number of inner iterations used"),
2362306a36Sopenharmony_ci	OPT_END()
2462306a36Sopenharmony_ci};
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_cistatic const char *const bench_usage[] = {
2762306a36Sopenharmony_ci	"perf bench mem find_bit <options>",
2862306a36Sopenharmony_ci	NULL
2962306a36Sopenharmony_ci};
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_cistatic unsigned int accumulator;
3262306a36Sopenharmony_cistatic unsigned int use_of_val;
3362306a36Sopenharmony_ci
3462306a36Sopenharmony_cistatic noinline void workload(int val)
3562306a36Sopenharmony_ci{
3662306a36Sopenharmony_ci	use_of_val += val;
3762306a36Sopenharmony_ci	accumulator++;
3862306a36Sopenharmony_ci}
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_ci#if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__)
4162306a36Sopenharmony_cistatic bool asm_test_bit(long nr, const unsigned long *addr)
4262306a36Sopenharmony_ci{
4362306a36Sopenharmony_ci	bool oldbit;
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_ci	asm volatile("bt %2,%1"
4662306a36Sopenharmony_ci		     : "=@ccc" (oldbit)
4762306a36Sopenharmony_ci		     : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_ci	return oldbit;
5062306a36Sopenharmony_ci}
5162306a36Sopenharmony_ci#else
5262306a36Sopenharmony_ci#define asm_test_bit test_bit
5362306a36Sopenharmony_ci#endif
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_cistatic int do_for_each_set_bit(unsigned int num_bits)
5662306a36Sopenharmony_ci{
5762306a36Sopenharmony_ci	unsigned long *to_test = bitmap_zalloc(num_bits);
5862306a36Sopenharmony_ci	struct timeval start, end, diff;
5962306a36Sopenharmony_ci	u64 runtime_us;
6062306a36Sopenharmony_ci	struct stats fb_time_stats, tb_time_stats;
6162306a36Sopenharmony_ci	double time_average, time_stddev;
6262306a36Sopenharmony_ci	unsigned int bit, i, j;
6362306a36Sopenharmony_ci	unsigned int set_bits, skip;
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_ci	init_stats(&fb_time_stats);
6662306a36Sopenharmony_ci	init_stats(&tb_time_stats);
6762306a36Sopenharmony_ci
6862306a36Sopenharmony_ci	for (set_bits = 1; set_bits <= num_bits; set_bits <<= 1) {
6962306a36Sopenharmony_ci		bitmap_zero(to_test, num_bits);
7062306a36Sopenharmony_ci		skip = num_bits / set_bits;
7162306a36Sopenharmony_ci		for (i = 0; i < num_bits; i += skip)
7262306a36Sopenharmony_ci			__set_bit(i, to_test);
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_ci		for (i = 0; i < outer_iterations; i++) {
7562306a36Sopenharmony_ci#ifndef NDEBUG
7662306a36Sopenharmony_ci			unsigned int old = accumulator;
7762306a36Sopenharmony_ci#endif
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_ci			gettimeofday(&start, NULL);
8062306a36Sopenharmony_ci			for (j = 0; j < inner_iterations; j++) {
8162306a36Sopenharmony_ci				for_each_set_bit(bit, to_test, num_bits)
8262306a36Sopenharmony_ci					workload(bit);
8362306a36Sopenharmony_ci			}
8462306a36Sopenharmony_ci			gettimeofday(&end, NULL);
8562306a36Sopenharmony_ci			assert(old + (inner_iterations * set_bits) == accumulator);
8662306a36Sopenharmony_ci			timersub(&end, &start, &diff);
8762306a36Sopenharmony_ci			runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
8862306a36Sopenharmony_ci			update_stats(&fb_time_stats, runtime_us);
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci#ifndef NDEBUG
9162306a36Sopenharmony_ci			old = accumulator;
9262306a36Sopenharmony_ci#endif
9362306a36Sopenharmony_ci			gettimeofday(&start, NULL);
9462306a36Sopenharmony_ci			for (j = 0; j < inner_iterations; j++) {
9562306a36Sopenharmony_ci				for (bit = 0; bit < num_bits; bit++) {
9662306a36Sopenharmony_ci					if (asm_test_bit(bit, to_test))
9762306a36Sopenharmony_ci						workload(bit);
9862306a36Sopenharmony_ci				}
9962306a36Sopenharmony_ci			}
10062306a36Sopenharmony_ci			gettimeofday(&end, NULL);
10162306a36Sopenharmony_ci			assert(old + (inner_iterations * set_bits) == accumulator);
10262306a36Sopenharmony_ci			timersub(&end, &start, &diff);
10362306a36Sopenharmony_ci			runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
10462306a36Sopenharmony_ci			update_stats(&tb_time_stats, runtime_us);
10562306a36Sopenharmony_ci		}
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_ci		printf("%d operations %d bits set of %d bits\n",
10862306a36Sopenharmony_ci			inner_iterations, set_bits, num_bits);
10962306a36Sopenharmony_ci		time_average = avg_stats(&fb_time_stats);
11062306a36Sopenharmony_ci		time_stddev = stddev_stats(&fb_time_stats);
11162306a36Sopenharmony_ci		printf("  Average for_each_set_bit took: %.3f usec (+- %.3f usec)\n",
11262306a36Sopenharmony_ci			time_average, time_stddev);
11362306a36Sopenharmony_ci		time_average = avg_stats(&tb_time_stats);
11462306a36Sopenharmony_ci		time_stddev = stddev_stats(&tb_time_stats);
11562306a36Sopenharmony_ci		printf("  Average test_bit loop took:    %.3f usec (+- %.3f usec)\n",
11662306a36Sopenharmony_ci			time_average, time_stddev);
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_ci		if (use_of_val == accumulator)  /* Try to avoid compiler tricks. */
11962306a36Sopenharmony_ci			printf("\n");
12062306a36Sopenharmony_ci	}
12162306a36Sopenharmony_ci	bitmap_free(to_test);
12262306a36Sopenharmony_ci	return 0;
12362306a36Sopenharmony_ci}
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ciint bench_mem_find_bit(int argc, const char **argv)
12662306a36Sopenharmony_ci{
12762306a36Sopenharmony_ci	int err = 0, i;
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ci	argc = parse_options(argc, argv, options, bench_usage, 0);
13062306a36Sopenharmony_ci	if (argc) {
13162306a36Sopenharmony_ci		usage_with_options(bench_usage, options);
13262306a36Sopenharmony_ci		exit(EXIT_FAILURE);
13362306a36Sopenharmony_ci	}
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ci	for (i = 1; i <= 2048; i <<= 1)
13662306a36Sopenharmony_ci		do_for_each_set_bit(i);
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	return err;
13962306a36Sopenharmony_ci}
140