xref: /third_party/alsa-lib/test/mixtest.c (revision d5ac70f0)
1#include <stdlib.h>
2#include <stdio.h>
3#include <string.h>
4#include <unistd.h>
5#include <sched.h>
6#include <time.h>
7
/*
 * Read the CPU time-stamp counter into the 64-bit lvalue `val`.
 *
 * RDTSC delivers the counter split across EDX:EAX.  The two halves are
 * captured through separate "=a"/"=d" outputs and combined by hand: the
 * "=A" constraint names the EDX:EAX pair only on 32-bit x86, while on
 * x86-64 it selects a single register and the upper half would be lost.
 *
 * The macro expands to one statement; use it as `rdtscll(var);`.
 */
#define rdtscll(val) \
	do { \
		unsigned int rdtsc_lo_, rdtsc_hi_; \
		__asm__ __volatile__("rdtsc" \
				     : "=a" (rdtsc_lo_), "=d" (rdtsc_hi_)); \
		(val) = ((unsigned long long)rdtsc_hi_ << 32) | rdtsc_lo_; \
	} while (0)
10
/*
 * Branch-prediction hints for conditions.
 *
 * The argument is collapsed to 0/1 with `!!` before it reaches
 * __builtin_expect, so the hint is correct for every truthy value (not
 * only the literal 1) and the macros always evaluate to 0 or 1.
 */
#define likely(x)       __builtin_expect(!!(x), 1)
#define unlikely(x)     __builtin_expect(!!(x), 0)
13
/*
 * Sample types used throughout this file:
 *   s16 - element type of the source and destination buffers
 *   s32 - element type of the wider sum (accumulator) buffers
 */
typedef signed short s16;
typedef signed int s32;
16
/*
 * Build-time switch: change the `#if 0` below to `#if 1` (or define
 * CONFIG_SMP on the compiler command line) to have LOCK_PREFIX expand to
 * the x86 "lock" instruction prefix.  By default it expands to nothing.
 */
#if 0
#define CONFIG_SMP
#endif

/* String pasted in front of the instruction in the inline-asm templates. */
#ifndef CONFIG_SMP
#define LOCK_PREFIX ""
#else
#define LOCK_PREFIX "lock ; "
#endif
26
/*
 * Placeholder aggregate used only as a cast target: __xg() reinterprets an
 * arbitrary pointer as `struct __xchg_dummy *` so that __cmpxchg() can hand
 * the pointed-to location to the assembler as an "m" operand without
 * knowing its real type.
 */
struct __xchg_dummy {
	unsigned long a[100];
};

#define __xg(x) ((struct __xchg_dummy *)(x))
29
30static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
31				      unsigned long new, int size)
32{
33	unsigned long prev;
34	switch (size) {
35	case 1:
36		__asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
37				     : "=a"(prev)
38				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
39				     : "memory");
40		return prev;
41	case 2:
42		__asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
43				     : "=a"(prev)
44				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
45				     : "memory");
46		return prev;
47	case 4:
48		__asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
49				     : "=a"(prev)
50				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
51				     : "memory");
52		return prev;
53	}
54	return old;
55}
56
/*
 * Typed front end for __cmpxchg(): the operand size is taken from the
 * pointee type of `ptr`, both values are widened to unsigned long for the
 * call, and the result is cast back to the pointee type.
 */
#define cmpxchg(ptr, old_val, new_val) \
	((__typeof__(*(ptr)))__cmpxchg((ptr), \
				       (unsigned long)(old_val), \
				       (unsigned long)(new_val), \
				       sizeof(*(ptr))))
60
61static inline void atomic_add(volatile int *dst, int v)
62{
63	__asm__ __volatile__(
64		LOCK_PREFIX "addl %1,%0"
65		:"=m" (*dst)
66		:"ir" (v), "m" (*dst));
67}
68
69static double detect_cpu_clock()
70{
71	struct timespec tm_begin, tm_end;
72	unsigned long long tsc_begin, tsc_end;
73
74	/* Warm cache */
75	clock_gettime(CLOCK_MONOTONIC, &tm_begin);
76
77	rdtscll(tsc_begin);
78	clock_gettime(CLOCK_MONOTONIC, &tm_begin);
79
80	usleep(1000000);
81
82	rdtscll(tsc_end);
83	clock_gettime(CLOCK_MONOTONIC, &tm_end);
84
85	return (tsc_end - tsc_begin) / (tm_end.tv_sec - tm_begin.tv_sec + (tm_end.tv_nsec - tm_begin.tv_nsec) / 1e9);
86}
87
88void mix_areas_srv(unsigned int size,
89		   const s16 *src,
90		   volatile s32 *sum,
91		   unsigned int src_step, unsigned int sum_step)
92{
93	src_step /= sizeof(*src);
94	sum_step /= sizeof(*sum);
95        while (size-- > 0) {
96                atomic_add(sum, *src);
97                src += src_step;
98                sum += sum_step;
99        }
100}
101
102void saturate(unsigned int size,
103              s16 *dst, const s32 *sum,
104              unsigned int dst_step, unsigned int sum_step)
105{
106	dst_step /= sizeof(*dst);
107	sum_step /= sizeof(*sum);
108        while (size-- > 0) {
109                s32 sample = *sum;
110                if (unlikely(sample < -0x8000))
111                        *dst = -0x8000;
112                else if (unlikely(sample > 0x7fff))
113                        *dst = 0x7fff;
114                else
115                        *dst = sample;
116                dst += dst_step;
117                sum += sum_step;
118        }
119}
120
121void mix_areas0(unsigned int size,
122		volatile s16 *dst, s16 *src,
123		volatile s32 *sum,
124		unsigned int dst_step,
125		unsigned int src_step,
126		unsigned int sum_step)
127{
128	dst_step /= sizeof(*dst);
129	src_step /= sizeof(*src);
130	sum_step /= sizeof(*sum);
131	while (size-- > 0) {
132		s32 sample = *dst + *src;
133		if (unlikely(sample < -0x8000))
134			*dst = -0x8000;
135		else if (unlikely(sample > 0x7fff))
136			*dst = 0x7fff;
137		else
138			*dst = sample;
139		dst += dst_step;
140		src += src_step;
141		sum += sum_step;
142	}
143}
144
145#define MIX_AREAS_16 mix_areas1
146#define MIX_AREAS_16_MMX mix_areas1_mmx
147#define MIX_AREAS_32 mix_areas1_32
148#define MIX_AREAS_24 mix_areas1_24
149#define MIX_AREAS_24_CMOV mix_areas1_24_cmov
150#define XADD "addl"
151#define XSUB "subl"
152#include "../src/pcm/pcm_dmix_i386.h"
153static void *ptr_mix_areas1_32 __attribute__((unused)) = &mix_areas1_32;
154static void *ptr_mix_areas1_24 __attribute__((unused)) = &mix_areas1_24;
155static void *ptr_mix_areas1_24_cmov __attribute__((unused)) = &mix_areas1_24_cmov;
156
157void mix_areas2(unsigned int size,
158		volatile s16 *dst, const s16 *src,
159		volatile s32 *sum,
160		unsigned int dst_step,
161		unsigned int src_step)
162{
163	dst_step /= sizeof(*dst);
164	src_step /= sizeof(*src);
165	while (size-- > 0) {
166		s32 sample = *src;
167		s32 old_sample = *sum;
168		if (cmpxchg(dst, 0, 1) == 0)
169			sample -= old_sample;
170		atomic_add(sum, sample);
171		do {
172			sample = *sum;
173			if (unlikely(sample < -0x8000))
174				*dst = -0x8000;
175			else if (unlikely(sample > 0x7fff))
176				*dst = 0x7fff;
177			else
178				*dst = sample;
179		} while (unlikely(sample != *sum));
180		sum++;
181		dst += dst_step;
182		src += src_step;
183	}
184}
185
/*
 * Try to switch the calling process to the SCHED_RR policy at the highest
 * priority that policy allows, and report the outcome on stdout.
 *
 * Failure is reported but otherwise ignored; the process then keeps its
 * current scheduling policy.
 */
void setscheduler(void)
{
	struct sched_param sched_param;

	if (sched_getparam(0, &sched_param) < 0) {
		printf("Scheduler getparam failed...\n");
		return;
	}
	sched_param.sched_priority = sched_get_priority_max(SCHED_RR);
	/*
	 * Only -1 signals failure.  Linux returns 0 on success, but POSIX
	 * specifies the former policy as the success value, which need not
	 * be 0 -- so do not test for "== 0".
	 */
	if (sched_setscheduler(0, SCHED_RR, &sched_param) != -1) {
		printf("Scheduler set to Round Robin with priority %i...\n", sched_param.sched_priority);
		fflush(stdout);
		return;
	}
	printf("!!!Scheduler set to Round Robin with priority %i FAILED!!!\n", sched_param.sched_priority);
}
202
/*
 * Size in bytes of the scratch buffer that init() allocates and writes.
 * Defaults to 1 MiB; main() replaces it with argv[4] * 1024 when a fourth
 * argument is given.
 */
int cache_size = 1 << 20;
204
205void init(s16 *dst, s32 *sum, int size)
206{
207	int count;
208	char *a;
209
210	for (count = size - 1; count >= 0; count--)
211		*sum++ = 0;
212	for (count = size - 1; count >= 0; count--)
213		*dst++ = 0;
214	a = malloc(cache_size);
215	for (count = cache_size - 1; count >= 0; count--) {
216		a[count] = count & 0xff;
217		a[count] ^= 0x55;
218		a[count] ^= 0xaa;
219	}
220	free(a);
221}
222
223int main(int argc, char **argv)
224{
225	int size = 2048, n = 4, max = 32267;
226	int LOOP = 100;
227	int i, t;
228	unsigned long long begin, end, diff, diffS, diff0, diff1, diff1_mmx, diff2;
229        double cpu_clock = detect_cpu_clock();
230	s16 *dst = malloc(sizeof(*dst) * size);
231	s32 *sum = calloc(size, sizeof(*sum));
232	s16 **srcs = malloc(sizeof(*srcs) * n);
233
234	setscheduler();
235#ifndef CONFIG_SMP
236        printf("CPU clock: %fMhz (UP)\n\n", cpu_clock / 10e5);
237#else
238        printf("CPU clock: %fMhz (SMP)\n\n", cpu_clock / 10e5);
239#endif
240	if (argc > 3) {
241		size = atoi(argv[1]);
242		n = atoi(argv[2]);
243		max = atoi(argv[3]);
244	}
245	if (argc > 4)
246		cache_size = atoi(argv[4]) * 1024;
247	for (i = 0; i < n; i++) {
248		int k;
249		s16 *s;
250		srcs[i] = s = malloc(sizeof(s16) * size);
251		for (k = 0; k < size; ++k, ++s) {
252			*s = (rand() % (max * 2)) - max;
253		}
254	}
255
256	for (t = 0, diffS = -1; t < LOOP; t++) {
257		init(dst, sum, size);
258		rdtscll(begin);
259		for (i = 0; i < n; i++) {
260			mix_areas_srv(size, srcs[i], sum, 2, 4);
261		}
262		saturate(size, dst, sum, 2, 4);
263		rdtscll(end);
264		diff = end - begin;
265		if (diff < diffS)
266			diffS = diff;
267		printf("mix_areas_srv : %llu               \r", diff); fflush(stdout);
268	}
269
270	for (t = 0, diff0 = -1; t < LOOP; t++) {
271		init(dst, sum, size);
272		rdtscll(begin);
273		for (i = 0; i < n; i++) {
274			mix_areas0(size, dst, srcs[i], sum, 2, 2, 4);
275		}
276		rdtscll(end);
277		diff = end - begin;
278		if (diff < diff0)
279			diff0 = diff;
280		printf("mix_areas0    : %llu               \r", diff); fflush(stdout);
281	}
282
283	for (t = 0, diff1 = -1; t < LOOP; t++) {
284		init(dst, sum, size);
285		rdtscll(begin);
286		for (i = 0; i < n; i++) {
287			mix_areas1(size, dst, srcs[i], sum, 2, 2, 4);
288		}
289		rdtscll(end);
290		diff = end - begin;
291		if (diff < diff1)
292			diff1 = diff;
293		printf("mix_areas1    : %llu              \r", diff); fflush(stdout);
294	}
295
296	for (t = 0, diff1_mmx = -1; t < LOOP; t++) {
297		init(dst, sum, size);
298		rdtscll(begin);
299		for (i = 0; i < n; i++) {
300			mix_areas1_mmx(size, dst, srcs[i], sum, 2, 2, 4);
301		}
302		rdtscll(end);
303		diff = end - begin;
304		if (diff < diff1_mmx)
305			diff1_mmx = diff;
306		printf("mix_areas1_mmx: %llu              \r", diff); fflush(stdout);
307	}
308
309	for (t = 0, diff2 = -1; t < LOOP; t++) {
310		init(dst, sum, size);
311		rdtscll(begin);
312		for (i = 0; i < n; i++) {
313			mix_areas2(size, dst, srcs[i], sum, 2, 2);
314		}
315		rdtscll(end);
316		diff = end - begin;
317		if (diff < diff2)
318			diff2 = diff;
319		printf("mix_areas2    : %llu              \r", diff); fflush(stdout);
320	}
321
322	printf("                                                                           \r");
323	printf("Summary (the best times):\n");
324	printf("mix_areas_srv  : %8llu %f%%\n", diffS, 100*2*44100.0*diffS/(size*n*cpu_clock));
325	printf("mix_areas0     : %8llu %f%%\n", diff0, 100*2*44100.0*diff0/(size*n*cpu_clock));
326	printf("mix_areas1     : %8llu %f%%\n", diff1, 100*2*44100.0*diff1/(size*n*cpu_clock));
327	printf("mix_areas1_mmx : %8llu %f%%\n", diff1_mmx, 100*2*44100.0*diff1_mmx/(size*n*cpu_clock));
328	printf("mix_areas2     : %8llu %f%%\n", diff2, 100*2*44100.0*diff2/(size*n*cpu_clock));
329
330	printf("\n");
331	printf("areas1/srv ratio     : %f\n", (double)diff1 / diffS);
332	printf("areas1_mmx/srv ratio : %f\n", (double)diff1_mmx / diffS);
333
334	return 0;
335}
336