/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Gareth Hughes
 */

/*
 * Shared helpers for the math debugging/self-test code: cycle-counter
 * macros (INIT_COUNTER / BEGIN_RACE / END_RACE), a random-number helper
 * and a float comparison helper.
 *
 * Everything below is compiled only when DEBUG_MATH is defined.  The
 * including file must already have declared the GL types used here
 * (GLfloat, GLint); this header does not include a GL header itself.
 */

#ifndef __M_DEBUG_UTIL_H__
#define __M_DEBUG_UTIL_H__


#ifdef DEBUG_MATH  /* This code only used for debugging */


#include <limits.h>   /* LONG_MAX */
#include <math.h>     /* frexpf */
#include <stdlib.h>   /* rand, RAND_MAX */


/* Remove this block to deactivate the cycle counter.
 * NOTE: on x86 it requires a CPU that implements the 'rdtsc' instruction
 * (586 or higher).
 *
 * NOTE(review): this auto-detection only covers i386 and sparc.  The
 * __x86_64__ implementation further down is therefore only reachable if
 * RUN_DEBUG_BENCHMARK is defined by some other means - confirm whether
 * that is intended.
 */
#if defined(__GNUC__) && \
    ((defined(__i386__) && defined(USE_X86_ASM)) || \
     (defined(__sparc__) && defined(USE_SPARC_ASM)))
#define RUN_DEBUG_BENCHMARK
#endif

#define TEST_COUNT		128	/* size of the tested vector array   */

/* Precision thresholds, in significand bits.  With these values up to
 * MAX_PRECISION - REQUIRED_PRECISION = 14 bits may differ.
 * (Presumably compared against the result of significand_match() by the
 * test code that includes this header - confirm against the callers.)
 */
#define REQUIRED_PRECISION	10	/* min. number of matching bits      */
#define MAX_PRECISION		24	/* max. precision possible           */


#ifdef RUN_DEBUG_BENCHMARK
/* Overhead of profiling counter in cycles.  Automatically adjusted to
 * your machine at run time - counter initialization should give very
 * consistent results.
 */
extern long counter_overhead;

/* This is the value of the environment variable MESA_PROFILE, and is
 * used to determine if we should benchmark the functions as well as
 * verify their correctness.
 */
extern char *mesa_profile;

/* Modify the number of tests if you like.
 * We take the minimum of all results, because every error should be
 * positive (time used by other processes, task switches etc).
 * It is assumed that all calculations are done in the cache.
 *
 * Usage contract shared by every architecture variant below:
 *
 *   - BEGIN_RACE(x) and END_RACE(x) must be used as a pair in the same
 *     scope: BEGIN_RACE opens a 'for' loop (and declares cycle_tmp1 /
 *     cycle_tmp2 inside it) which END_RACE closes.  The code to be timed
 *     goes between the two and is executed once per loop iteration.
 *   - The enclosing scope must declare an integer loop variable named
 *     'cycle_i'; the RACE macros use it without declaring it.
 *   - 'x' must be an lvalue that can hold a long.  After END_RACE it
 *     holds the smallest timestamp delta seen, minus counter_overhead.
 *   - 'x' is expanded several times, so it must not have side effects.
 */

#if defined(__i386__)

#if 1 /* PPro, PII, PIII version */

/* Profiling on the P6 architecture requires a little more work, due to
 * the internal out-of-order execution.  We must perform a serializing
 * 'cpuid' instruction before and after the 'rdtsc' instructions to make
 * sure no other uops are executed when we sample the timestamp counter.
 *
 * %ebx is saved and restored by hand around each 'cpuid' instead of being
 * listed as a clobber.  Only %eax (the low 32 bits of the timestamp) is
 * stored.
 *
 * NOTE(review): the timestamp is stored through an "=m" operand while
 * %ebx is still pushed.  If the compiler addresses that operand relative
 * to %esp, the store would land 4 bytes away from the intended slot -
 * verify the generated code when building without a frame pointer.
 */

/* Sets counter_overhead to the smallest delta between two back-to-back
 * timestamp samples, taken over 8 attempts.
 */
#define INIT_COUNTER()							\
   do {									\
      int cycle_i;							\
      counter_overhead = LONG_MAX;					\
      for ( cycle_i = 0 ; cycle_i < 8 ; cycle_i++ ) {			\
	 long cycle_tmp1 = 0, cycle_tmp2 = 0;				\
	 __asm__ __volatile__ ( "push %%ebx       \n"			\
				"xor %%eax, %%eax \n"			\
				"cpuid            \n"			\
				"rdtsc            \n"			\
				"mov %%eax, %0    \n"			\
				"xor %%eax, %%eax \n"			\
				"cpuid            \n"			\
				"pop %%ebx        \n"			\
				"push %%ebx       \n"			\
				"xor %%eax, %%eax \n"			\
				"cpuid            \n"			\
				"rdtsc            \n"			\
				"mov %%eax, %1    \n"			\
				"xor %%eax, %%eax \n"			\
				"cpuid            \n"			\
				"pop %%ebx        \n"			\
				: "=m" (cycle_tmp1), "=m" (cycle_tmp2)	\
				: : "eax", "ecx", "edx" );		\
	 if ( counter_overhead > (cycle_tmp2 - cycle_tmp1) ) {		\
	    counter_overhead = cycle_tmp2 - cycle_tmp1;			\
	 }								\
      }									\
   } while (0)

/* Opens a 10-iteration timing loop and samples the start timestamp. */
#define BEGIN_RACE(x)							\
   x = LONG_MAX;							\
   for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) {			\
      long cycle_tmp1 = 0, cycle_tmp2 = 0;				\
      __asm__ __volatile__ ( "push %%ebx       \n"			\
			     "xor %%eax, %%eax \n"			\
			     "cpuid            \n"			\
			     "rdtsc            \n"			\
			     "mov %%eax, %0    \n"			\
			     "xor %%eax, %%eax \n"			\
			     "cpuid            \n"			\
			     "pop %%ebx        \n"			\
			     : "=m" (cycle_tmp1)			\
			     : : "eax", "ecx", "edx" );

/* Samples the end timestamp, keeps the minimum delta in x, closes the
 * loop opened by BEGIN_RACE and subtracts counter_overhead.
 */
#define END_RACE(x)							\
      __asm__ __volatile__ ( "push %%ebx       \n"			\
			     "xor %%eax, %%eax \n"			\
			     "cpuid            \n"			\
			     "rdtsc            \n"			\
			     "mov %%eax, %0    \n"			\
			     "xor %%eax, %%eax \n"			\
			     "cpuid            \n"			\
			     "pop %%ebx        \n"			\
			     : "=m" (cycle_tmp2)			\
			     : : "eax", "ecx", "edx" );			\
      if ( x > (cycle_tmp2 - cycle_tmp1) ) {				\
	 x = cycle_tmp2 - cycle_tmp1;					\
      }									\
   }									\
   x -= counter_overhead;

#else /* PPlain, PMMX version */

/* To ensure accurate results, we stall the pipelines with the
 * non-pairable 'cdq' instruction.  This ensures all the code being
 * profiled is complete when the 'rdtsc' instruction executes.
 *
 * This variant is currently compiled out by the '#if 1' above.  Note that
 * its INIT_COUNTER takes the destination as a parameter, unlike the other
 * variants, which write counter_overhead directly.
 */
#define INIT_COUNTER(x)							\
   do {									\
      int cycle_i;							\
      x = LONG_MAX;							\
      for ( cycle_i = 0 ; cycle_i < 32 ; cycle_i++ ) {			\
	 long cycle_tmp1, cycle_tmp2, dummy;				\
	 __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp1) );		\
	 __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp2) );		\
	 __asm__ ( "cdq" );						\
	 __asm__ ( "cdq" );						\
	 __asm__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) );		\
	 __asm__ ( "cdq" );						\
	 __asm__ ( "cdq" );						\
	 __asm__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) );		\
	 if ( x > (cycle_tmp2 - cycle_tmp1) )				\
	    x = cycle_tmp2 - cycle_tmp1;				\
      }									\
   } while (0)

#define BEGIN_RACE(x)							\
   x = LONG_MAX;							\
   for ( cycle_i = 0 ; cycle_i < 16 ; cycle_i++ ) {			\
      long cycle_tmp1, cycle_tmp2, dummy;				\
      __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp1) );			\
      __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp2) );			\
      __asm__ ( "cdq" );						\
      __asm__ ( "cdq" );						\
      __asm__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) );


#define END_RACE(x)							\
      __asm__ ( "cdq" );						\
      __asm__ ( "cdq" );						\
      __asm__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) );		\
      if ( x > (cycle_tmp2 - cycle_tmp1) )				\
	 x = cycle_tmp2 - cycle_tmp1;					\
   }									\
   x -= counter_overhead;

#endif

#elif defined(__x86_64__)

/* Reads the full 64-bit timestamp counter into 'val' by combining the
 * two 32-bit halves returned by 'rdtsc'.
 */
#define rdtscll(val) do {						\
     unsigned int a,d;							\
     __asm__ volatile("rdtsc" : "=a" (a), "=d" (d));			\
     (val) = ((unsigned long)a) | (((unsigned long)d)<<32);		\
} while(0)

/* Modelled on the i386 PIII version, but without the serializing 'cpuid'
 * instructions around 'rdtsc' and with 16 calibration attempts.
 */
#define INIT_COUNTER()							\
   do {									\
      int cycle_i;							\
      counter_overhead = LONG_MAX;					\
      for ( cycle_i = 0 ; cycle_i < 16 ; cycle_i++ ) {			\
	 unsigned long cycle_tmp1, cycle_tmp2;				\
	 rdtscll(cycle_tmp1);						\
	 rdtscll(cycle_tmp2);						\
	 if ( counter_overhead > (cycle_tmp2 - cycle_tmp1) ) {		\
	    counter_overhead = cycle_tmp2 - cycle_tmp1;			\
	 }								\
      }									\
   } while (0)


#define BEGIN_RACE(x)							\
   x = LONG_MAX;							\
   for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) {			\
      unsigned long cycle_tmp1, cycle_tmp2;				\
      rdtscll(cycle_tmp1);

#define END_RACE(x)							\
      rdtscll(cycle_tmp2);						\
      if ( x > (cycle_tmp2 - cycle_tmp1) ) {				\
	 x = cycle_tmp2 - cycle_tmp1;					\
      }									\
   }									\
   x -= counter_overhead;

#elif defined(__sparc__)

/* No run-time calibration on sparc: the overhead is a fixed 5. */
#define INIT_COUNTER()	\
	 do { counter_overhead = 5; } while(0)

/* The 'rd %tick' instructions are emitted as raw opcodes, so the two
 * temporaries are pinned to the registers (%l0, %l1) encoded in them.
 */
#define BEGIN_RACE(x)                                                        \
x = LONG_MAX;                                                                \
for (cycle_i = 0; cycle_i <10; cycle_i++) {                                  \
   register long cycle_tmp1 __asm__("l0");				     \
   register long cycle_tmp2 __asm__("l1");				     \
   /* rd %tick, %l0 */							     \
   __asm__ __volatile__ (".word 0xa1410000" : "=r" (cycle_tmp1));  /*  save timestamp   */

#define END_RACE(x)                                                          \
   /* rd %tick, %l1 */							     \
   __asm__ __volatile__ (".word 0xa3410000" : "=r" (cycle_tmp2));	     \
   if (x > (cycle_tmp2-cycle_tmp1)) x = cycle_tmp2 - cycle_tmp1;             \
}                                                                            \
x -= counter_overhead;

#else
#error Your processor is not supported for RUN_DEBUG_BENCHMARK
#endif

#else

/* Benchmarking disabled: the RACE macros expand to nothing, so the code
 * between them simply runs once.  INIT_COUNTER is not defined here.
 */
#define BEGIN_RACE(x)
#define END_RACE(x)

#endif


/* =============================================================
 * Helper functions
 */

/**
 * Return a pseudo-random value in the range [-1, 1].
 *
 * The value from rand() is scaled to [0, 1], truncated to a multiple of
 * 2^-13 and then mapped to [-1, 1], so results are multiples of 2^-12.
 * The sequence depends on the current rand() state.
 */
static GLfloat rnd( void )
{
   GLfloat f = (GLfloat)rand() / (GLfloat)RAND_MAX;
   GLfloat gran = (GLfloat)(1 << 13);

   /* Quantize so that the test values have only a few significant bits. */
   f = (GLfloat)(GLint)(f * gran) / gran;

   return f * 2.0F - 1.0F;
}

/**
 * Estimate how many leading significand bits of \p a and \p b agree.
 *
 * \return MAX_PRECISION when the two values are exactly equal, 0 when
 *         they differ and either one is zero, otherwise the smaller of
 *         the two binary exponents minus the binary exponent of their
 *         difference (exponents as reported by frexpf()).
 */
static int significand_match( GLfloat a, GLfloat b )
{
   GLfloat d = a - b;
   int a_ex, b_ex, d_ex;

   if ( d == 0.0F ) {
      return MAX_PRECISION;   /* Exact match */
   }

   if ( a == 0.0F || b == 0.0F ) {
      /* It would probably be better to check if the
       * non-zero number is denormalized and return
       * the index of the highest set bit here.
       */
      return 0;
   }

   /* Only the exponents are needed; the returned fractions are ignored. */
   frexpf( a, &a_ex );
   frexpf( b, &b_ex );
   frexpf( d, &d_ex );

   if ( a_ex < b_ex ) {
      return a_ex - d_ex;
   } else {
      return b_ex - d_ex;
   }
}

/* Symbolic element classes used by the including test code.
 * (Presumably: always zero, always one, always minus one, arbitrary
 * value - confirm against the users of this header.)
 */
enum { NIL = 0, ONE = 1, NEG = -1, VAR = 2 };

#endif /* DEBUG_MATH */

#endif /* __M_DEBUG_UTIL_H__ */