/**************************************************************************
 *
 * Copyright 2008 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/


#ifndef BITSCAN_H
#define BITSCAN_H

#include <assert.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>

#if defined(_MSC_VER)
#include <intrin.h>
#endif

#if defined(__POPCNT__)
#include <popcntintrin.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif


/**
 * Find first bit set in word.  Least significant bit is 1.
 * Return 0 if no bits set.
 */
#ifdef HAVE___BUILTIN_FFS
#define ffs __builtin_ffs
#elif defined(_MSC_VER) && (_M_IX86 || _M_ARM || _M_AMD64 || _M_IA64)
static inline
int ffs(int i)
{
   unsigned long index;
   if (_BitScanForward(&index, i))
      return index + 1;
   else
      return 0;
}
#else
/* On other platforms, fall back to the C library's ffs() (POSIX). */
extern
int ffs(int i);
#endif

/**
 * 64-bit counterpart of ffs().  Least significant bit is 1.
 * Return 0 if no bits set.
 */
#ifdef HAVE___BUILTIN_FFSLL
#define ffsll __builtin_ffsll
#elif defined(_MSC_VER) && (_M_AMD64 || _M_ARM64 || _M_IA64)
static inline int
ffsll(long long int i)
{
   unsigned long index;
   if (_BitScanForward64(&index, i))
      return index + 1;
   else
      return 0;
}
#else
/* On other platforms, fall back to the C library's ffsll() (glibc/BSD). */
extern int
ffsll(long long int val);
#endif


/* Destructively loop over all of the bits in a mask as in:
 *
 * while (mymask) {
 *   int i = u_bit_scan(&mymask);
 *   ... process element i
 * }
 *
 * \note *mask must be non-zero; with a zero mask the shift amount below
 * would be -1, which is undefined behavior.
 */
static inline int
u_bit_scan(unsigned *mask)
{
   const int i = ffs(*mask) - 1;
   *mask ^= (1u << i);
   return i;
}

/* Iterate over every set bit b (0-based) of the 32-bit mask `dword`.
 * The unsigned literal in the clear step is required: `1 << 31` would
 * shift into the sign bit of a signed int, which is undefined behavior.
 */
#define u_foreach_bit(b, dword) \
   for (uint32_t __dword = (dword), b;     \
        ((b) = ffs(__dword) - 1, __dword); \
        __dword &= ~(1u << (b)))

/* 64-bit variant of u_bit_scan().
 *
 * \note *mask must be non-zero (see u_bit_scan).
 */
static inline int
u_bit_scan64(uint64_t *mask)
{
   const int i = ffsll(*mask) - 1;
   *mask ^= (((uint64_t)1) << i);
   return i;
}

/* Iterate over every set bit b (0-based) of the 64-bit mask `dword`. */
#define u_foreach_bit64(b, dword) \
   for (uint64_t __dword = (dword), b;       \
        ((b) = ffsll(__dword) - 1, __dword); \
        __dword &= ~(1ull << (b)))

/* Determine if an unsigned value is a power of two.
 *
 * \note
 * Zero is treated as a power of two.
 */
static inline bool
util_is_power_of_two_or_zero(unsigned v)
{
   return (v & (v - 1)) == 0;
}

/* Determine if an uint64_t value is a power of two.
 *
 * \note
 * Zero is treated as a power of two.
 */
static inline bool
util_is_power_of_two_or_zero64(uint64_t v)
{
   return (v & (v - 1)) == 0;
}

/* Determine if an unsigned value is a power of two.
 *
 * \note
 * Zero is \b not treated as a power of two.
 */
static inline bool
util_is_power_of_two_nonzero(unsigned v)
{
   /* __POPCNT__ is different from HAVE___BUILTIN_POPCOUNT.  The latter
    * indicates the existence of the __builtin_popcount function.  The former
    * indicates that _mm_popcnt_u32 exists and is a native instruction.
    *
    * The other alternative is to use SSE 4.2 compile-time flags.  This has
    * two drawbacks.  First, there is currently no build infrastructure for
    * SSE 4.2 (only 4.1), so that would have to be added.  Second, some AMD
    * CPUs support POPCNT but not SSE 4.2 (e.g., Barcelona).
    */
#ifdef __POPCNT__
   return _mm_popcnt_u32(v) == 1;
#else
   return v != 0 && (v & (v - 1)) == 0;
#endif
}

/* For looping over a bitmask when you want to loop over consecutive bits
 * manually, for example:
 *
 * while (mask) {
 *    int start, count, i;
 *
 *    u_bit_scan_consecutive_range(&mask, &start, &count);
 *
 *    for (i = 0; i < count; i++)
 *       ... process element (start+i)
 * }
 *
 * \note *mask must be non-zero on entry (same precondition as u_bit_scan).
 */
static inline void
u_bit_scan_consecutive_range(unsigned *mask, int *start, int *count)
{
   /* The all-ones mask is special-cased: the generic path below would
    * shift a 32-bit value by 32, which is undefined behavior.
    */
   if (*mask == 0xffffffff) {
      *start = 0;
      *count = 32;
      *mask = 0;
      return;
   }
   *start = ffs(*mask) - 1;
   *count = ffs(~(*mask >> *start)) - 1;
   *mask &= ~(((1u << *count) - 1) << *start);
}

/* 64-bit variant of u_bit_scan_consecutive_range().
 *
 * \note *mask must be non-zero on entry.
 */
static inline void
u_bit_scan_consecutive_range64(uint64_t *mask, int *start, int *count)
{
   /* Special case ~0 to avoid a 64-bit shift by 64 (undefined behavior). */
   if (*mask == ~0ull) {
      *start = 0;
      *count = 64;
      *mask = 0;
      return;
   }
   *start = ffsll(*mask) - 1;
   *count = ffsll(~(*mask >> *start)) - 1;
   *mask &= ~(((((uint64_t)1) << *count) - 1) << *start);
}


/**
 * Find last bit set in a word.  The least significant bit is 1.
 * Return 0 if no bits are set.
 * Essentially ffs() in the reverse direction.
 */
static inline unsigned
util_last_bit(unsigned u)
{
#if defined(HAVE___BUILTIN_CLZ)
   return u == 0 ? 0 : 32 - __builtin_clz(u);
#elif defined(_MSC_VER) && (_M_IX86 || _M_ARM || _M_AMD64 || _M_IA64)
   unsigned long index;
   if (_BitScanReverse(&index, u))
      return index + 1;
   else
      return 0;
#else
   unsigned r = 0;
   while (u) {
      r++;
      u >>= 1;
   }
   return r;
#endif
}

/**
 * Find last bit set in a word.  The least significant bit is 1.
 * Return 0 if no bits are set.
 * Essentially ffsll() in the reverse direction.
 */
static inline unsigned
util_last_bit64(uint64_t u)
{
#if defined(HAVE___BUILTIN_CLZLL)
   return u == 0 ? 0 : 64 - __builtin_clzll(u);
#elif defined(_MSC_VER) && (_M_AMD64 || _M_ARM64 || _M_IA64)
   unsigned long index;
   if (_BitScanReverse64(&index, u))
      return index + 1;
   else
      return 0;
#else
   unsigned r = 0;
   while (u) {
      r++;
      u >>= 1;
   }
   return r;
#endif
}

/**
 * Find last bit in a word that does not match the sign bit. The least
 * significant bit is 1.
 * Return 0 if no bits are set.
 */
static inline unsigned
util_last_bit_signed(int i)
{
   if (i >= 0)
      return util_last_bit(i);
   else
      return util_last_bit(~(unsigned)i);
}

/* Returns a bitfield in which the first count bits starting at start are
 * set.
 */
static inline unsigned
u_bit_consecutive(unsigned start, unsigned count)
{
   assert(start + count <= 32);
   /* count == 32 is special-cased: (1u << 32) is undefined behavior. */
   if (count == 32)
      return ~0;
   return ((1u << count) - 1) << start;
}

/* 64-bit variant of u_bit_consecutive(). */
static inline uint64_t
u_bit_consecutive64(unsigned start, unsigned count)
{
   assert(start + count <= 64);
   /* count == 64 is special-cased: a 64-bit shift by 64 is undefined. */
   if (count == 64)
      return ~(uint64_t)0;
   return (((uint64_t)1 << count) - 1) << start;
}

/**
 * Return number of bits set in n.
 */
static inline unsigned
util_bitcount(unsigned n)
{
#if defined(HAVE___BUILTIN_POPCOUNT)
   return __builtin_popcount(n);
#else
   /* K&R classic bitcount.
    *
    * For each iteration, clear the LSB from the bitfield.
    * Requires only one iteration per set bit, instead of
    * one iteration per bit less than highest set bit.
    */
   unsigned bits;
   for (bits = 0; n; bits++) {
      n &= n - 1;
   }
   return bits;
#endif
}

/**
 * Return the number of bits set in n using the native popcnt instruction.
 * The caller is responsible for ensuring that popcnt is supported by the CPU.
 *
 * gcc doesn't use it if -mpopcnt or -march= that has popcnt is missing.
 *
 */
static inline unsigned
util_popcnt_inline_asm(unsigned n)
{
#if defined(USE_X86_64_ASM) || defined(USE_X86_ASM)
   uint32_t out;
   __asm volatile("popcnt %1, %0" : "=r"(out) : "r"(n));
   return out;
#else
   /* We should never get here by accident, but I'm sure it'll happen. */
   return util_bitcount(n);
#endif
}

/**
 * Return number of bits set in the 64-bit value n.
 */
static inline unsigned
util_bitcount64(uint64_t n)
{
#ifdef HAVE___BUILTIN_POPCOUNTLL
   return __builtin_popcountll(n);
#else
   /* Count each 32-bit half separately. */
   return util_bitcount(n) + util_bitcount(n >> 32);
#endif
}

/**
 * Widens the given bit mask by a multiplier, meaning that it will
 * replicate each bit by that amount.
 *
 * For example:
 * 0b101 widened by 2 will become: 0b110011
 *
 * This is typically used in shader I/O to transform a 64-bit
 * writemask to a 32-bit writemask.
 *
 * \note assumes multiplier <= 31 so that `1u << multiplier` is defined --
 * callers pass small replication factors.
 */
static inline uint32_t
util_widen_mask(uint32_t mask, unsigned multiplier)
{
   uint32_t new_mask = 0;
   u_foreach_bit(i, mask)
      new_mask |= ((1u << multiplier) - 1u) << (i * multiplier);
   return new_mask;
}

#ifdef __cplusplus
}

/* util_bitcount has large measurable overhead (~2%), so it's recommended to
 * use the POPCNT instruction via inline assembly if the CPU supports it.
 */
enum util_popcnt {
   POPCNT_NO,
   POPCNT_YES,
};

/* Convenient function to select popcnt through a C++ template argument.
 * This should be used as part of larger functions that are optimized
 * as a whole.
 */
template<util_popcnt POPCNT> inline unsigned
util_bitcount_fast(unsigned n)
{
   if (POPCNT == POPCNT_YES)
      return util_popcnt_inline_asm(n);
   else
      return util_bitcount(n);
}

#endif /* __cplusplus */

#endif /* BITSCAN_H */