xref: /third_party/mesa3d/src/util/bitscan.h (revision bf215546)
1/**************************************************************************
2 *
3 * Copyright 2008 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#ifndef BITSCAN_H
30#define BITSCAN_H
31
32#include <assert.h>
33#include <stdint.h>
34#include <stdbool.h>
35#include <string.h>
36
37#if defined(_MSC_VER)
38#include <intrin.h>
39#endif
40
41#if defined(__POPCNT__)
42#include <popcntintrin.h>
43#endif
44
45#ifdef __cplusplus
46extern "C" {
47#endif
48
49
/**
 * Find first bit set in word.  Least significant bit is 1.
 * Return 0 if no bits set.
 */
#ifdef HAVE___BUILTIN_FFS
/* GCC/Clang provide a builtin that maps directly to a bit-scan instruction. */
#define ffs __builtin_ffs
#elif defined(_MSC_VER) && (_M_IX86 || _M_ARM || _M_AMD64 || _M_IA64)
static inline
int ffs(int i)
{
   unsigned long index;
   /* _BitScanForward returns nonzero iff a bit is set; index receives the
    * 0-based position of the lowest set bit, hence the +1 for ffs() style. */
   if (_BitScanForward(&index, i))
      return index + 1;
   else
      return 0;
}
#else
/* Fall back to the POSIX/libc implementation. */
extern
int ffs(int i);
#endif
70
/**
 * 64-bit counterpart of ffs(): find first bit set in a long long.
 * Least significant bit is 1; return 0 if no bits set.
 */
#ifdef HAVE___BUILTIN_FFSLL
#define ffsll __builtin_ffsll
#elif defined(_MSC_VER) && (_M_AMD64 || _M_ARM64 || _M_IA64)
static inline int
ffsll(long long int i)
{
   unsigned long index;
   /* 64-bit scan; only available on 64-bit MSVC targets, hence the
    * architecture guard above. */
   if (_BitScanForward64(&index, i))
      return index + 1;
   else
      return 0;
}
#else
/* Fall back to an external (libc or project-provided) implementation. */
extern int
ffsll(long long int val);
#endif
87
88
/* Destructively loop over all of the bits in a mask as in:
 *
 * while (mymask) {
 *   int i = u_bit_scan(&mymask);
 *   ... process element i
 * }
 *
 * The mask must be non-zero on entry.
 */
static inline int
u_bit_scan(unsigned *mask)
{
   /* ffs() gives the 1-based position of the lowest set bit. */
   const int bit = ffs(*mask) - 1;
   /* n & (n - 1) clears the lowest set bit -- the one just reported. */
   *mask &= *mask - 1;
   return bit;
}
104
/* Iterate over each set bit in a 32-bit mask; b receives the 0-based index
 * of the current bit.  The mask expression is evaluated exactly once and
 * b's scope is limited to the loop.
 *
 * Fix: clear the bit with an unsigned constant.  The previous signed
 * `1 << (b)` is undefined behavior in C when b == 31 (left shift into the
 * sign bit, C11 6.5.7); the 64-bit variant below already uses 1ull.
 */
#define u_foreach_bit(b, dword)                          \
   for (uint32_t __dword = (dword), b;                   \
        ((b) = ffs(__dword) - 1, __dword);               \
        __dword &= ~(1u << (b)))
109
/* 64-bit variant of u_bit_scan(): return the index of the lowest set bit
 * of *mask (LSB == 0) and clear that bit.  The mask must be non-zero.
 */
static inline int
u_bit_scan64(uint64_t *mask)
{
   /* ffsll() gives the 1-based position of the lowest set bit. */
   const int bit = ffsll(*mask) - 1;
   /* Clear the reported (lowest set) bit. */
   *mask &= *mask - 1;
   return bit;
}
117
/* Like u_foreach_bit, but for a 64-bit mask: b receives the 0-based index
 * of each set bit; the private copy of the mask is consumed as the loop
 * runs, so the original expression is evaluated only once. */
#define u_foreach_bit64(b, dword)                          \
   for (uint64_t __dword = (dword), b;                     \
        ((b) = ffsll(__dword) - 1, __dword);      \
        __dword &= ~(1ull << (b)))
122
/* Determine if an unsigned value is a power of two.
 *
 * \note
 * Zero is treated as a power of two.
 */
static inline bool
util_is_power_of_two_or_zero(unsigned v)
{
   /* A power of two has exactly one set bit, so clearing the lowest set
    * bit yields zero.  Zero trivially passes the same test. */
   const unsigned lowest_bit_cleared = v & (v - 1);
   return lowest_bit_cleared == 0;
}
133
/* Determine if an uint64_t value is a power of two.
 *
 * \note
 * Zero is treated as a power of two.
 */
static inline bool
util_is_power_of_two_or_zero64(uint64_t v)
{
   /* Same trick as the 32-bit variant: clearing the lowest set bit of a
    * power of two (or of zero) leaves nothing behind. */
   const uint64_t lowest_bit_cleared = v & (v - 1);
   return lowest_bit_cleared == 0;
}
144
/* Determine if an unsigned value is a power of two.
 *
 * \note
 * Zero is \b not treated as a power of two.
 */
static inline bool
util_is_power_of_two_nonzero(unsigned v)
{
   /* __POPCNT__ is different from HAVE___BUILTIN_POPCOUNT.  The latter
    * indicates the existence of the __builtin_popcount function.  The former
    * indicates that _mm_popcnt_u32 exists and is a native instruction.
    *
    * The other alternative is to use SSE 4.2 compile-time flags.  This has
    * two drawbacks.  First, there is currently no build infrastructure for
    * SSE 4.2 (only 4.1), so that would have to be added.  Second, some AMD
    * CPUs support POPCNT but not SSE 4.2 (e.g., Barcelona).
    */
#ifdef __POPCNT__
   return _mm_popcnt_u32(v) == 1;
#else
   if (v == 0)
      return false;
   /* Clearing the lowest set bit of a power of two yields zero. */
   return (v & (v - 1)) == 0;
#endif
}
168
/* For looping over a bitmask when you want to loop over consecutive bits
 * manually, for example:
 *
 * while (mask) {
 *    int start, count, i;
 *
 *    u_bit_scan_consecutive_range(&mask, &start, &count);
 *
 *    for (i = 0; i < count; i++)
 *       ... process element (start+i)
 * }
 *
 * Reports the lowest run of consecutive set bits as [*start, *start+*count)
 * and clears those bits from *mask.  The mask must be non-zero on entry.
 */
static inline void
u_bit_scan_consecutive_range(unsigned *mask, int *start, int *count)
{
   /* The all-ones mask must be special-cased: the ffs-based math below
    * cannot express a full 32-bit run (there is no zero bit to find). */
   if (*mask == 0xffffffff) {
      *start = 0;
      *count = 32;
      *mask = 0;
      return;
   }
   /* Lowest set bit starts the run (ffs is 1-based, hence the -1). */
   *start = ffs(*mask) - 1;
   /* First zero bit at or above *start ends the run. */
   *count = ffs(~(*mask >> *start)) - 1;
   /* Clear the bits that were just reported. */
   *mask &= ~(((1u << *count) - 1) << *start);
}
194
/* 64-bit variant of u_bit_scan_consecutive_range: report and clear the
 * lowest run of consecutive set bits in *mask.  Mask must be non-zero. */
static inline void
u_bit_scan_consecutive_range64(uint64_t *mask, int *start, int *count)
{
   /* Special-case the all-ones mask: a 64-bit-wide run cannot be derived
    * from the ffsll-based math below (no zero bit exists to terminate it). */
   if (*mask == ~0ull) {
      *start = 0;
      *count = 64;
      *mask = 0;
      return;
   }
   /* Lowest set bit starts the run (ffsll is 1-based, hence the -1). */
   *start = ffsll(*mask) - 1;
   /* First zero bit at or above *start ends the run. */
   *count = ffsll(~(*mask >> *start)) - 1;
   /* Clear the reported bits. */
   *mask &= ~(((((uint64_t)1) << *count) - 1) << *start);
}
208
209
/**
 * Find last bit set in a word.  The least significant bit is 1.
 * Return 0 if no bits are set.
 * Essentially ffs() in the reverse direction.
 */
static inline unsigned
util_last_bit(unsigned u)
{
#if defined(HAVE___BUILTIN_CLZ)
   /* clz counts leading zeros; 32 - clz is the 1-based top-bit index.
    * __builtin_clz(0) is undefined, so zero is handled separately. */
   return u == 0 ? 0 : 32 - __builtin_clz(u);
#elif defined(_MSC_VER) && (_M_IX86 || _M_ARM || _M_AMD64 || _M_IA64)
   unsigned long index;
   if (_BitScanReverse(&index, u))
      return index + 1;
   else
      return 0;
#else
   /* Portable fallback: shift until the word is exhausted; the number of
    * shifts equals the 1-based index of the highest set bit. */
   unsigned msb = 0;
   for (; u != 0; u >>= 1)
      msb++;
   return msb;
#endif
}
235
/**
 * Find last bit set in a word.  The least significant bit is 1.
 * Return 0 if no bits are set.
 * Essentially ffsll() in the reverse direction.
 */
static inline unsigned
util_last_bit64(uint64_t u)
{
#if defined(HAVE___BUILTIN_CLZLL)
   /* __builtin_clzll(0) is undefined, so zero is handled separately. */
   return u == 0 ? 0 : 64 - __builtin_clzll(u);
#elif defined(_MSC_VER) && (_M_AMD64 || _M_ARM64 || _M_IA64)
   unsigned long index;
   if (_BitScanReverse64(&index, u))
      return index + 1;
   else
      return 0;
#else
   /* Portable fallback: one shift per bit position up to the highest
    * set bit; the shift count is the 1-based answer. */
   unsigned msb = 0;
   for (; u != 0; u >>= 1)
      msb++;
   return msb;
#endif
}
261
/**
 * Find last bit in a word that does not match the sign bit. The least
 * significant bit is 1.
 * Return 0 if no bits are set.
 */
static inline unsigned
util_last_bit_signed(int i)
{
   /* For negative values, complementing maps the sign bits to zeros and
    * exposes the highest bit that differs from the sign. */
   const unsigned u = (unsigned)i;
   return util_last_bit(i < 0 ? ~u : u);
}
275
/* Returns a bitfield in which the first count bits starting at start are
 * set.
 */
static inline unsigned
u_bit_consecutive(unsigned start, unsigned count)
{
   assert(start + count <= 32);
   /* count == 32 must be special-cased: a 32-bit shift of a 32-bit value
    * is undefined behavior. */
   return count == 32 ? ~0u : ((1u << count) - 1) << start;
}
287
/* 64-bit variant of u_bit_consecutive: count set bits starting at start. */
static inline uint64_t
u_bit_consecutive64(unsigned start, unsigned count)
{
   assert(start + count <= 64);
   /* A full-width (64-bit) shift is undefined, so handle count == 64
    * explicitly. */
   return count == 64 ? ~(uint64_t)0 : (((uint64_t)1 << count) - 1) << start;
}
296
/**
 * Return number of bits set in n.
 */
static inline unsigned
util_bitcount(unsigned n)
{
#if defined(HAVE___BUILTIN_POPCOUNT)
   return __builtin_popcount(n);
#else
   /* Kernighan's method: n &= n - 1 clears the lowest set bit, so the
    * loop iterates once per set bit rather than once per bit position
    * below the highest set bit. */
   unsigned count = 0;
   while (n != 0) {
      n &= n - 1;
      count++;
   }
   return count;
#endif
}
319
/**
 * Return the number of bits set in n using the native popcnt instruction.
 * The caller is responsible for ensuring that popcnt is supported by the CPU.
 *
 * gcc doesn't use it if -mpopcnt or -march= that has popcnt is missing.
 *
 */
static inline unsigned
util_popcnt_inline_asm(unsigned n)
{
#if defined(USE_X86_64_ASM) || defined(USE_X86_ASM)
   uint32_t out;
   /* Emit popcnt directly, bypassing the compiler's builtin selection. */
   __asm volatile("popcnt %1, %0" : "=r"(out) : "r"(n));
   return out;
#else
   /* We should never get here by accident, but I'm sure it'll happen. */
   return util_bitcount(n);
#endif
}
339
/**
 * Return the number of bits set in a 64-bit value.
 */
static inline unsigned
util_bitcount64(uint64_t n)
{
#ifdef HAVE___BUILTIN_POPCOUNTLL
   return __builtin_popcountll(n);
#else
   /* Count the two 32-bit halves independently and sum the results. */
   const unsigned lo = (unsigned)n;
   const unsigned hi = (unsigned)(n >> 32);
   return util_bitcount(lo) + util_bitcount(hi);
#endif
}
349
350/**
351 * Widens the given bit mask by a multiplier, meaning that it will
352 * replicate each bit by that amount.
353 *
354 * For example:
355 * 0b101 widened by 2 will become: 0b110011
356 *
357 * This is typically used in shader I/O to transform a 64-bit
358 * writemask to a 32-bit writemask.
359 */
360static inline uint32_t
361util_widen_mask(uint32_t mask, unsigned multiplier)
362{
363   uint32_t new_mask = 0;
364   u_foreach_bit(i, mask)
365      new_mask |= ((1u << multiplier) - 1u) << (i * multiplier);
366   return new_mask;
367}
368
369#ifdef __cplusplus
370}
371
/* util_bitcount has large measurable overhead (~2%), so it's recommended to
 * use the POPCNT instruction via inline assembly if the CPU supports it.
 */
enum util_popcnt {
   POPCNT_NO,   /* use the portable util_bitcount() path */
   POPCNT_YES,  /* use util_popcnt_inline_asm() */
};
379
/* Convenient function to select popcnt through a C++ template argument.
 * This should be used as part of larger functions that are optimized
 * as a whole.
 */
template<util_popcnt POPCNT> inline unsigned
util_bitcount_fast(unsigned n)
{
   /* POPCNT is a compile-time constant, so this branch folds away. */
   if (POPCNT == POPCNT_YES)
      return util_popcnt_inline_asm(n);
   else
      return util_bitcount(n);
}
392
393#endif /* __cplusplus */
394
395#endif /* BITSCAN_H */
396