/* xref: /kernel/linux/linux-6.6/arch/x86/lib/hweight.S (revision 62306a36) */
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/export.h>

#include <asm/asm.h>

/*
 * unsigned int __sw_hweight32(unsigned int w)
 *
 * Software population count: returns the number of set bits in the
 * 32-bit value w.
 *
 * In:   CONFIG_X86_64:	w in %edi (copied into %eax on entry)
 *       otherwise:	w in %eax
 * Out:  %eax = number of set bits in w
 *
 * The dx register is the only scratch register. It is pushed on entry
 * and popped before returning, so callers get it back unchanged; only
 * the result register and the flags are modified.
 */
SYM_FUNC_START(__sw_hweight32)

#ifdef CONFIG_X86_64
	movl %edi, %eax				# w
#endif
	__ASM_SIZE(push,) %__ASM_REG(dx)	# save scratch register

	/* Step 1: every 2-bit field becomes the count of its set bits. */
	movl %eax, %edx				# w -> t
	shrl %edx				# t >>= 1
	andl $0x55555555, %edx			# t &= 0x55555555
	subl %edx, %eax				# w -= t

	/* Step 2: add neighbouring 2-bit counts into 4-bit fields. */
	movl %eax, %edx				# w -> t
	shrl $2, %eax				# w_tmp >>= 2
	andl $0x33333333, %edx			# t	&= 0x33333333
	andl $0x33333333, %eax			# w_tmp &= 0x33333333
	addl %edx, %eax				# w = w_tmp + t

	/* Step 3: add neighbouring 4-bit counts; keep one count per byte. */
	movl %eax, %edx				# w -> t
	shrl $4, %edx				# t >>= 4
	addl %edx, %eax				# w_tmp += t
	andl  $0x0f0f0f0f, %eax			# w_tmp &= 0x0f0f0f0f

	/*
	 * Step 4: multiplying by 0x01010101 accumulates the four byte
	 * counts in the top byte; shifting right by 24 extracts that sum.
	 */
	imull $0x01010101, %eax, %eax		# w_tmp *= 0x01010101
	shrl $24, %eax				# w = w_tmp >> 24

	__ASM_SIZE(pop,) %__ASM_REG(dx)		# restore scratch register
	RET
SYM_FUNC_END(__sw_hweight32)
EXPORT_SYMBOL(__sw_hweight32)

/*
 * unsigned long __sw_hweight64(__u64 w)
 *
 * Software population count: returns the number of set bits in the
 * 64-bit value w.
 *
 * CONFIG_X86_64:
 *   In:   w in %rdi
 *   Out:  %rax = number of set bits in w
 *   %rdi and %rdx are used as work registers; both are pushed on entry
 *   and popped before returning, so callers get them back unchanged.
 *
 * otherwise (32-bit):
 *   In:   the two 32-bit halves of w in %eax and %edx
 *   Out:  %eax = number of set bits in w
 *   %ecx holds the partial result and is saved/restored around its use.
 */
SYM_FUNC_START(__sw_hweight64)
#ifdef CONFIG_X86_64
	pushq   %rdi
	pushq   %rdx

	/* Step 1: every 2-bit field becomes the count of its set bits. */
	movq    %rdi, %rdx                      # w -> t
	movabsq $0x5555555555555555, %rax
	shrq    %rdx                            # t >>= 1
	andq    %rdx, %rax                      # t &= 0x5555555555555555 (t now in %rax)
	movabsq $0x3333333333333333, %rdx       # mask for step 2
	subq    %rax, %rdi                      # w -= t

	/* Step 2: add neighbouring 2-bit counts into 4-bit fields. */
	movq    %rdi, %rax                      # w -> t
	shrq    $2, %rdi                        # w_tmp >>= 2
	andq    %rdx, %rax                      # t     &= 0x3333333333333333
	andq    %rdi, %rdx                      # w_tmp &= 0x3333333333333333 (w_tmp now in %rdx)
	addq    %rdx, %rax                      # w = w_tmp + t

	/* Step 3: add neighbouring 4-bit counts; keep one count per byte. */
	movq    %rax, %rdx                      # w -> t
	shrq    $4, %rdx                        # t >>= 4
	addq    %rdx, %rax                      # w_tmp += t
	movabsq $0x0f0f0f0f0f0f0f0f, %rdx
	andq    %rdx, %rax                      # w_tmp &= 0x0f0f0f0f0f0f0f0f

	/*
	 * Step 4: multiplying by 0x0101010101010101 accumulates the eight
	 * byte counts in the top byte; shifting right by 56 extracts that sum.
	 */
	movabsq $0x0101010101010101, %rdx
	imulq   %rdx, %rax                      # w_tmp *= 0x0101010101010101
	shrq    $56, %rax                       # w = w_tmp >> 56

	popq    %rdx
	popq    %rdi
	RET
#else /* CONFIG_X86_32 */
	/* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */
	pushl   %ecx

	/*
	 * Count each 32-bit half separately and add the two results.
	 * __sw_hweight32 saves and restores the dx register, so %edx still
	 * holds the second half of the input after the first call.
	 */
	call    __sw_hweight32
	movl    %eax, %ecx                      # stash away result
	movl    %edx, %eax                      # second part of input
	call    __sw_hweight32
	addl    %ecx, %eax                      # result

	popl    %ecx
	RET
#endif
SYM_FUNC_END(__sw_hweight64)
EXPORT_SYMBOL(__sw_hweight64)