/* SPDX-License-Identifier: GPL-2.0 */
28c2ecf20Sopenharmony_ci#include <linux/linkage.h>
38c2ecf20Sopenharmony_ci#include <asm/export.h>
48c2ecf20Sopenharmony_ci
58c2ecf20Sopenharmony_ci#include <asm/asm.h>
68c2ecf20Sopenharmony_ci
/*
 * unsigned int __sw_hweight32(unsigned int w)
 *
 * Software population count: returns the number of set bits in the
 * 32-bit value w.
 *
 * In:   %edi = w when CONFIG_X86_64 is defined, %eax = w otherwise
 * Out:  %eax = number of bits set in w (0..32)
 * Regs: the dx register is used as scratch but is pushed on entry and
 *       popped before returning, so only %eax and the flags differ on
 *       return.  The 32-bit variant of __sw_hweight64 in this file
 *       relies on %edx surviving a call to this function.
 *
 * The count is computed without branches by adding adjacent bit fields
 * in parallel: single bits into 2-bit sums, 2-bit sums into 4-bit sums,
 * 4-bit sums into per-byte sums, and finally one multiply that adds the
 * four byte sums together into the most significant byte.
 */
SYM_FUNC_START(__sw_hweight32)

#ifdef CONFIG_X86_64
	movl %edi, %eax				# w arrives in %edi, work on it in %eax
#endif
	__ASM_SIZE(push,) %__ASM_REG(dx)	# save scratch reg, restored before RET

	/* Step 1: every 2-bit field becomes the count of its own two bits. */
	movl %eax, %edx				# t = w
	shrl %edx				# t >>= 1
	andl $0x55555555, %edx			# t &= 0x55555555
	subl %edx, %eax				# w -= t

	/* Step 2: add neighbouring 2-bit counts into 4-bit counts. */
	movl %eax, %edx				# t = w
	shrl $2, %eax				# w >>= 2
	andl $0x33333333, %edx			# t &= 0x33333333 (low pair of each nibble)
	andl $0x33333333, %eax			# w &= 0x33333333 (high pair, shifted down)
	addl %edx, %eax				# w += t

	/* Step 3: add neighbouring nibble counts into per-byte counts. */
	movl %eax, %edx				# t = w
	shrl $4, %edx				# t >>= 4
	addl %edx, %eax				# w += t
	andl  $0x0f0f0f0f, %eax			# w &= 0x0f0f0f0f (keep low nibble of each byte)

	/* Step 4: the multiply accumulates all four byte counts in bits 31:24. */
	imull $0x01010101, %eax, %eax		# w *= 0x01010101
	shrl $24, %eax				# w >>= 24, leaving the total in %eax

	__ASM_SIZE(pop,) %__ASM_REG(dx)		# restore scratch reg
	RET
SYM_FUNC_END(__sw_hweight32)
EXPORT_SYMBOL(__sw_hweight32)
388c2ecf20Sopenharmony_ci
/*
 * unsigned long __sw_hweight64(__u64 w)
 *
 * Software population count: returns the number of set bits in the
 * 64-bit value w.
 *
 * CONFIG_X86_64:
 *   In:   %rdi = w
 *   Out:  %rax = number of bits set in w (0..64)
 *   Regs: %rdi and %rdx are pushed on entry and popped before returning,
 *         so only %rax and the flags differ on return.
 *   Same parallel bit-field summation as __sw_hweight32, widened to
 *   64 bits.  The 64-bit masks cannot be encoded as immediates of
 *   and/imul, so each one is first loaded into a register with movabsq.
 *
 * 32-bit:
 *   In:   the two 32-bit halves of w in %eax and %edx
 *   Out:  %eax = number of bits set in w (0..64)
 *   Regs: %ecx is pushed on entry and popped before returning.
 *   Each half is counted by __sw_hweight32 and the two counts are added.
 *   This depends on __sw_hweight32 leaving %ecx and %edx unchanged.
 */
SYM_FUNC_START(__sw_hweight64)
#ifdef CONFIG_X86_64
	pushq   %rdi                            # w is modified below, restore on exit
	pushq   %rdx                            # scratch, restore on exit

	/* Step 1: every 2-bit field becomes the count of its own two bits. */
	movq    %rdi, %rdx                      # t = w
	movabsq $0x5555555555555555, %rax       # rax = pair mask
	shrq    %rdx                            # t >>= 1
	andq    %rdx, %rax                      # rax = t & 0x5555555555555555
	movabsq $0x3333333333333333, %rdx       # rdx = nibble-pair mask for step 2
	subq    %rax, %rdi                      # w -= rax

	/* Step 2: add neighbouring 2-bit counts into 4-bit counts. */
	movq    %rdi, %rax                      # t = w
	shrq    $2, %rdi                        # w >>= 2
	andq    %rdx, %rax                      # t &= 0x3333333333333333
	andq    %rdi, %rdx                      # rdx = w & 0x3333333333333333
	addq    %rdx, %rax                      # rax = t + rdx

	/* Step 3: add neighbouring nibble counts into per-byte counts. */
	movq    %rax, %rdx                      # t = rax
	shrq    $4, %rdx                        # t >>= 4
	addq    %rdx, %rax                      # rax += t
	movabsq $0x0f0f0f0f0f0f0f0f, %rdx
	andq    %rdx, %rax                      # rax &= 0x0f0f0f0f0f0f0f0f

	/* Step 4: the multiply accumulates all eight byte counts in bits 63:56. */
	movabsq $0x0101010101010101, %rdx
	imulq   %rdx, %rax                      # rax *= 0x0101010101010101
	shrq    $56, %rax                       # rax >>= 56, leaving the total in %rax

	popq    %rdx
	popq    %rdi
	RET
#else /* CONFIG_X86_32 */
	/* We are getting a u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */
	pushl   %ecx                            # used to hold the first partial count

	call    __sw_hweight32                  # eax = bit count of the half in %eax
	movl    %eax, %ecx                      # stash away first result
	movl    %edx, %eax                      # second half of input, %edx is preserved by the call
	call    __sw_hweight32                  # eax = bit count of the second half
	addl    %ecx, %eax                      # result = sum of both counts

	popl    %ecx
	RET
#endif
SYM_FUNC_END(__sw_hweight64)
EXPORT_SYMBOL(__sw_hweight64)
84