/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/export.h>

#include <asm/asm.h>

/*
 * unsigned int __sw_hweight32(unsigned int w)
 *
 * Software Hamming weight: returns the number of set bits in the 32-bit
 * value w.
 *
 * Input:  %edi when CONFIG_X86_64 is defined (copied into %eax on entry);
 *         otherwise w is read directly from %eax.
 * Output: %eax
 *
 * The dx register is the only scratch register; it is pushed on entry and
 * popped before returning, so callers see it unchanged.
 * NOTE(review): __ASM_SIZE()/__ASM_REG() come from <asm/asm.h> and
 * presumably expand to the native-width push/pop and register name
 * (pushq %rdx vs. pushl %edx) - confirm against that header.
 */
SYM_FUNC_START(__sw_hweight32)

#ifdef CONFIG_X86_64
	movl %edi, %eax				# w
#endif
	__ASM_SIZE(push,) %__ASM_REG(dx)

	/* Step 1: each 2-bit field becomes the count of bits set in it. */
	movl %eax, %edx				# w -> t
	shrl %edx				# t >>= 1
	andl $0x55555555, %edx			# t &= 0x55555555
	subl %edx, %eax				# w -= t

	/* Step 2: add neighbouring 2-bit counts into 4-bit fields. */
	movl %eax, %edx				# w -> t
	shrl $2, %eax				# w_tmp >>= 2
	andl $0x33333333, %edx			# t     &= 0x33333333
	andl $0x33333333, %eax			# w_tmp &= 0x33333333
	addl %edx, %eax				# w = w_tmp + t

	/*
	 * Step 3: add neighbouring 4-bit counts into per-byte counts, then
	 * multiply by 0x01010101 so the top byte accumulates the sum of all
	 * four byte counts, and shift that byte down into the result.
	 */
	movl %eax, %edx				# w -> t
	shrl $4, %edx				# t >>= 4
	addl %edx, %eax				# w_tmp += t
	andl $0x0f0f0f0f, %eax			# w_tmp &= 0x0f0f0f0f
	imull $0x01010101, %eax, %eax		# w_tmp *= 0x01010101
	shrl $24, %eax				# w = w_tmp >> 24
	__ASM_SIZE(pop,) %__ASM_REG(dx)
	RET
SYM_FUNC_END(__sw_hweight32)
EXPORT_SYMBOL(__sw_hweight32)

/*
 * unsigned long __sw_hweight64(__u64 w)
 *
 * Software Hamming weight of a 64-bit value; the result is returned in
 * %rax (CONFIG_X86_64) or %eax (otherwise).
 *
 * CONFIG_X86_64: w is read from %rdi. %rdi and %rdx are used as working
 * registers and are pushed on entry and popped before returning.
 *
 * Otherwise: w arrives as a register pair in (%eax,%edx). The result is
 * the sum of two __sw_hweight32 calls, one per half; %ecx holds the first
 * partial result and is pushed/popped around the body.
 */
SYM_FUNC_START(__sw_hweight64)
#ifdef CONFIG_X86_64
	pushq   %rdi
	pushq   %rdx

	/* Step 1: per-2-bit counts; the mask constant is loaded into %rax. */
	movq    %rdi, %rdx                      # w -> t
	movabsq $0x5555555555555555, %rax
	shrq    %rdx                            # t >>= 1
	andq    %rdx, %rax                      # t &= 0x5555555555555555
	movabsq $0x3333333333333333, %rdx
	subq    %rax, %rdi                      # w -= t

	/* Step 2: per-4-bit counts; %rdx still holds the 0x33.. mask. */
	movq    %rdi, %rax                      # w -> t
	shrq    $2, %rdi                        # w_tmp >>= 2
	andq    %rdx, %rax                      # t     &= 0x3333333333333333
	andq    %rdi, %rdx                      # w_tmp &= 0x3333333333333333
	addq    %rdx, %rax                      # w = w_tmp + t

	/*
	 * Step 3: per-byte counts, then a multiply by 0x0101010101010101
	 * sums all eight byte counts into the top byte, which is shifted
	 * down into the result.
	 */
	movq    %rax, %rdx                      # w -> t
	shrq    $4, %rdx                        # t >>= 4
	addq    %rdx, %rax                      # w_tmp += t
	movabsq $0x0f0f0f0f0f0f0f0f, %rdx
	andq    %rdx, %rax                      # w_tmp &= 0x0f0f0f0f0f0f0f0f
	movabsq $0x0101010101010101, %rdx
	imulq   %rdx, %rax                      # w_tmp *= 0x0101010101010101
	shrq    $56, %rax                       # w = w_tmp >> 56

	popq    %rdx
	popq    %rdi
	RET
#else /* CONFIG_X86_32 */
	/* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */
	pushl   %ecx

	/*
	 * The first call counts the half in %eax. It must leave %edx alone,
	 * because %edx still holds the other half of the input;
	 * __sw_hweight32 saves and restores its dx scratch register and never
	 * touches %ecx, so both the pending input and the stashed partial
	 * result survive the calls.
	 */
	call    __sw_hweight32
	movl    %eax, %ecx                      # stash away result
	movl    %edx, %eax                      # second part of input
	call    __sw_hweight32
	addl    %ecx, %eax                      # result

	popl    %ecx
	RET
#endif
SYM_FUNC_END(__sw_hweight64)
EXPORT_SYMBOL(__sw_hweight64)