18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */ 28c2ecf20Sopenharmony_ci#ifndef __ASM_ARM_DIV64 38c2ecf20Sopenharmony_ci#define __ASM_ARM_DIV64 48c2ecf20Sopenharmony_ci 58c2ecf20Sopenharmony_ci#include <linux/types.h> 68c2ecf20Sopenharmony_ci#include <asm/compiler.h> 78c2ecf20Sopenharmony_ci 88c2ecf20Sopenharmony_ci/* 98c2ecf20Sopenharmony_ci * The semantics of __div64_32() are: 108c2ecf20Sopenharmony_ci * 118c2ecf20Sopenharmony_ci * uint32_t __div64_32(uint64_t *n, uint32_t base) 128c2ecf20Sopenharmony_ci * { 138c2ecf20Sopenharmony_ci * uint32_t remainder = *n % base; 148c2ecf20Sopenharmony_ci * *n = *n / base; 158c2ecf20Sopenharmony_ci * return remainder; 168c2ecf20Sopenharmony_ci * } 178c2ecf20Sopenharmony_ci * 188c2ecf20Sopenharmony_ci * In other words, a 64-bit dividend with a 32-bit divisor producing 198c2ecf20Sopenharmony_ci * a 64-bit result and a 32-bit remainder. To accomplish this optimally 208c2ecf20Sopenharmony_ci * we override the generic version in lib/div64.c to call our __do_div64 218c2ecf20Sopenharmony_ci * assembly implementation with completely non standard calling convention 228c2ecf20Sopenharmony_ci * for arguments and results (beware). 238c2ecf20Sopenharmony_ci */ 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_ci#ifdef __ARMEB__ 268c2ecf20Sopenharmony_ci#define __xh "r0" 278c2ecf20Sopenharmony_ci#define __xl "r1" 288c2ecf20Sopenharmony_ci#else 298c2ecf20Sopenharmony_ci#define __xl "r0" 308c2ecf20Sopenharmony_ci#define __xh "r1" 318c2ecf20Sopenharmony_ci#endif 328c2ecf20Sopenharmony_ci 338c2ecf20Sopenharmony_cistatic inline uint32_t __div64_32(uint64_t *n, uint32_t base) 348c2ecf20Sopenharmony_ci{ 358c2ecf20Sopenharmony_ci register unsigned int __base asm("r4") = base; 368c2ecf20Sopenharmony_ci register unsigned long long __n asm("r0") = *n; 378c2ecf20Sopenharmony_ci register unsigned long long __res asm("r2"); 388c2ecf20Sopenharmony_ci register unsigned int __rem asm(__xh); 398c2ecf20Sopenharmony_ci asm( __asmeq("%0", __xh) 408c2ecf20Sopenharmony_ci __asmeq("%1", "r2") 418c2ecf20Sopenharmony_ci __asmeq("%2", "r0") 428c2ecf20Sopenharmony_ci __asmeq("%3", "r4") 438c2ecf20Sopenharmony_ci "bl __do_div64" 448c2ecf20Sopenharmony_ci : "=r" (__rem), "=r" (__res) 458c2ecf20Sopenharmony_ci : "r" (__n), "r" (__base) 468c2ecf20Sopenharmony_ci : "ip", "lr", "cc"); 478c2ecf20Sopenharmony_ci *n = __res; 488c2ecf20Sopenharmony_ci return __rem; 498c2ecf20Sopenharmony_ci} 508c2ecf20Sopenharmony_ci#define __div64_32 __div64_32 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_ci#if !defined(CONFIG_AEABI) 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_ci/* 558c2ecf20Sopenharmony_ci * In OABI configurations, some uses of the do_div function 568c2ecf20Sopenharmony_ci * cause gcc to run out of registers. To work around that, 578c2ecf20Sopenharmony_ci * we can force the use of the out-of-line version for 588c2ecf20Sopenharmony_ci * configurations that build a OABI kernel. 598c2ecf20Sopenharmony_ci */ 608c2ecf20Sopenharmony_ci#define do_div(n, base) __div64_32(&(n), base) 618c2ecf20Sopenharmony_ci 628c2ecf20Sopenharmony_ci#else 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci/* 658c2ecf20Sopenharmony_ci * gcc versions earlier than 4.0 are simply too problematic for the 668c2ecf20Sopenharmony_ci * __div64_const32() code in asm-generic/div64.h. First there is 678c2ecf20Sopenharmony_ci * gcc PR 15089 that tend to trig on more complex constructs, spurious 688c2ecf20Sopenharmony_ci * .global __udivsi3 are inserted even if none of those symbols are 698c2ecf20Sopenharmony_ci * referenced in the generated code, and those gcc versions are not able 708c2ecf20Sopenharmony_ci * to do constant propagation on long long values anyway. 718c2ecf20Sopenharmony_ci */ 728c2ecf20Sopenharmony_ci 738c2ecf20Sopenharmony_ci#define __div64_const32_is_OK (__GNUC__ >= 4) 748c2ecf20Sopenharmony_ci 758c2ecf20Sopenharmony_cistatic inline uint64_t __arch_xprod_64(uint64_t m, uint64_t n, bool bias) 768c2ecf20Sopenharmony_ci{ 778c2ecf20Sopenharmony_ci unsigned long long res; 788c2ecf20Sopenharmony_ci register unsigned int tmp asm("ip") = 0; 798c2ecf20Sopenharmony_ci 808c2ecf20Sopenharmony_ci if (!bias) { 818c2ecf20Sopenharmony_ci asm ( "umull %Q0, %R0, %Q1, %Q2\n\t" 828c2ecf20Sopenharmony_ci "mov %Q0, #0" 838c2ecf20Sopenharmony_ci : "=&r" (res) 848c2ecf20Sopenharmony_ci : "r" (m), "r" (n) 858c2ecf20Sopenharmony_ci : "cc"); 868c2ecf20Sopenharmony_ci } else if (!(m & ((1ULL << 63) | (1ULL << 31)))) { 878c2ecf20Sopenharmony_ci res = m; 888c2ecf20Sopenharmony_ci asm ( "umlal %Q0, %R0, %Q1, %Q2\n\t" 898c2ecf20Sopenharmony_ci "mov %Q0, #0" 908c2ecf20Sopenharmony_ci : "+&r" (res) 918c2ecf20Sopenharmony_ci : "r" (m), "r" (n) 928c2ecf20Sopenharmony_ci : "cc"); 938c2ecf20Sopenharmony_ci } else { 948c2ecf20Sopenharmony_ci asm ( "umull %Q0, %R0, %Q2, %Q3\n\t" 958c2ecf20Sopenharmony_ci "cmn %Q0, %Q2\n\t" 968c2ecf20Sopenharmony_ci "adcs %R0, %R0, %R2\n\t" 978c2ecf20Sopenharmony_ci "adc %Q0, %1, #0" 988c2ecf20Sopenharmony_ci : "=&r" (res), "+&r" (tmp) 998c2ecf20Sopenharmony_ci : "r" (m), "r" (n) 1008c2ecf20Sopenharmony_ci : "cc"); 1018c2ecf20Sopenharmony_ci } 1028c2ecf20Sopenharmony_ci 1038c2ecf20Sopenharmony_ci if (!(m & ((1ULL << 63) | (1ULL << 31)))) { 1048c2ecf20Sopenharmony_ci asm ( "umlal %R0, %Q0, %R1, %Q2\n\t" 1058c2ecf20Sopenharmony_ci "umlal %R0, %Q0, %Q1, %R2\n\t" 1068c2ecf20Sopenharmony_ci "mov %R0, #0\n\t" 1078c2ecf20Sopenharmony_ci "umlal %Q0, %R0, %R1, %R2" 1088c2ecf20Sopenharmony_ci : "+&r" (res) 1098c2ecf20Sopenharmony_ci : "r" (m), "r" (n) 1108c2ecf20Sopenharmony_ci : "cc"); 1118c2ecf20Sopenharmony_ci } else { 1128c2ecf20Sopenharmony_ci asm ( "umlal %R0, %Q0, %R2, %Q3\n\t" 1138c2ecf20Sopenharmony_ci "umlal %R0, %1, %Q2, %R3\n\t" 1148c2ecf20Sopenharmony_ci "mov %R0, #0\n\t" 1158c2ecf20Sopenharmony_ci "adds %Q0, %1, %Q0\n\t" 1168c2ecf20Sopenharmony_ci "adc %R0, %R0, #0\n\t" 1178c2ecf20Sopenharmony_ci "umlal %Q0, %R0, %R2, %R3" 1188c2ecf20Sopenharmony_ci : "+&r" (res), "+&r" (tmp) 1198c2ecf20Sopenharmony_ci : "r" (m), "r" (n) 1208c2ecf20Sopenharmony_ci : "cc"); 1218c2ecf20Sopenharmony_ci } 1228c2ecf20Sopenharmony_ci 1238c2ecf20Sopenharmony_ci return res; 1248c2ecf20Sopenharmony_ci} 1258c2ecf20Sopenharmony_ci#define __arch_xprod_64 __arch_xprod_64 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_ci#include <asm-generic/div64.h> 1288c2ecf20Sopenharmony_ci 1298c2ecf20Sopenharmony_ci#endif 1308c2ecf20Sopenharmony_ci 1318c2ecf20Sopenharmony_ci#endif 132