18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */ 28c2ecf20Sopenharmony_ci#ifndef _ASM_HASH_H 38c2ecf20Sopenharmony_ci#define _ASM_HASH_H 48c2ecf20Sopenharmony_ci 58c2ecf20Sopenharmony_ci/* 68c2ecf20Sopenharmony_ci * If CONFIG_M68000=y (original mc68000/010), this file is #included 78c2ecf20Sopenharmony_ci * to work around the lack of a MULU.L instruction. 88c2ecf20Sopenharmony_ci */ 98c2ecf20Sopenharmony_ci 108c2ecf20Sopenharmony_ci#define HAVE_ARCH__HASH_32 1 118c2ecf20Sopenharmony_ci/* 128c2ecf20Sopenharmony_ci * While it would be legal to substitute a different hash operation 138c2ecf20Sopenharmony_ci * entirely, let's keep it simple and just use an optimized multiply 148c2ecf20Sopenharmony_ci * by GOLDEN_RATIO_32 = 0x61C88647. 158c2ecf20Sopenharmony_ci * 168c2ecf20Sopenharmony_ci * The best way to do that appears to be to multiply by 0x8647 with 178c2ecf20Sopenharmony_ci * shifts and adds, and use mulu.w to multiply the high half by 0x61C8. 188c2ecf20Sopenharmony_ci * 198c2ecf20Sopenharmony_ci * Because the 68000 has multi-cycle shifts, this addition chain is 208c2ecf20Sopenharmony_ci * chosen to minimise the shift distances. 218c2ecf20Sopenharmony_ci * 228c2ecf20Sopenharmony_ci * Despite every attempt to spoon-feed it simple operations, GCC 238c2ecf20Sopenharmony_ci * 6.1.1 doggedly insists on doing annoying things like converting 248c2ecf20Sopenharmony_ci * "lsl.l #2,<reg>" (12 cycles) to two adds (8+8 cycles). 258c2ecf20Sopenharmony_ci * 268c2ecf20Sopenharmony_ci * It also likes to notice two shifts in a row, like "a = x << 2" and 278c2ecf20Sopenharmony_ci * "a <<= 7", and convert that to "a = x << 9". But shifts longer 288c2ecf20Sopenharmony_ci * than 8 bits are extra-slow on m68k, so that's a lose. 298c2ecf20Sopenharmony_ci * 308c2ecf20Sopenharmony_ci * Since the 68000 is a very simple in-order processor with no 318c2ecf20Sopenharmony_ci * instruction scheduling effects on execution time, we can safely 328c2ecf20Sopenharmony_ci * take it out of GCC's hands and write one big asm() block. 338c2ecf20Sopenharmony_ci * 348c2ecf20Sopenharmony_ci * Without calling overhead, this operation is 30 bytes (14 instructions 358c2ecf20Sopenharmony_ci * plus one immediate constant) and 166 cycles. 368c2ecf20Sopenharmony_ci * 378c2ecf20Sopenharmony_ci * (Because %2 is fetched twice, it can't be postincrement, and thus it 388c2ecf20Sopenharmony_ci * can't be a fully general "g" or "m". Register is preferred, but 398c2ecf20Sopenharmony_ci * offsettable memory or immediate will work.) 408c2ecf20Sopenharmony_ci */ 418c2ecf20Sopenharmony_cistatic inline u32 __attribute_const__ __hash_32(u32 x) 428c2ecf20Sopenharmony_ci{ 438c2ecf20Sopenharmony_ci u32 a, b; 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci asm( "move.l %2,%0" /* a = x * 0x0001 */ 468c2ecf20Sopenharmony_ci "\n lsl.l #2,%0" /* a = x * 0x0004 */ 478c2ecf20Sopenharmony_ci "\n move.l %0,%1" 488c2ecf20Sopenharmony_ci "\n lsl.l #7,%0" /* a = x * 0x0200 */ 498c2ecf20Sopenharmony_ci "\n add.l %2,%0" /* a = x * 0x0201 */ 508c2ecf20Sopenharmony_ci "\n add.l %0,%1" /* b = x * 0x0205 */ 518c2ecf20Sopenharmony_ci "\n add.l %0,%0" /* a = x * 0x0402 */ 528c2ecf20Sopenharmony_ci "\n add.l %0,%1" /* b = x * 0x0607 */ 538c2ecf20Sopenharmony_ci "\n lsl.l #5,%0" /* a = x * 0x8040 */ 548c2ecf20Sopenharmony_ci : "=&d,d" (a), "=&r,r" (b) 558c2ecf20Sopenharmony_ci : "r,roi?" (x)); /* a+b = x*0x8647 */ 568c2ecf20Sopenharmony_ci 578c2ecf20Sopenharmony_ci return ((u16)(x*0x61c8) << 16) + a + b; 588c2ecf20Sopenharmony_ci} 598c2ecf20Sopenharmony_ci 608c2ecf20Sopenharmony_ci#endif /* _ASM_HASH_H */ 61