1370b324cSopenharmony_ci/* Bra.c -- Branch converters for RISC code 2370b324cSopenharmony_ci2023-04-02 : Igor Pavlov : Public domain */ 3370b324cSopenharmony_ci 4370b324cSopenharmony_ci#include "Precomp.h" 5370b324cSopenharmony_ci 6370b324cSopenharmony_ci#include "Bra.h" 7370b324cSopenharmony_ci#include "CpuArch.h" 8370b324cSopenharmony_ci#include "RotateDefs.h" 9370b324cSopenharmony_ci 10370b324cSopenharmony_ci#if defined(MY_CPU_SIZEOF_POINTER) \ 11370b324cSopenharmony_ci && ( MY_CPU_SIZEOF_POINTER == 4 \ 12370b324cSopenharmony_ci || MY_CPU_SIZEOF_POINTER == 8) 13370b324cSopenharmony_ci #define BR_CONV_USE_OPT_PC_PTR 14370b324cSopenharmony_ci#endif 15370b324cSopenharmony_ci 16370b324cSopenharmony_ci#ifdef BR_CONV_USE_OPT_PC_PTR 17370b324cSopenharmony_ci#define BR_PC_INIT pc -= (UInt32)(SizeT)p; 18370b324cSopenharmony_ci#define BR_PC_GET (pc + (UInt32)(SizeT)p) 19370b324cSopenharmony_ci#else 20370b324cSopenharmony_ci#define BR_PC_INIT pc += (UInt32)size; 21370b324cSopenharmony_ci#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p)) 22370b324cSopenharmony_ci// #define BR_PC_INIT 23370b324cSopenharmony_ci// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data)) 24370b324cSopenharmony_ci#endif 25370b324cSopenharmony_ci 26370b324cSopenharmony_ci#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c; 27370b324cSopenharmony_ci// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c; 28370b324cSopenharmony_ci 29370b324cSopenharmony_ci#define Z7_BRANCH_CONV(name) z7_BranchConv_ ## name 30370b324cSopenharmony_ci 31370b324cSopenharmony_ci#define Z7_BRANCH_FUNC_MAIN(name) \ 32370b324cSopenharmony_cistatic \ 33370b324cSopenharmony_ciZ7_FORCE_INLINE \ 34370b324cSopenharmony_ciZ7_ATTRIB_NO_VECTOR \ 35370b324cSopenharmony_ciByte *Z7_BRANCH_CONV(name)(Byte *p, SizeT size, UInt32 pc, int encoding) 36370b324cSopenharmony_ci 37370b324cSopenharmony_ci#define Z7_BRANCH_FUNC_IMP(name, m, encoding) \ 38370b324cSopenharmony_ciZ7_NO_INLINE \ 39370b324cSopenharmony_ciZ7_ATTRIB_NO_VECTOR \ 40370b324cSopenharmony_ciByte *m(name)(Byte *data, SizeT size, UInt32 pc) \ 41370b324cSopenharmony_ci { return Z7_BRANCH_CONV(name)(data, size, pc, encoding); } \ 42370b324cSopenharmony_ci 43370b324cSopenharmony_ci#ifdef Z7_EXTRACT_ONLY 44370b324cSopenharmony_ci#define Z7_BRANCH_FUNCS_IMP(name) \ 45370b324cSopenharmony_ci Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0) 46370b324cSopenharmony_ci#else 47370b324cSopenharmony_ci#define Z7_BRANCH_FUNCS_IMP(name) \ 48370b324cSopenharmony_ci Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0) \ 49370b324cSopenharmony_ci Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC, 1) 50370b324cSopenharmony_ci#endif 51370b324cSopenharmony_ci 52370b324cSopenharmony_ci#if defined(__clang__) 53370b324cSopenharmony_ci#define BR_EXTERNAL_FOR 54370b324cSopenharmony_ci#define BR_NEXT_ITERATION continue; 55370b324cSopenharmony_ci#else 56370b324cSopenharmony_ci#define BR_EXTERNAL_FOR for (;;) 57370b324cSopenharmony_ci#define BR_NEXT_ITERATION break; 58370b324cSopenharmony_ci#endif 59370b324cSopenharmony_ci 60370b324cSopenharmony_ci#if defined(__clang__) && (__clang_major__ >= 8) \ 61370b324cSopenharmony_ci || defined(__GNUC__) && (__GNUC__ >= 1000) \ 62370b324cSopenharmony_ci // GCC is not good for __builtin_expect() here 63370b324cSopenharmony_ci /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */ 64370b324cSopenharmony_ci // #define Z7_unlikely [[unlikely]] 65370b324cSopenharmony_ci // #define Z7_LIKELY(x) (__builtin_expect((x), 1)) 66370b324cSopenharmony_ci #define Z7_UNLIKELY(x) (__builtin_expect((x), 0)) 67370b324cSopenharmony_ci // #define Z7_likely [[likely]] 68370b324cSopenharmony_ci#else 69370b324cSopenharmony_ci // #define Z7_LIKELY(x) (x) 70370b324cSopenharmony_ci #define Z7_UNLIKELY(x) (x) 71370b324cSopenharmony_ci // #define Z7_likely 72370b324cSopenharmony_ci#endif 73370b324cSopenharmony_ci 74370b324cSopenharmony_ci 75370b324cSopenharmony_ciZ7_BRANCH_FUNC_MAIN(ARM64) 76370b324cSopenharmony_ci{ 77370b324cSopenharmony_ci // Byte *p = data; 78370b324cSopenharmony_ci const Byte *lim; 79370b324cSopenharmony_ci const UInt32 flag = (UInt32)1 << (24 - 4); 80370b324cSopenharmony_ci const UInt32 mask = ((UInt32)1 << 24) - (flag << 1); 81370b324cSopenharmony_ci size &= ~(SizeT)3; 82370b324cSopenharmony_ci // if (size == 0) return p; 83370b324cSopenharmony_ci lim = p + size; 84370b324cSopenharmony_ci BR_PC_INIT 85370b324cSopenharmony_ci pc -= 4; // because (p) will point to next instruction 86370b324cSopenharmony_ci 87370b324cSopenharmony_ci BR_EXTERNAL_FOR 88370b324cSopenharmony_ci { 89370b324cSopenharmony_ci // Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE 90370b324cSopenharmony_ci for (;;) 91370b324cSopenharmony_ci { 92370b324cSopenharmony_ci UInt32 v; 93370b324cSopenharmony_ci if Z7_UNLIKELY(p == lim) 94370b324cSopenharmony_ci return p; 95370b324cSopenharmony_ci v = GetUi32a(p); 96370b324cSopenharmony_ci p += 4; 97370b324cSopenharmony_ci if Z7_UNLIKELY(((v - 0x94000000) & 0xfc000000) == 0) 98370b324cSopenharmony_ci { 99370b324cSopenharmony_ci UInt32 c = BR_PC_GET >> 2; 100370b324cSopenharmony_ci BR_CONVERT_VAL(v, c) 101370b324cSopenharmony_ci v &= 0x03ffffff; 102370b324cSopenharmony_ci v |= 0x94000000; 103370b324cSopenharmony_ci SetUi32a(p - 4, v) 104370b324cSopenharmony_ci BR_NEXT_ITERATION 105370b324cSopenharmony_ci } 106370b324cSopenharmony_ci // v = rotlFixed(v, 8); v += (flag << 8) - 0x90; if Z7_UNLIKELY((v & ((mask << 8) + 0x9f)) == 0) 107370b324cSopenharmony_ci v -= 0x90000000; if Z7_UNLIKELY((v & 0x9f000000) == 0) 108370b324cSopenharmony_ci { 109370b324cSopenharmony_ci UInt32 z, c; 110370b324cSopenharmony_ci // v = rotrFixed(v, 8); 111370b324cSopenharmony_ci v += flag; if Z7_UNLIKELY(v & mask) continue; 112370b324cSopenharmony_ci z = (v & 0xffffffe0) | (v >> 26); 113370b324cSopenharmony_ci c = (BR_PC_GET >> (12 - 3)) & ~(UInt32)7; 114370b324cSopenharmony_ci BR_CONVERT_VAL(z, c) 115370b324cSopenharmony_ci v &= 0x1f; 116370b324cSopenharmony_ci v |= 0x90000000; 117370b324cSopenharmony_ci v |= z << 26; 118370b324cSopenharmony_ci v |= 0x00ffffe0 & ((z & (((flag << 1) - 1))) - flag); 119370b324cSopenharmony_ci SetUi32a(p - 4, v) 120370b324cSopenharmony_ci } 121370b324cSopenharmony_ci } 122370b324cSopenharmony_ci } 123370b324cSopenharmony_ci} 124370b324cSopenharmony_ciZ7_BRANCH_FUNCS_IMP(ARM64) 125370b324cSopenharmony_ci 126370b324cSopenharmony_ci 127370b324cSopenharmony_ciZ7_BRANCH_FUNC_MAIN(ARM) 128370b324cSopenharmony_ci{ 129370b324cSopenharmony_ci // Byte *p = data; 130370b324cSopenharmony_ci const Byte *lim; 131370b324cSopenharmony_ci size &= ~(SizeT)3; 132370b324cSopenharmony_ci lim = p + size; 133370b324cSopenharmony_ci BR_PC_INIT 134370b324cSopenharmony_ci /* in ARM: branch offset is relative to the +2 instructions from current instruction. 135370b324cSopenharmony_ci (p) will point to next instruction */ 136370b324cSopenharmony_ci pc += 8 - 4; 137370b324cSopenharmony_ci 138370b324cSopenharmony_ci for (;;) 139370b324cSopenharmony_ci { 140370b324cSopenharmony_ci for (;;) 141370b324cSopenharmony_ci { 142370b324cSopenharmony_ci if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break; 143370b324cSopenharmony_ci if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break; 144370b324cSopenharmony_ci } 145370b324cSopenharmony_ci { 146370b324cSopenharmony_ci UInt32 v = GetUi32a(p - 4); 147370b324cSopenharmony_ci UInt32 c = BR_PC_GET >> 2; 148370b324cSopenharmony_ci BR_CONVERT_VAL(v, c) 149370b324cSopenharmony_ci v &= 0x00ffffff; 150370b324cSopenharmony_ci v |= 0xeb000000; 151370b324cSopenharmony_ci SetUi32a(p - 4, v) 152370b324cSopenharmony_ci } 153370b324cSopenharmony_ci } 154370b324cSopenharmony_ci} 155370b324cSopenharmony_ciZ7_BRANCH_FUNCS_IMP(ARM) 156370b324cSopenharmony_ci 157370b324cSopenharmony_ci 158370b324cSopenharmony_ciZ7_BRANCH_FUNC_MAIN(PPC) 159370b324cSopenharmony_ci{ 160370b324cSopenharmony_ci // Byte *p = data; 161370b324cSopenharmony_ci const Byte *lim; 162370b324cSopenharmony_ci size &= ~(SizeT)3; 163370b324cSopenharmony_ci lim = p + size; 164370b324cSopenharmony_ci BR_PC_INIT 165370b324cSopenharmony_ci pc -= 4; // because (p) will point to next instruction 166370b324cSopenharmony_ci 167370b324cSopenharmony_ci for (;;) 168370b324cSopenharmony_ci { 169370b324cSopenharmony_ci UInt32 v; 170370b324cSopenharmony_ci for (;;) 171370b324cSopenharmony_ci { 172370b324cSopenharmony_ci if Z7_UNLIKELY(p == lim) 173370b324cSopenharmony_ci return p; 174370b324cSopenharmony_ci // v = GetBe32a(p); 175370b324cSopenharmony_ci v = *(UInt32 *)(void *)p; 176370b324cSopenharmony_ci p += 4; 177370b324cSopenharmony_ci // if ((v & 0xfc000003) == 0x48000001) break; 178370b324cSopenharmony_ci // if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) break; 179370b324cSopenharmony_ci if Z7_UNLIKELY( 180370b324cSopenharmony_ci ((v - Z7_CONV_BE_TO_NATIVE_CONST32(0x48000001)) 181370b324cSopenharmony_ci & Z7_CONV_BE_TO_NATIVE_CONST32(0xfc000003)) == 0) break; 182370b324cSopenharmony_ci } 183370b324cSopenharmony_ci { 184370b324cSopenharmony_ci v = Z7_CONV_NATIVE_TO_BE_32(v); 185370b324cSopenharmony_ci { 186370b324cSopenharmony_ci UInt32 c = BR_PC_GET; 187370b324cSopenharmony_ci BR_CONVERT_VAL(v, c) 188370b324cSopenharmony_ci } 189370b324cSopenharmony_ci v &= 0x03ffffff; 190370b324cSopenharmony_ci v |= 0x48000000; 191370b324cSopenharmony_ci SetBe32a(p - 4, v) 192370b324cSopenharmony_ci } 193370b324cSopenharmony_ci } 194370b324cSopenharmony_ci} 195370b324cSopenharmony_ciZ7_BRANCH_FUNCS_IMP(PPC) 196370b324cSopenharmony_ci 197370b324cSopenharmony_ci 198370b324cSopenharmony_ci#ifdef Z7_CPU_FAST_ROTATE_SUPPORTED 199370b324cSopenharmony_ci#define BR_SPARC_USE_ROTATE 200370b324cSopenharmony_ci#endif 201370b324cSopenharmony_ci 202370b324cSopenharmony_ciZ7_BRANCH_FUNC_MAIN(SPARC) 203370b324cSopenharmony_ci{ 204370b324cSopenharmony_ci // Byte *p = data; 205370b324cSopenharmony_ci const Byte *lim; 206370b324cSopenharmony_ci const UInt32 flag = (UInt32)1 << 22; 207370b324cSopenharmony_ci size &= ~(SizeT)3; 208370b324cSopenharmony_ci lim = p + size; 209370b324cSopenharmony_ci BR_PC_INIT 210370b324cSopenharmony_ci pc -= 4; // because (p) will point to next instruction 211370b324cSopenharmony_ci for (;;) 212370b324cSopenharmony_ci { 213370b324cSopenharmony_ci UInt32 v; 214370b324cSopenharmony_ci for (;;) 215370b324cSopenharmony_ci { 216370b324cSopenharmony_ci if Z7_UNLIKELY(p == lim) 217370b324cSopenharmony_ci return p; 218370b324cSopenharmony_ci /* // the code without GetBe32a(): 219370b324cSopenharmony_ci { const UInt32 v = GetUi16a(p) & 0xc0ff; p += 4; if (v == 0x40 || v == 0xc07f) break; } 220370b324cSopenharmony_ci */ 221370b324cSopenharmony_ci v = GetBe32a(p); 222370b324cSopenharmony_ci p += 4; 223370b324cSopenharmony_ci #ifdef BR_SPARC_USE_ROTATE 224370b324cSopenharmony_ci v = rotlFixed(v, 2); 225370b324cSopenharmony_ci v += (flag << 2) - 1; 226370b324cSopenharmony_ci if Z7_UNLIKELY((v & (3 - (flag << 3))) == 0) 227370b324cSopenharmony_ci #else 228370b324cSopenharmony_ci v += (UInt32)5 << 29; 229370b324cSopenharmony_ci v ^= (UInt32)7 << 29; 230370b324cSopenharmony_ci v += flag; 231370b324cSopenharmony_ci if Z7_UNLIKELY((v & (0 - (flag << 1))) == 0) 232370b324cSopenharmony_ci #endif 233370b324cSopenharmony_ci break; 234370b324cSopenharmony_ci } 235370b324cSopenharmony_ci { 236370b324cSopenharmony_ci // UInt32 v = GetBe32a(p - 4); 237370b324cSopenharmony_ci #ifndef BR_SPARC_USE_ROTATE 238370b324cSopenharmony_ci v <<= 2; 239370b324cSopenharmony_ci #endif 240370b324cSopenharmony_ci { 241370b324cSopenharmony_ci UInt32 c = BR_PC_GET; 242370b324cSopenharmony_ci BR_CONVERT_VAL(v, c) 243370b324cSopenharmony_ci } 244370b324cSopenharmony_ci v &= (flag << 3) - 1; 245370b324cSopenharmony_ci #ifdef BR_SPARC_USE_ROTATE 246370b324cSopenharmony_ci v -= (flag << 2) - 1; 247370b324cSopenharmony_ci v = rotrFixed(v, 2); 248370b324cSopenharmony_ci #else 249370b324cSopenharmony_ci v -= (flag << 2); 250370b324cSopenharmony_ci v >>= 2; 251370b324cSopenharmony_ci v |= (UInt32)1 << 30; 252370b324cSopenharmony_ci #endif 253370b324cSopenharmony_ci SetBe32a(p - 4, v) 254370b324cSopenharmony_ci } 255370b324cSopenharmony_ci } 256370b324cSopenharmony_ci} 257370b324cSopenharmony_ciZ7_BRANCH_FUNCS_IMP(SPARC) 258370b324cSopenharmony_ci 259370b324cSopenharmony_ci 260370b324cSopenharmony_ciZ7_BRANCH_FUNC_MAIN(ARMT) 261370b324cSopenharmony_ci{ 262370b324cSopenharmony_ci // Byte *p = data; 263370b324cSopenharmony_ci Byte *lim; 264370b324cSopenharmony_ci size &= ~(SizeT)1; 265370b324cSopenharmony_ci // if (size == 0) return p; 266370b324cSopenharmony_ci if (size <= 2) return p; 267370b324cSopenharmony_ci size -= 2; 268370b324cSopenharmony_ci lim = p + size; 269370b324cSopenharmony_ci BR_PC_INIT 270370b324cSopenharmony_ci /* in ARM: branch offset is relative to the +2 instructions from current instruction. 271370b324cSopenharmony_ci (p) will point to the +2 instructions from current instruction */ 272370b324cSopenharmony_ci // pc += 4 - 4; 273370b324cSopenharmony_ci // if (encoding) pc -= 0xf800 << 1; else pc += 0xf800 << 1; 274370b324cSopenharmony_ci // #define ARMT_TAIL_PROC { goto armt_tail; } 275370b324cSopenharmony_ci #define ARMT_TAIL_PROC { return p; } 276370b324cSopenharmony_ci 277370b324cSopenharmony_ci do 278370b324cSopenharmony_ci { 279370b324cSopenharmony_ci /* in MSVC 32-bit x86 compilers: 280370b324cSopenharmony_ci UInt32 version : it loads value from memory with movzx 281370b324cSopenharmony_ci Byte version : it loads value to 8-bit register (AL/CL) 282370b324cSopenharmony_ci movzx version is slightly faster in some cpus 283370b324cSopenharmony_ci */ 284370b324cSopenharmony_ci unsigned b1; 285370b324cSopenharmony_ci // Byte / unsigned 286370b324cSopenharmony_ci b1 = p[1]; 287370b324cSopenharmony_ci // optimized version to reduce one (p >= lim) check: 288370b324cSopenharmony_ci // unsigned a1 = p[1]; b1 = p[3]; p += 2; if Z7_LIKELY((b1 & (a1 ^ 8)) < 0xf8) 289370b324cSopenharmony_ci for (;;) 290370b324cSopenharmony_ci { 291370b324cSopenharmony_ci unsigned b3; // Byte / UInt32 292370b324cSopenharmony_ci /* (Byte)(b3) normalization can use low byte computations in MSVC. 293370b324cSopenharmony_ci It gives smaller code, and no loss of speed in some compilers/cpus. 294370b324cSopenharmony_ci But new MSVC 32-bit x86 compilers use more slow load 295370b324cSopenharmony_ci from memory to low byte register in that case. 296370b324cSopenharmony_ci So we try to use full 32-bit computations for faster code. 297370b324cSopenharmony_ci */ 298370b324cSopenharmony_ci // if (p >= lim) { ARMT_TAIL_PROC } b3 = b1 + 8; b1 = p[3]; p += 2; if ((b3 & b1) >= 0xf8) break; 299370b324cSopenharmony_ci if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b3 = p[3]; p += 2; if Z7_UNLIKELY((b3 & (b1 ^ 8)) >= 0xf8) break; 300370b324cSopenharmony_ci if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b1 = p[3]; p += 2; if Z7_UNLIKELY((b1 & (b3 ^ 8)) >= 0xf8) break; 301370b324cSopenharmony_ci } 302370b324cSopenharmony_ci { 303370b324cSopenharmony_ci /* we can adjust pc for (0xf800) to rid of (& 0x7FF) operation. 304370b324cSopenharmony_ci But gcc/clang for arm64 can use bfi instruction for full code here */ 305370b324cSopenharmony_ci UInt32 v = 306370b324cSopenharmony_ci ((UInt32)GetUi16a(p - 2) << 11) | 307370b324cSopenharmony_ci ((UInt32)GetUi16a(p) & 0x7FF); 308370b324cSopenharmony_ci /* 309370b324cSopenharmony_ci UInt32 v = 310370b324cSopenharmony_ci ((UInt32)p[1 - 2] << 19) 311370b324cSopenharmony_ci + (((UInt32)p[1] & 0x7) << 8) 312370b324cSopenharmony_ci + (((UInt32)p[-2] << 11)) 313370b324cSopenharmony_ci + (p[0]); 314370b324cSopenharmony_ci */ 315370b324cSopenharmony_ci p += 2; 316370b324cSopenharmony_ci { 317370b324cSopenharmony_ci UInt32 c = BR_PC_GET >> 1; 318370b324cSopenharmony_ci BR_CONVERT_VAL(v, c) 319370b324cSopenharmony_ci } 320370b324cSopenharmony_ci SetUi16a(p - 4, (UInt16)(((v >> 11) & 0x7ff) | 0xf000)) 321370b324cSopenharmony_ci SetUi16a(p - 2, (UInt16)(v | 0xf800)) 322370b324cSopenharmony_ci /* 323370b324cSopenharmony_ci p[-4] = (Byte)(v >> 11); 324370b324cSopenharmony_ci p[-3] = (Byte)(0xf0 | ((v >> 19) & 0x7)); 325370b324cSopenharmony_ci p[-2] = (Byte)v; 326370b324cSopenharmony_ci p[-1] = (Byte)(0xf8 | (v >> 8)); 327370b324cSopenharmony_ci */ 328370b324cSopenharmony_ci } 329370b324cSopenharmony_ci } 330370b324cSopenharmony_ci while (p < lim); 331370b324cSopenharmony_ci return p; 332370b324cSopenharmony_ci // armt_tail: 333370b324cSopenharmony_ci // if ((Byte)((lim[1] & 0xf8)) != 0xf0) { lim += 2; } return lim; 334370b324cSopenharmony_ci // return (Byte *)(lim + ((Byte)((lim[1] ^ 0xf0) & 0xf8) == 0 ? 0 : 2)); 335370b324cSopenharmony_ci // return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2)); 336370b324cSopenharmony_ci // return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2)); 337370b324cSopenharmony_ci} 338370b324cSopenharmony_ciZ7_BRANCH_FUNCS_IMP(ARMT) 339370b324cSopenharmony_ci 340370b324cSopenharmony_ci 341370b324cSopenharmony_ci// #define BR_IA64_NO_INLINE 342370b324cSopenharmony_ci 343370b324cSopenharmony_ciZ7_BRANCH_FUNC_MAIN(IA64) 344370b324cSopenharmony_ci{ 345370b324cSopenharmony_ci // Byte *p = data; 346370b324cSopenharmony_ci const Byte *lim; 347370b324cSopenharmony_ci size &= ~(SizeT)15; 348370b324cSopenharmony_ci lim = p + size; 349370b324cSopenharmony_ci pc -= 1 << 4; 350370b324cSopenharmony_ci pc >>= 4 - 1; 351370b324cSopenharmony_ci // pc -= 1 << 1; 352370b324cSopenharmony_ci 353370b324cSopenharmony_ci for (;;) 354370b324cSopenharmony_ci { 355370b324cSopenharmony_ci unsigned m; 356370b324cSopenharmony_ci for (;;) 357370b324cSopenharmony_ci { 358370b324cSopenharmony_ci if Z7_UNLIKELY(p == lim) 359370b324cSopenharmony_ci return p; 360370b324cSopenharmony_ci m = (unsigned)((UInt32)0x334b0000 >> (*p & 0x1e)); 361370b324cSopenharmony_ci p += 16; 362370b324cSopenharmony_ci pc += 1 << 1; 363370b324cSopenharmony_ci if (m &= 3) 364370b324cSopenharmony_ci break; 365370b324cSopenharmony_ci } 366370b324cSopenharmony_ci { 367370b324cSopenharmony_ci p += (ptrdiff_t)m * 5 - 20; // negative value is expected here. 368370b324cSopenharmony_ci do 369370b324cSopenharmony_ci { 370370b324cSopenharmony_ci const UInt32 t = 371370b324cSopenharmony_ci #if defined(MY_CPU_X86_OR_AMD64) 372370b324cSopenharmony_ci // we use 32-bit load here to reduce code size on x86: 373370b324cSopenharmony_ci GetUi32(p); 374370b324cSopenharmony_ci #else 375370b324cSopenharmony_ci GetUi16(p); 376370b324cSopenharmony_ci #endif 377370b324cSopenharmony_ci UInt32 z = GetUi32(p + 1) >> m; 378370b324cSopenharmony_ci p += 5; 379370b324cSopenharmony_ci if (((t >> m) & (0x70 << 1)) == 0 380370b324cSopenharmony_ci && ((z - (0x5000000 << 1)) & (0xf000000 << 1)) == 0) 381370b324cSopenharmony_ci { 382370b324cSopenharmony_ci UInt32 v = (UInt32)((0x8fffff << 1) | 1) & z; 383370b324cSopenharmony_ci z ^= v; 384370b324cSopenharmony_ci #ifdef BR_IA64_NO_INLINE 385370b324cSopenharmony_ci v |= (v & ((UInt32)1 << (23 + 1))) >> 3; 386370b324cSopenharmony_ci { 387370b324cSopenharmony_ci UInt32 c = pc; 388370b324cSopenharmony_ci BR_CONVERT_VAL(v, c) 389370b324cSopenharmony_ci } 390370b324cSopenharmony_ci v &= (0x1fffff << 1) | 1; 391370b324cSopenharmony_ci #else 392370b324cSopenharmony_ci { 393370b324cSopenharmony_ci if (encoding) 394370b324cSopenharmony_ci { 395370b324cSopenharmony_ci // pc &= ~(0xc00000 << 1); // we just need to clear at least 2 bits 396370b324cSopenharmony_ci pc &= (0x1fffff << 1) | 1; 397370b324cSopenharmony_ci v += pc; 398370b324cSopenharmony_ci } 399370b324cSopenharmony_ci else 400370b324cSopenharmony_ci { 401370b324cSopenharmony_ci // pc |= 0xc00000 << 1; // we need to set at least 2 bits 402370b324cSopenharmony_ci pc |= ~(UInt32)((0x1fffff << 1) | 1); 403370b324cSopenharmony_ci v -= pc; 404370b324cSopenharmony_ci } 405370b324cSopenharmony_ci } 406370b324cSopenharmony_ci v &= ~(UInt32)(0x600000 << 1); 407370b324cSopenharmony_ci #endif 408370b324cSopenharmony_ci v += (0x700000 << 1); 409370b324cSopenharmony_ci v &= (0x8fffff << 1) | 1; 410370b324cSopenharmony_ci z |= v; 411370b324cSopenharmony_ci z <<= m; 412370b324cSopenharmony_ci SetUi32(p + 1 - 5, z) 413370b324cSopenharmony_ci } 414370b324cSopenharmony_ci m++; 415370b324cSopenharmony_ci } 416370b324cSopenharmony_ci while (m &= 3); // while (m < 4); 417370b324cSopenharmony_ci } 418370b324cSopenharmony_ci } 419370b324cSopenharmony_ci} 420370b324cSopenharmony_ciZ7_BRANCH_FUNCS_IMP(IA64) 421