1370b324cSopenharmony_ci/* Bra86.c -- Branch converter for X86 code (BCJ) 2370b324cSopenharmony_ci2023-04-02 : Igor Pavlov : Public domain */ 3370b324cSopenharmony_ci 4370b324cSopenharmony_ci#include "Precomp.h" 5370b324cSopenharmony_ci 6370b324cSopenharmony_ci#include "Bra.h" 7370b324cSopenharmony_ci#include "CpuArch.h" 8370b324cSopenharmony_ci 9370b324cSopenharmony_ci 10370b324cSopenharmony_ci#if defined(MY_CPU_SIZEOF_POINTER) \ 11370b324cSopenharmony_ci && ( MY_CPU_SIZEOF_POINTER == 4 \ 12370b324cSopenharmony_ci || MY_CPU_SIZEOF_POINTER == 8) 13370b324cSopenharmony_ci #define BR_CONV_USE_OPT_PC_PTR 14370b324cSopenharmony_ci#endif 15370b324cSopenharmony_ci 16370b324cSopenharmony_ci#ifdef BR_CONV_USE_OPT_PC_PTR 17370b324cSopenharmony_ci#define BR_PC_INIT pc -= (UInt32)(SizeT)p; // (MY_uintptr_t) 18370b324cSopenharmony_ci#define BR_PC_GET (pc + (UInt32)(SizeT)p) 19370b324cSopenharmony_ci#else 20370b324cSopenharmony_ci#define BR_PC_INIT pc += (UInt32)size; 21370b324cSopenharmony_ci#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p)) 22370b324cSopenharmony_ci// #define BR_PC_INIT 23370b324cSopenharmony_ci// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data)) 24370b324cSopenharmony_ci#endif 25370b324cSopenharmony_ci 26370b324cSopenharmony_ci#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c; 27370b324cSopenharmony_ci// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c; 28370b324cSopenharmony_ci 29370b324cSopenharmony_ci#define Z7_BRANCH_CONV_ST(name) z7_BranchConvSt_ ## name 30370b324cSopenharmony_ci 31370b324cSopenharmony_ci#define BR86_NEED_CONV_FOR_MS_BYTE(b) ((((b) + 1) & 0xfe) == 0) 32370b324cSopenharmony_ci 33370b324cSopenharmony_ci#ifdef MY_CPU_LE_UNALIGN 34370b324cSopenharmony_ci #define BR86_PREPARE_BCJ_SCAN const UInt32 v = GetUi32(p) ^ 0xe8e8e8e8; 35370b324cSopenharmony_ci #define BR86_IS_BCJ_BYTE(n) ((v & ((UInt32)0xfe << (n) * 8)) == 0) 36370b324cSopenharmony_ci#else 37370b324cSopenharmony_ci #define BR86_PREPARE_BCJ_SCAN 38370b324cSopenharmony_ci // bad for MSVC X86 (partial write to byte reg): 39370b324cSopenharmony_ci #define BR86_IS_BCJ_BYTE(n) ((p[n - 4] & 0xfe) == 0xe8) 40370b324cSopenharmony_ci // bad for old MSVC (partial write to byte reg): 41370b324cSopenharmony_ci // #define BR86_IS_BCJ_BYTE(n) (((*p ^ 0xe8) & 0xfe) == 0) 42370b324cSopenharmony_ci#endif 43370b324cSopenharmony_ci 44370b324cSopenharmony_cistatic 45370b324cSopenharmony_ciZ7_FORCE_INLINE 46370b324cSopenharmony_ciZ7_ATTRIB_NO_VECTOR 47370b324cSopenharmony_ciByte *Z7_BRANCH_CONV_ST(X86)(Byte *p, SizeT size, UInt32 pc, UInt32 *state, int encoding) 48370b324cSopenharmony_ci{ 49370b324cSopenharmony_ci if (size < 5) 50370b324cSopenharmony_ci return p; 51370b324cSopenharmony_ci { 52370b324cSopenharmony_ci // Byte *p = data; 53370b324cSopenharmony_ci const Byte *lim = p + size - 4; 54370b324cSopenharmony_ci unsigned mask = (unsigned)*state; // & 7; 55370b324cSopenharmony_ci#ifdef BR_CONV_USE_OPT_PC_PTR 56370b324cSopenharmony_ci /* if BR_CONV_USE_OPT_PC_PTR is defined: we need to adjust (pc) for (+4), 57370b324cSopenharmony_ci because call/jump offset is relative to the next instruction. 58370b324cSopenharmony_ci if BR_CONV_USE_OPT_PC_PTR is not defined : we don't need to adjust (pc) for (+4), 59370b324cSopenharmony_ci because BR_PC_GET uses (pc - (lim - p)), and lim was adjusted for (-4) before. 60370b324cSopenharmony_ci */ 61370b324cSopenharmony_ci pc += 4; 62370b324cSopenharmony_ci#endif 63370b324cSopenharmony_ci BR_PC_INIT 64370b324cSopenharmony_ci goto start; 65370b324cSopenharmony_ci 66370b324cSopenharmony_ci for (;; mask |= 4) 67370b324cSopenharmony_ci { 68370b324cSopenharmony_ci // cont: mask |= 4; 69370b324cSopenharmony_ci start: 70370b324cSopenharmony_ci if (p >= lim) 71370b324cSopenharmony_ci goto fin; 72370b324cSopenharmony_ci { 73370b324cSopenharmony_ci BR86_PREPARE_BCJ_SCAN 74370b324cSopenharmony_ci p += 4; 75370b324cSopenharmony_ci if (BR86_IS_BCJ_BYTE(0)) { goto m0; } mask >>= 1; 76370b324cSopenharmony_ci if (BR86_IS_BCJ_BYTE(1)) { goto m1; } mask >>= 1; 77370b324cSopenharmony_ci if (BR86_IS_BCJ_BYTE(2)) { goto m2; } mask = 0; 78370b324cSopenharmony_ci if (BR86_IS_BCJ_BYTE(3)) { goto a3; } 79370b324cSopenharmony_ci } 80370b324cSopenharmony_ci goto main_loop; 81370b324cSopenharmony_ci 82370b324cSopenharmony_ci m0: p--; 83370b324cSopenharmony_ci m1: p--; 84370b324cSopenharmony_ci m2: p--; 85370b324cSopenharmony_ci if (mask == 0) 86370b324cSopenharmony_ci goto a3; 87370b324cSopenharmony_ci if (p > lim) 88370b324cSopenharmony_ci goto fin_p; 89370b324cSopenharmony_ci 90370b324cSopenharmony_ci // if (((0x17u >> mask) & 1) == 0) 91370b324cSopenharmony_ci if (mask > 4 || mask == 3) 92370b324cSopenharmony_ci { 93370b324cSopenharmony_ci mask >>= 1; 94370b324cSopenharmony_ci continue; // goto cont; 95370b324cSopenharmony_ci } 96370b324cSopenharmony_ci mask >>= 1; 97370b324cSopenharmony_ci if (BR86_NEED_CONV_FOR_MS_BYTE(p[mask])) 98370b324cSopenharmony_ci continue; // goto cont; 99370b324cSopenharmony_ci // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont; 100370b324cSopenharmony_ci { 101370b324cSopenharmony_ci UInt32 v = GetUi32(p); 102370b324cSopenharmony_ci UInt32 c; 103370b324cSopenharmony_ci v += (1 << 24); if (v & 0xfe000000) continue; // goto cont; 104370b324cSopenharmony_ci c = BR_PC_GET; 105370b324cSopenharmony_ci BR_CONVERT_VAL(v, c) 106370b324cSopenharmony_ci { 107370b324cSopenharmony_ci mask <<= 3; 108370b324cSopenharmony_ci if (BR86_NEED_CONV_FOR_MS_BYTE(v >> mask)) 109370b324cSopenharmony_ci { 110370b324cSopenharmony_ci v ^= (((UInt32)0x100 << mask) - 1); 111370b324cSopenharmony_ci #ifdef MY_CPU_X86 112370b324cSopenharmony_ci // for X86 : we can recalculate (c) to reduce register pressure 113370b324cSopenharmony_ci c = BR_PC_GET; 114370b324cSopenharmony_ci #endif 115370b324cSopenharmony_ci BR_CONVERT_VAL(v, c) 116370b324cSopenharmony_ci } 117370b324cSopenharmony_ci mask = 0; 118370b324cSopenharmony_ci } 119370b324cSopenharmony_ci // v = (v & ((1 << 24) - 1)) - (v & (1 << 24)); 120370b324cSopenharmony_ci v &= (1 << 25) - 1; v -= (1 << 24); 121370b324cSopenharmony_ci SetUi32(p, v) 122370b324cSopenharmony_ci p += 4; 123370b324cSopenharmony_ci goto main_loop; 124370b324cSopenharmony_ci } 125370b324cSopenharmony_ci 126370b324cSopenharmony_ci main_loop: 127370b324cSopenharmony_ci if (p >= lim) 128370b324cSopenharmony_ci goto fin; 129370b324cSopenharmony_ci for (;;) 130370b324cSopenharmony_ci { 131370b324cSopenharmony_ci BR86_PREPARE_BCJ_SCAN 132370b324cSopenharmony_ci p += 4; 133370b324cSopenharmony_ci if (BR86_IS_BCJ_BYTE(0)) { goto a0; } 134370b324cSopenharmony_ci if (BR86_IS_BCJ_BYTE(1)) { goto a1; } 135370b324cSopenharmony_ci if (BR86_IS_BCJ_BYTE(2)) { goto a2; } 136370b324cSopenharmony_ci if (BR86_IS_BCJ_BYTE(3)) { goto a3; } 137370b324cSopenharmony_ci if (p >= lim) 138370b324cSopenharmony_ci goto fin; 139370b324cSopenharmony_ci } 140370b324cSopenharmony_ci 141370b324cSopenharmony_ci a0: p--; 142370b324cSopenharmony_ci a1: p--; 143370b324cSopenharmony_ci a2: p--; 144370b324cSopenharmony_ci a3: 145370b324cSopenharmony_ci if (p > lim) 146370b324cSopenharmony_ci goto fin_p; 147370b324cSopenharmony_ci // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont; 148370b324cSopenharmony_ci { 149370b324cSopenharmony_ci UInt32 v = GetUi32(p); 150370b324cSopenharmony_ci UInt32 c; 151370b324cSopenharmony_ci v += (1 << 24); if (v & 0xfe000000) continue; // goto cont; 152370b324cSopenharmony_ci c = BR_PC_GET; 153370b324cSopenharmony_ci BR_CONVERT_VAL(v, c) 154370b324cSopenharmony_ci // v = (v & ((1 << 24) - 1)) - (v & (1 << 24)); 155370b324cSopenharmony_ci v &= (1 << 25) - 1; v -= (1 << 24); 156370b324cSopenharmony_ci SetUi32(p, v) 157370b324cSopenharmony_ci p += 4; 158370b324cSopenharmony_ci goto main_loop; 159370b324cSopenharmony_ci } 160370b324cSopenharmony_ci } 161370b324cSopenharmony_ci 162370b324cSopenharmony_cifin_p: 163370b324cSopenharmony_ci p--; 164370b324cSopenharmony_cifin: 165370b324cSopenharmony_ci // the following processing for tail is optional and can be commented 166370b324cSopenharmony_ci /* 167370b324cSopenharmony_ci lim += 4; 168370b324cSopenharmony_ci for (; p < lim; p++, mask >>= 1) 169370b324cSopenharmony_ci if ((*p & 0xfe) == 0xe8) 170370b324cSopenharmony_ci break; 171370b324cSopenharmony_ci */ 172370b324cSopenharmony_ci *state = (UInt32)mask; 173370b324cSopenharmony_ci return p; 174370b324cSopenharmony_ci } 175370b324cSopenharmony_ci} 176370b324cSopenharmony_ci 177370b324cSopenharmony_ci 178370b324cSopenharmony_ci#define Z7_BRANCH_CONV_ST_FUNC_IMP(name, m, encoding) \ 179370b324cSopenharmony_ciZ7_NO_INLINE \ 180370b324cSopenharmony_ciZ7_ATTRIB_NO_VECTOR \ 181370b324cSopenharmony_ciByte *m(name)(Byte *data, SizeT size, UInt32 pc, UInt32 *state) \ 182370b324cSopenharmony_ci { return Z7_BRANCH_CONV_ST(name)(data, size, pc, state, encoding); } 183370b324cSopenharmony_ci 184370b324cSopenharmony_ciZ7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_DEC, 0) 185370b324cSopenharmony_ci#ifndef Z7_EXTRACT_ONLY 186370b324cSopenharmony_ciZ7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_ENC, 1) 187370b324cSopenharmony_ci#endif 188