/* Bra.c -- Branch converters for RISC code
2023-04-02 : Igor Pavlov : Public domain */

4370b324cSopenharmony_ci#include "Precomp.h"
5370b324cSopenharmony_ci
6370b324cSopenharmony_ci#include "Bra.h"
7370b324cSopenharmony_ci#include "CpuArch.h"
8370b324cSopenharmony_ci#include "RotateDefs.h"
9370b324cSopenharmony_ci
/*
  BR_PC_INIT / BR_PC_GET : track the address that belongs to the current
  position in the buffer.

  Both macros expand to code that uses the locals of the converter they are
  placed in: (p) and (pc) always, plus (size) and (lim) in the fallback form.

  BR_PC_INIT  is expanded once, after (lim = p + size) and before scanning.
              It carries its own ';' so it is written without one.
  BR_PC_GET   afterwards evaluates (mod 2^32) to
                (pc at BR_PC_INIT time) + (current p - p at BR_PC_INIT time)
              plus whatever bias the converter added to (pc) after BR_PC_INIT.

  BR_CONV_USE_OPT_PC_PTR (pointer size known to be 4 or 8) selects the form
  that folds the pointer value itself into (pc); otherwise the distance to
  (lim) is used.
*/
#if defined(MY_CPU_SIZEOF_POINTER) \
    && ( MY_CPU_SIZEOF_POINTER == 4 \
      || MY_CPU_SIZEOF_POINTER == 8)
  #define BR_CONV_USE_OPT_PC_PTR
#endif

#ifdef BR_CONV_USE_OPT_PC_PTR
#define BR_PC_INIT  pc -= (UInt32)(SizeT)p;
#define BR_PC_GET   (pc + (UInt32)(SizeT)p)
#else
#define BR_PC_INIT  pc += (UInt32)size;
#define BR_PC_GET   (pc - (UInt32)(SizeT)(lim - p))
// #define BR_PC_INIT
// #define BR_PC_GET   (pc + (UInt32)(SizeT)(p - data))
#endif

/*
  BR_CONVERT_VAL(v, c) : v += c when the local (encoding) is nonzero,
  v -= c otherwise. Expands to a complete compound statement, so it is
  written without a trailing ';' and cannot capture a following 'else'.
*/
#define BR_CONVERT_VAL(v, c) { if (encoding) (v) += (c); else (v) -= (c); }
// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;

/* Name of the file-local worker function for converter (name). */
#define Z7_BRANCH_CONV(name) z7_BranchConv_ ## name

/*
  Opens the definition of the worker for (name): a static, force-inlined
  function taking the buffer (p), its (size) in bytes, the address (pc) that
  corresponds to p[0], and the (encoding) direction flag. The function body
  follows the macro invocation.
*/
#define Z7_BRANCH_FUNC_MAIN(name) \
static \
Z7_FORCE_INLINE \
Z7_ATTRIB_NO_VECTOR \
Byte *Z7_BRANCH_CONV(name)(Byte *p, SizeT size, UInt32 pc, int encoding)

/*
  Emits one non-inlined entry point named m(name) that calls the worker with
  a constant (encoding) value.
*/
#define Z7_BRANCH_FUNC_IMP(name, m, encoding) \
Z7_NO_INLINE \
Z7_ATTRIB_NO_VECTOR \
Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \
  { return Z7_BRANCH_CONV(name)(data, size, pc, encoding); }

/*
  Emits the entry points for converter (name): the decoder (encoding = 0)
  always, and the encoder (encoding = 1) unless Z7_EXTRACT_ONLY is defined.
*/
#ifdef Z7_EXTRACT_ONLY
#define Z7_BRANCH_FUNCS_IMP(name) \
  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0)
#else
#define Z7_BRANCH_FUNCS_IMP(name) \
  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0) \
  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC, 1)
#endif

/*
  Loop-shape selection for converters that use BR_EXTERNAL_FOR { for (;;) {...} }.

  clang : no outer loop; BR_NEXT_ITERATION is 'continue' in the inner loop.
  other : an outer for (;;) wraps the inner loop; BR_NEXT_ITERATION is 'break',
          which leaves the inner loop so that the outer loop re-enters it.
  In both shapes control resumes at the top of the inner loop.
  NOTE(review): the split is presumably a code-generation tuning - confirm.
*/
#if defined(__clang__)
#define BR_EXTERNAL_FOR
#define BR_NEXT_ITERATION  continue;
#else
#define BR_EXTERNAL_FOR    for (;;)
#define BR_NEXT_ITERATION  break;
#endif

/*
  Z7_UNLIKELY(x) : branch hint used as  if Z7_UNLIKELY(cond) ...
  The expansion supplies the parentheses that 'if' requires.

  GCC is not good for __builtin_expect() here; the GCC test below asks for
  (__GNUC__ >= 1000), which presumably keeps the hint off for GCC releases.
*/
#if (defined(__clang__) && (__clang_major__ >= 8)) \
  || (defined(__GNUC__) && (__GNUC__ >= 1000))
  /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
  // #define Z7_unlikely [[unlikely]]
  // #define Z7_LIKELY(x)   (__builtin_expect((x), 1))
  #define Z7_UNLIKELY(x) (__builtin_expect((x), 0))
  // #define Z7_likely [[likely]]
#else
  // #define Z7_LIKELY(x)   (x)
  #define Z7_UNLIKELY(x) (x)
  // #define Z7_likely
#endif


75370b324cSopenharmony_ciZ7_BRANCH_FUNC_MAIN(ARM64)
76370b324cSopenharmony_ci{
77370b324cSopenharmony_ci  // Byte *p = data;
78370b324cSopenharmony_ci  const Byte *lim;
79370b324cSopenharmony_ci  const UInt32 flag = (UInt32)1 << (24 - 4);
80370b324cSopenharmony_ci  const UInt32 mask = ((UInt32)1 << 24) - (flag << 1);
81370b324cSopenharmony_ci  size &= ~(SizeT)3;
82370b324cSopenharmony_ci  // if (size == 0) return p;
83370b324cSopenharmony_ci  lim = p + size;
84370b324cSopenharmony_ci  BR_PC_INIT
85370b324cSopenharmony_ci  pc -= 4;  // because (p) will point to next instruction
86370b324cSopenharmony_ci
87370b324cSopenharmony_ci  BR_EXTERNAL_FOR
88370b324cSopenharmony_ci  {
89370b324cSopenharmony_ci    // Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
90370b324cSopenharmony_ci    for (;;)
91370b324cSopenharmony_ci    {
92370b324cSopenharmony_ci      UInt32 v;
93370b324cSopenharmony_ci      if Z7_UNLIKELY(p == lim)
94370b324cSopenharmony_ci        return p;
95370b324cSopenharmony_ci      v = GetUi32a(p);
96370b324cSopenharmony_ci      p += 4;
97370b324cSopenharmony_ci      if Z7_UNLIKELY(((v - 0x94000000) & 0xfc000000) == 0)
98370b324cSopenharmony_ci      {
99370b324cSopenharmony_ci        UInt32 c = BR_PC_GET >> 2;
100370b324cSopenharmony_ci        BR_CONVERT_VAL(v, c)
101370b324cSopenharmony_ci        v &= 0x03ffffff;
102370b324cSopenharmony_ci        v |= 0x94000000;
103370b324cSopenharmony_ci        SetUi32a(p - 4, v)
104370b324cSopenharmony_ci        BR_NEXT_ITERATION
105370b324cSopenharmony_ci      }
106370b324cSopenharmony_ci      // v = rotlFixed(v, 8);  v += (flag << 8) - 0x90;  if Z7_UNLIKELY((v & ((mask << 8) + 0x9f)) == 0)
107370b324cSopenharmony_ci      v -= 0x90000000;  if Z7_UNLIKELY((v & 0x9f000000) == 0)
108370b324cSopenharmony_ci      {
109370b324cSopenharmony_ci        UInt32 z, c;
110370b324cSopenharmony_ci        // v = rotrFixed(v, 8);
111370b324cSopenharmony_ci        v += flag; if Z7_UNLIKELY(v & mask) continue;
112370b324cSopenharmony_ci        z = (v & 0xffffffe0) | (v >> 26);
113370b324cSopenharmony_ci        c = (BR_PC_GET >> (12 - 3)) & ~(UInt32)7;
114370b324cSopenharmony_ci        BR_CONVERT_VAL(z, c)
115370b324cSopenharmony_ci        v &= 0x1f;
116370b324cSopenharmony_ci        v |= 0x90000000;
117370b324cSopenharmony_ci        v |= z << 26;
118370b324cSopenharmony_ci        v |= 0x00ffffe0 & ((z & (((flag << 1) - 1))) - flag);
119370b324cSopenharmony_ci        SetUi32a(p - 4, v)
120370b324cSopenharmony_ci      }
121370b324cSopenharmony_ci    }
122370b324cSopenharmony_ci  }
123370b324cSopenharmony_ci}
124370b324cSopenharmony_ciZ7_BRANCH_FUNCS_IMP(ARM64)
125370b324cSopenharmony_ci
126370b324cSopenharmony_ci
127370b324cSopenharmony_ciZ7_BRANCH_FUNC_MAIN(ARM)
128370b324cSopenharmony_ci{
129370b324cSopenharmony_ci  // Byte *p = data;
130370b324cSopenharmony_ci  const Byte *lim;
131370b324cSopenharmony_ci  size &= ~(SizeT)3;
132370b324cSopenharmony_ci  lim = p + size;
133370b324cSopenharmony_ci  BR_PC_INIT
134370b324cSopenharmony_ci  /* in ARM: branch offset is relative to the +2 instructions from current instruction.
135370b324cSopenharmony_ci     (p) will point to next instruction */
136370b324cSopenharmony_ci  pc += 8 - 4;
137370b324cSopenharmony_ci
138370b324cSopenharmony_ci  for (;;)
139370b324cSopenharmony_ci  {
140370b324cSopenharmony_ci    for (;;)
141370b324cSopenharmony_ci    {
142370b324cSopenharmony_ci      if Z7_UNLIKELY(p >= lim) { return p; }  p += 4;  if Z7_UNLIKELY(p[-1] == 0xeb) break;
143370b324cSopenharmony_ci      if Z7_UNLIKELY(p >= lim) { return p; }  p += 4;  if Z7_UNLIKELY(p[-1] == 0xeb) break;
144370b324cSopenharmony_ci    }
145370b324cSopenharmony_ci    {
146370b324cSopenharmony_ci      UInt32 v = GetUi32a(p - 4);
147370b324cSopenharmony_ci      UInt32 c = BR_PC_GET >> 2;
148370b324cSopenharmony_ci      BR_CONVERT_VAL(v, c)
149370b324cSopenharmony_ci      v &= 0x00ffffff;
150370b324cSopenharmony_ci      v |= 0xeb000000;
151370b324cSopenharmony_ci      SetUi32a(p - 4, v)
152370b324cSopenharmony_ci    }
153370b324cSopenharmony_ci  }
154370b324cSopenharmony_ci}
155370b324cSopenharmony_ciZ7_BRANCH_FUNCS_IMP(ARM)
156370b324cSopenharmony_ci
157370b324cSopenharmony_ci
158370b324cSopenharmony_ciZ7_BRANCH_FUNC_MAIN(PPC)
159370b324cSopenharmony_ci{
160370b324cSopenharmony_ci  // Byte *p = data;
161370b324cSopenharmony_ci  const Byte *lim;
162370b324cSopenharmony_ci  size &= ~(SizeT)3;
163370b324cSopenharmony_ci  lim = p + size;
164370b324cSopenharmony_ci  BR_PC_INIT
165370b324cSopenharmony_ci  pc -= 4;  // because (p) will point to next instruction
166370b324cSopenharmony_ci
167370b324cSopenharmony_ci  for (;;)
168370b324cSopenharmony_ci  {
169370b324cSopenharmony_ci    UInt32 v;
170370b324cSopenharmony_ci    for (;;)
171370b324cSopenharmony_ci    {
172370b324cSopenharmony_ci      if Z7_UNLIKELY(p == lim)
173370b324cSopenharmony_ci        return p;
174370b324cSopenharmony_ci      // v = GetBe32a(p);
175370b324cSopenharmony_ci      v = *(UInt32 *)(void *)p;
176370b324cSopenharmony_ci      p += 4;
177370b324cSopenharmony_ci      // if ((v & 0xfc000003) == 0x48000001) break;
178370b324cSopenharmony_ci      // if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) break;
179370b324cSopenharmony_ci      if Z7_UNLIKELY(
180370b324cSopenharmony_ci          ((v - Z7_CONV_BE_TO_NATIVE_CONST32(0x48000001))
181370b324cSopenharmony_ci              & Z7_CONV_BE_TO_NATIVE_CONST32(0xfc000003)) == 0) break;
182370b324cSopenharmony_ci    }
183370b324cSopenharmony_ci    {
184370b324cSopenharmony_ci      v = Z7_CONV_NATIVE_TO_BE_32(v);
185370b324cSopenharmony_ci      {
186370b324cSopenharmony_ci        UInt32 c = BR_PC_GET;
187370b324cSopenharmony_ci        BR_CONVERT_VAL(v, c)
188370b324cSopenharmony_ci      }
189370b324cSopenharmony_ci      v &= 0x03ffffff;
190370b324cSopenharmony_ci      v |= 0x48000000;
191370b324cSopenharmony_ci      SetBe32a(p - 4, v)
192370b324cSopenharmony_ci    }
193370b324cSopenharmony_ci  }
194370b324cSopenharmony_ci}
195370b324cSopenharmony_ciZ7_BRANCH_FUNCS_IMP(PPC)
196370b324cSopenharmony_ci
197370b324cSopenharmony_ci
198370b324cSopenharmony_ci#ifdef Z7_CPU_FAST_ROTATE_SUPPORTED
199370b324cSopenharmony_ci#define BR_SPARC_USE_ROTATE
200370b324cSopenharmony_ci#endif
201370b324cSopenharmony_ci
202370b324cSopenharmony_ciZ7_BRANCH_FUNC_MAIN(SPARC)
203370b324cSopenharmony_ci{
204370b324cSopenharmony_ci  // Byte *p = data;
205370b324cSopenharmony_ci  const Byte *lim;
206370b324cSopenharmony_ci  const UInt32 flag = (UInt32)1 << 22;
207370b324cSopenharmony_ci  size &= ~(SizeT)3;
208370b324cSopenharmony_ci  lim = p + size;
209370b324cSopenharmony_ci  BR_PC_INIT
210370b324cSopenharmony_ci  pc -= 4;  // because (p) will point to next instruction
211370b324cSopenharmony_ci  for (;;)
212370b324cSopenharmony_ci  {
213370b324cSopenharmony_ci    UInt32 v;
214370b324cSopenharmony_ci    for (;;)
215370b324cSopenharmony_ci    {
216370b324cSopenharmony_ci      if Z7_UNLIKELY(p == lim)
217370b324cSopenharmony_ci        return p;
218370b324cSopenharmony_ci      /* // the code without GetBe32a():
219370b324cSopenharmony_ci      { const UInt32 v = GetUi16a(p) & 0xc0ff; p += 4; if (v == 0x40 || v == 0xc07f) break; }
220370b324cSopenharmony_ci      */
221370b324cSopenharmony_ci      v = GetBe32a(p);
222370b324cSopenharmony_ci      p += 4;
223370b324cSopenharmony_ci    #ifdef BR_SPARC_USE_ROTATE
224370b324cSopenharmony_ci      v = rotlFixed(v, 2);
225370b324cSopenharmony_ci      v += (flag << 2) - 1;
226370b324cSopenharmony_ci      if Z7_UNLIKELY((v & (3 - (flag << 3))) == 0)
227370b324cSopenharmony_ci    #else
228370b324cSopenharmony_ci      v += (UInt32)5 << 29;
229370b324cSopenharmony_ci      v ^= (UInt32)7 << 29;
230370b324cSopenharmony_ci      v += flag;
231370b324cSopenharmony_ci      if Z7_UNLIKELY((v & (0 - (flag << 1))) == 0)
232370b324cSopenharmony_ci    #endif
233370b324cSopenharmony_ci        break;
234370b324cSopenharmony_ci    }
235370b324cSopenharmony_ci    {
236370b324cSopenharmony_ci      // UInt32 v = GetBe32a(p - 4);
237370b324cSopenharmony_ci    #ifndef BR_SPARC_USE_ROTATE
238370b324cSopenharmony_ci      v <<= 2;
239370b324cSopenharmony_ci    #endif
240370b324cSopenharmony_ci      {
241370b324cSopenharmony_ci        UInt32 c = BR_PC_GET;
242370b324cSopenharmony_ci        BR_CONVERT_VAL(v, c)
243370b324cSopenharmony_ci      }
244370b324cSopenharmony_ci      v &= (flag << 3) - 1;
245370b324cSopenharmony_ci    #ifdef BR_SPARC_USE_ROTATE
246370b324cSopenharmony_ci      v -= (flag << 2) - 1;
247370b324cSopenharmony_ci      v = rotrFixed(v, 2);
248370b324cSopenharmony_ci    #else
249370b324cSopenharmony_ci      v -= (flag << 2);
250370b324cSopenharmony_ci      v >>= 2;
251370b324cSopenharmony_ci      v |= (UInt32)1 << 30;
252370b324cSopenharmony_ci    #endif
253370b324cSopenharmony_ci      SetBe32a(p - 4, v)
254370b324cSopenharmony_ci    }
255370b324cSopenharmony_ci  }
256370b324cSopenharmony_ci}
257370b324cSopenharmony_ciZ7_BRANCH_FUNCS_IMP(SPARC)
258370b324cSopenharmony_ci
259370b324cSopenharmony_ci
260370b324cSopenharmony_ciZ7_BRANCH_FUNC_MAIN(ARMT)
261370b324cSopenharmony_ci{
262370b324cSopenharmony_ci  // Byte *p = data;
263370b324cSopenharmony_ci  Byte *lim;
264370b324cSopenharmony_ci  size &= ~(SizeT)1;
265370b324cSopenharmony_ci  // if (size == 0) return p;
266370b324cSopenharmony_ci  if (size <= 2) return p;
267370b324cSopenharmony_ci  size -= 2;
268370b324cSopenharmony_ci  lim = p + size;
269370b324cSopenharmony_ci  BR_PC_INIT
270370b324cSopenharmony_ci  /* in ARM: branch offset is relative to the +2 instructions from current instruction.
271370b324cSopenharmony_ci     (p) will point to the +2 instructions from current instruction */
272370b324cSopenharmony_ci  // pc += 4 - 4;
273370b324cSopenharmony_ci  // if (encoding) pc -= 0xf800 << 1; else pc += 0xf800 << 1;
274370b324cSopenharmony_ci  // #define ARMT_TAIL_PROC { goto armt_tail; }
275370b324cSopenharmony_ci  #define ARMT_TAIL_PROC { return p; }
276370b324cSopenharmony_ci
277370b324cSopenharmony_ci  do
278370b324cSopenharmony_ci  {
279370b324cSopenharmony_ci    /* in MSVC 32-bit x86 compilers:
280370b324cSopenharmony_ci       UInt32 version : it loads value from memory with movzx
281370b324cSopenharmony_ci       Byte   version : it loads value to 8-bit register (AL/CL)
282370b324cSopenharmony_ci       movzx version is slightly faster in some cpus
283370b324cSopenharmony_ci    */
284370b324cSopenharmony_ci    unsigned b1;
285370b324cSopenharmony_ci    // Byte / unsigned
286370b324cSopenharmony_ci    b1 = p[1];
287370b324cSopenharmony_ci    // optimized version to reduce one (p >= lim) check:
288370b324cSopenharmony_ci    // unsigned a1 = p[1];  b1 = p[3];  p += 2;  if Z7_LIKELY((b1 & (a1 ^ 8)) < 0xf8)
289370b324cSopenharmony_ci    for (;;)
290370b324cSopenharmony_ci    {
291370b324cSopenharmony_ci      unsigned b3; // Byte / UInt32
292370b324cSopenharmony_ci      /* (Byte)(b3) normalization can use low byte computations in MSVC.
293370b324cSopenharmony_ci         It gives smaller code, and no loss of speed in some compilers/cpus.
294370b324cSopenharmony_ci         But new MSVC 32-bit x86 compilers use more slow load
295370b324cSopenharmony_ci         from memory to low byte register in that case.
296370b324cSopenharmony_ci         So we try to use full 32-bit computations for faster code.
297370b324cSopenharmony_ci      */
298370b324cSopenharmony_ci      // if (p >= lim) { ARMT_TAIL_PROC }  b3 = b1 + 8;  b1 = p[3];  p += 2;  if ((b3 & b1) >= 0xf8) break;
299370b324cSopenharmony_ci      if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC }  b3 = p[3];  p += 2;  if Z7_UNLIKELY((b3 & (b1 ^ 8)) >= 0xf8) break;
300370b324cSopenharmony_ci      if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC }  b1 = p[3];  p += 2;  if Z7_UNLIKELY((b1 & (b3 ^ 8)) >= 0xf8) break;
301370b324cSopenharmony_ci    }
302370b324cSopenharmony_ci    {
303370b324cSopenharmony_ci      /* we can adjust pc for (0xf800) to rid of (& 0x7FF) operation.
304370b324cSopenharmony_ci         But gcc/clang for arm64 can use bfi instruction for full code here */
305370b324cSopenharmony_ci      UInt32 v =
306370b324cSopenharmony_ci          ((UInt32)GetUi16a(p - 2) << 11) |
307370b324cSopenharmony_ci          ((UInt32)GetUi16a(p) & 0x7FF);
308370b324cSopenharmony_ci      /*
309370b324cSopenharmony_ci      UInt32 v =
310370b324cSopenharmony_ci            ((UInt32)p[1 - 2] << 19)
311370b324cSopenharmony_ci          + (((UInt32)p[1] & 0x7) << 8)
312370b324cSopenharmony_ci          + (((UInt32)p[-2] << 11))
313370b324cSopenharmony_ci          + (p[0]);
314370b324cSopenharmony_ci      */
315370b324cSopenharmony_ci      p += 2;
316370b324cSopenharmony_ci      {
317370b324cSopenharmony_ci        UInt32 c = BR_PC_GET >> 1;
318370b324cSopenharmony_ci        BR_CONVERT_VAL(v, c)
319370b324cSopenharmony_ci      }
320370b324cSopenharmony_ci      SetUi16a(p - 4, (UInt16)(((v >> 11) & 0x7ff) | 0xf000))
321370b324cSopenharmony_ci      SetUi16a(p - 2, (UInt16)(v | 0xf800))
322370b324cSopenharmony_ci      /*
323370b324cSopenharmony_ci      p[-4] = (Byte)(v >> 11);
324370b324cSopenharmony_ci      p[-3] = (Byte)(0xf0 | ((v >> 19) & 0x7));
325370b324cSopenharmony_ci      p[-2] = (Byte)v;
326370b324cSopenharmony_ci      p[-1] = (Byte)(0xf8 | (v >> 8));
327370b324cSopenharmony_ci      */
328370b324cSopenharmony_ci    }
329370b324cSopenharmony_ci  }
330370b324cSopenharmony_ci  while (p < lim);
331370b324cSopenharmony_ci  return p;
332370b324cSopenharmony_ci  // armt_tail:
333370b324cSopenharmony_ci  // if ((Byte)((lim[1] & 0xf8)) != 0xf0) { lim += 2; }  return lim;
334370b324cSopenharmony_ci  // return (Byte *)(lim + ((Byte)((lim[1] ^ 0xf0) & 0xf8) == 0 ? 0 : 2));
335370b324cSopenharmony_ci  // return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2));
336370b324cSopenharmony_ci  // return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2));
337370b324cSopenharmony_ci}
338370b324cSopenharmony_ciZ7_BRANCH_FUNCS_IMP(ARMT)
339370b324cSopenharmony_ci
340370b324cSopenharmony_ci
341370b324cSopenharmony_ci// #define BR_IA64_NO_INLINE
342370b324cSopenharmony_ci
343370b324cSopenharmony_ciZ7_BRANCH_FUNC_MAIN(IA64)
344370b324cSopenharmony_ci{
345370b324cSopenharmony_ci  // Byte *p = data;
346370b324cSopenharmony_ci  const Byte *lim;
347370b324cSopenharmony_ci  size &= ~(SizeT)15;
348370b324cSopenharmony_ci  lim = p + size;
349370b324cSopenharmony_ci  pc -= 1 << 4;
350370b324cSopenharmony_ci  pc >>= 4 - 1;
351370b324cSopenharmony_ci  // pc -= 1 << 1;
352370b324cSopenharmony_ci
353370b324cSopenharmony_ci  for (;;)
354370b324cSopenharmony_ci  {
355370b324cSopenharmony_ci    unsigned m;
356370b324cSopenharmony_ci    for (;;)
357370b324cSopenharmony_ci    {
358370b324cSopenharmony_ci      if Z7_UNLIKELY(p == lim)
359370b324cSopenharmony_ci        return p;
360370b324cSopenharmony_ci      m = (unsigned)((UInt32)0x334b0000 >> (*p & 0x1e));
361370b324cSopenharmony_ci      p += 16;
362370b324cSopenharmony_ci      pc += 1 << 1;
363370b324cSopenharmony_ci      if (m &= 3)
364370b324cSopenharmony_ci        break;
365370b324cSopenharmony_ci    }
366370b324cSopenharmony_ci    {
367370b324cSopenharmony_ci      p += (ptrdiff_t)m * 5 - 20; // negative value is expected here.
368370b324cSopenharmony_ci      do
369370b324cSopenharmony_ci      {
370370b324cSopenharmony_ci        const UInt32 t =
371370b324cSopenharmony_ci          #if defined(MY_CPU_X86_OR_AMD64)
372370b324cSopenharmony_ci            // we use 32-bit load here to reduce code size on x86:
373370b324cSopenharmony_ci            GetUi32(p);
374370b324cSopenharmony_ci          #else
375370b324cSopenharmony_ci            GetUi16(p);
376370b324cSopenharmony_ci          #endif
377370b324cSopenharmony_ci        UInt32 z = GetUi32(p + 1) >> m;
378370b324cSopenharmony_ci        p += 5;
379370b324cSopenharmony_ci        if (((t >> m) & (0x70 << 1)) == 0
380370b324cSopenharmony_ci            && ((z - (0x5000000 << 1)) & (0xf000000 << 1)) == 0)
381370b324cSopenharmony_ci        {
382370b324cSopenharmony_ci          UInt32 v = (UInt32)((0x8fffff << 1) | 1) & z;
383370b324cSopenharmony_ci          z ^= v;
384370b324cSopenharmony_ci        #ifdef BR_IA64_NO_INLINE
385370b324cSopenharmony_ci          v |= (v & ((UInt32)1 << (23 + 1))) >> 3;
386370b324cSopenharmony_ci          {
387370b324cSopenharmony_ci            UInt32 c = pc;
388370b324cSopenharmony_ci            BR_CONVERT_VAL(v, c)
389370b324cSopenharmony_ci          }
390370b324cSopenharmony_ci          v &= (0x1fffff << 1) | 1;
391370b324cSopenharmony_ci        #else
392370b324cSopenharmony_ci          {
393370b324cSopenharmony_ci            if (encoding)
394370b324cSopenharmony_ci            {
395370b324cSopenharmony_ci              // pc &= ~(0xc00000 << 1); // we just need to clear at least 2 bits
396370b324cSopenharmony_ci              pc &= (0x1fffff << 1) | 1;
397370b324cSopenharmony_ci              v += pc;
398370b324cSopenharmony_ci            }
399370b324cSopenharmony_ci            else
400370b324cSopenharmony_ci            {
401370b324cSopenharmony_ci              // pc |= 0xc00000 << 1; // we need to set at least 2 bits
402370b324cSopenharmony_ci              pc |= ~(UInt32)((0x1fffff << 1) | 1);
403370b324cSopenharmony_ci              v -= pc;
404370b324cSopenharmony_ci            }
405370b324cSopenharmony_ci          }
406370b324cSopenharmony_ci          v &= ~(UInt32)(0x600000 << 1);
407370b324cSopenharmony_ci        #endif
408370b324cSopenharmony_ci          v += (0x700000 << 1);
409370b324cSopenharmony_ci          v &= (0x8fffff << 1) | 1;
410370b324cSopenharmony_ci          z |= v;
411370b324cSopenharmony_ci          z <<= m;
412370b324cSopenharmony_ci          SetUi32(p + 1 - 5, z)
413370b324cSopenharmony_ci        }
414370b324cSopenharmony_ci        m++;
415370b324cSopenharmony_ci      }
416370b324cSopenharmony_ci      while (m &= 3); // while (m < 4);
417370b324cSopenharmony_ci    }
418370b324cSopenharmony_ci  }
419370b324cSopenharmony_ci}
420370b324cSopenharmony_ciZ7_BRANCH_FUNCS_IMP(IA64)
421