xref: /third_party/lzma/C/Bcj2.c (revision 370b324c)
1370b324cSopenharmony_ci/* Bcj2.c -- BCJ2 Decoder (Converter for x86 code)
2370b324cSopenharmony_ci2023-03-01 : Igor Pavlov : Public domain */
3370b324cSopenharmony_ci
4370b324cSopenharmony_ci#include "Precomp.h"
5370b324cSopenharmony_ci
6370b324cSopenharmony_ci#include "Bcj2.h"
7370b324cSopenharmony_ci#include "CpuArch.h"
8370b324cSopenharmony_ci
9370b324cSopenharmony_ci#define kTopValue ((UInt32)1 << 24)
10370b324cSopenharmony_ci#define kNumBitModelTotalBits 11
11370b324cSopenharmony_ci#define kBitModelTotal (1 << kNumBitModelTotalBits)
12370b324cSopenharmony_ci#define kNumMoveBits 5
13370b324cSopenharmony_ci
14370b324cSopenharmony_ci// UInt32 bcj2_stats[256 + 2][2];
15370b324cSopenharmony_ci
16370b324cSopenharmony_civoid Bcj2Dec_Init(CBcj2Dec *p)
17370b324cSopenharmony_ci{
18370b324cSopenharmony_ci  unsigned i;
19370b324cSopenharmony_ci  p->state = BCJ2_STREAM_RC; // BCJ2_DEC_STATE_OK;
20370b324cSopenharmony_ci  p->ip = 0;
21370b324cSopenharmony_ci  p->temp = 0;
22370b324cSopenharmony_ci  p->range = 0;
23370b324cSopenharmony_ci  p->code = 0;
24370b324cSopenharmony_ci  for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)
25370b324cSopenharmony_ci    p->probs[i] = kBitModelTotal >> 1;
26370b324cSopenharmony_ci}
27370b324cSopenharmony_ci
28370b324cSopenharmony_ciSRes Bcj2Dec_Decode(CBcj2Dec *p)
29370b324cSopenharmony_ci{
30370b324cSopenharmony_ci  UInt32 v = p->temp;
31370b324cSopenharmony_ci  // const Byte *src;
32370b324cSopenharmony_ci  if (p->range <= 5)
33370b324cSopenharmony_ci  {
34370b324cSopenharmony_ci    UInt32 code = p->code;
35370b324cSopenharmony_ci    p->state = BCJ2_DEC_STATE_ERROR; /* for case if we return SZ_ERROR_DATA; */
36370b324cSopenharmony_ci    for (; p->range != 5; p->range++)
37370b324cSopenharmony_ci    {
38370b324cSopenharmony_ci      if (p->range == 1 && code != 0)
39370b324cSopenharmony_ci        return SZ_ERROR_DATA;
40370b324cSopenharmony_ci      if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
41370b324cSopenharmony_ci      {
42370b324cSopenharmony_ci        p->state = BCJ2_STREAM_RC;
43370b324cSopenharmony_ci        return SZ_OK;
44370b324cSopenharmony_ci      }
45370b324cSopenharmony_ci      code = (code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
46370b324cSopenharmony_ci      p->code = code;
47370b324cSopenharmony_ci    }
48370b324cSopenharmony_ci    if (code == 0xffffffff)
49370b324cSopenharmony_ci      return SZ_ERROR_DATA;
50370b324cSopenharmony_ci    p->range = 0xffffffff;
51370b324cSopenharmony_ci  }
52370b324cSopenharmony_ci  // else
53370b324cSopenharmony_ci  {
54370b324cSopenharmony_ci    unsigned state = p->state;
55370b324cSopenharmony_ci    // we check BCJ2_IS_32BIT_STREAM() here instead of check in the main loop
56370b324cSopenharmony_ci    if (BCJ2_IS_32BIT_STREAM(state))
57370b324cSopenharmony_ci    {
58370b324cSopenharmony_ci      const Byte *cur = p->bufs[state];
59370b324cSopenharmony_ci      if (cur == p->lims[state])
60370b324cSopenharmony_ci        return SZ_OK;
61370b324cSopenharmony_ci      p->bufs[state] = cur + 4;
62370b324cSopenharmony_ci      {
63370b324cSopenharmony_ci        const UInt32 ip = p->ip + 4;
64370b324cSopenharmony_ci        v = GetBe32a(cur) - ip;
65370b324cSopenharmony_ci        p->ip = ip;
66370b324cSopenharmony_ci      }
67370b324cSopenharmony_ci      state = BCJ2_DEC_STATE_ORIG_0;
68370b324cSopenharmony_ci    }
69370b324cSopenharmony_ci    if ((unsigned)(state - BCJ2_DEC_STATE_ORIG_0) < 4)
70370b324cSopenharmony_ci    {
71370b324cSopenharmony_ci      Byte *dest = p->dest;
72370b324cSopenharmony_ci      for (;;)
73370b324cSopenharmony_ci      {
74370b324cSopenharmony_ci        if (dest == p->destLim)
75370b324cSopenharmony_ci        {
76370b324cSopenharmony_ci          p->state = state;
77370b324cSopenharmony_ci          p->temp = v;
78370b324cSopenharmony_ci          return SZ_OK;
79370b324cSopenharmony_ci        }
80370b324cSopenharmony_ci        *dest++ = (Byte)v;
81370b324cSopenharmony_ci        p->dest = dest;
82370b324cSopenharmony_ci        if (++state == BCJ2_DEC_STATE_ORIG_3 + 1)
83370b324cSopenharmony_ci          break;
84370b324cSopenharmony_ci        v >>= 8;
85370b324cSopenharmony_ci      }
86370b324cSopenharmony_ci    }
87370b324cSopenharmony_ci  }
88370b324cSopenharmony_ci
89370b324cSopenharmony_ci  // src = p->bufs[BCJ2_STREAM_MAIN];
90370b324cSopenharmony_ci  for (;;)
91370b324cSopenharmony_ci  {
92370b324cSopenharmony_ci    /*
93370b324cSopenharmony_ci    if (BCJ2_IS_32BIT_STREAM(p->state))
94370b324cSopenharmony_ci      p->state = BCJ2_DEC_STATE_OK;
95370b324cSopenharmony_ci    else
96370b324cSopenharmony_ci    */
97370b324cSopenharmony_ci    {
98370b324cSopenharmony_ci      if (p->range < kTopValue)
99370b324cSopenharmony_ci      {
100370b324cSopenharmony_ci        if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
101370b324cSopenharmony_ci        {
102370b324cSopenharmony_ci          p->state = BCJ2_STREAM_RC;
103370b324cSopenharmony_ci          p->temp = v;
104370b324cSopenharmony_ci          return SZ_OK;
105370b324cSopenharmony_ci        }
106370b324cSopenharmony_ci        p->range <<= 8;
107370b324cSopenharmony_ci        p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
108370b324cSopenharmony_ci      }
109370b324cSopenharmony_ci      {
110370b324cSopenharmony_ci        const Byte *src = p->bufs[BCJ2_STREAM_MAIN];
111370b324cSopenharmony_ci        const Byte *srcLim;
112370b324cSopenharmony_ci        Byte *dest = p->dest;
113370b324cSopenharmony_ci        {
114370b324cSopenharmony_ci          const SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src);
115370b324cSopenharmony_ci          SizeT num = (SizeT)(p->destLim - dest);
116370b324cSopenharmony_ci          if (num >= rem)
117370b324cSopenharmony_ci            num = rem;
118370b324cSopenharmony_ci        #define NUM_ITERS 4
119370b324cSopenharmony_ci        #if (NUM_ITERS & (NUM_ITERS - 1)) == 0
120370b324cSopenharmony_ci          num &= ~((SizeT)NUM_ITERS - 1);   // if (NUM_ITERS == (1 << x))
121370b324cSopenharmony_ci        #else
122370b324cSopenharmony_ci          num -= num % NUM_ITERS; // if (NUM_ITERS != (1 << x))
123370b324cSopenharmony_ci        #endif
124370b324cSopenharmony_ci          srcLim = src + num;
125370b324cSopenharmony_ci        }
126370b324cSopenharmony_ci
127370b324cSopenharmony_ci        #define NUM_SHIFT_BITS  24
128370b324cSopenharmony_ci        #define ONE_ITER(indx) { \
129370b324cSopenharmony_ci          const unsigned b = src[indx]; \
130370b324cSopenharmony_ci          *dest++ = (Byte)b; \
131370b324cSopenharmony_ci          v = (v << NUM_SHIFT_BITS) | b; \
132370b324cSopenharmony_ci          if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \
133370b324cSopenharmony_ci          if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \
134370b324cSopenharmony_ci              ((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \
135370b324cSopenharmony_ci            /* ++dest */; /* v = b; */ }
136370b324cSopenharmony_ci
137370b324cSopenharmony_ci        if (src != srcLim)
138370b324cSopenharmony_ci        for (;;)
139370b324cSopenharmony_ci        {
140370b324cSopenharmony_ci            /* The dependency chain of 2-cycle for (v) calculation is not big problem here.
141370b324cSopenharmony_ci               But we can remove dependency chain with v = b in the end of loop. */
142370b324cSopenharmony_ci          ONE_ITER(0)
143370b324cSopenharmony_ci          #if (NUM_ITERS > 1)
144370b324cSopenharmony_ci            ONE_ITER(1)
145370b324cSopenharmony_ci          #if (NUM_ITERS > 2)
146370b324cSopenharmony_ci            ONE_ITER(2)
147370b324cSopenharmony_ci          #if (NUM_ITERS > 3)
148370b324cSopenharmony_ci            ONE_ITER(3)
149370b324cSopenharmony_ci          #if (NUM_ITERS > 4)
150370b324cSopenharmony_ci            ONE_ITER(4)
151370b324cSopenharmony_ci          #if (NUM_ITERS > 5)
152370b324cSopenharmony_ci            ONE_ITER(5)
153370b324cSopenharmony_ci          #if (NUM_ITERS > 6)
154370b324cSopenharmony_ci            ONE_ITER(6)
155370b324cSopenharmony_ci          #if (NUM_ITERS > 7)
156370b324cSopenharmony_ci            ONE_ITER(7)
157370b324cSopenharmony_ci          #endif
158370b324cSopenharmony_ci          #endif
159370b324cSopenharmony_ci          #endif
160370b324cSopenharmony_ci          #endif
161370b324cSopenharmony_ci          #endif
162370b324cSopenharmony_ci          #endif
163370b324cSopenharmony_ci          #endif
164370b324cSopenharmony_ci
165370b324cSopenharmony_ci          src += NUM_ITERS;
166370b324cSopenharmony_ci          if (src == srcLim)
167370b324cSopenharmony_ci            break;
168370b324cSopenharmony_ci        }
169370b324cSopenharmony_ci
170370b324cSopenharmony_ci        if (src == srcLim)
171370b324cSopenharmony_ci      #if (NUM_ITERS > 1)
172370b324cSopenharmony_ci        for (;;)
173370b324cSopenharmony_ci      #endif
174370b324cSopenharmony_ci        {
175370b324cSopenharmony_ci        #if (NUM_ITERS > 1)
176370b324cSopenharmony_ci          if (src == p->lims[BCJ2_STREAM_MAIN] || dest == p->destLim)
177370b324cSopenharmony_ci        #endif
178370b324cSopenharmony_ci          {
179370b324cSopenharmony_ci            const SizeT num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]);
180370b324cSopenharmony_ci            p->bufs[BCJ2_STREAM_MAIN] = src;
181370b324cSopenharmony_ci            p->dest = dest;
182370b324cSopenharmony_ci            p->ip += (UInt32)num;
183370b324cSopenharmony_ci            /* state BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */
184370b324cSopenharmony_ci            p->state =
185370b324cSopenharmony_ci              src == p->lims[BCJ2_STREAM_MAIN] ?
186370b324cSopenharmony_ci                (unsigned)BCJ2_STREAM_MAIN :
187370b324cSopenharmony_ci                (unsigned)BCJ2_DEC_STATE_ORIG;
188370b324cSopenharmony_ci            p->temp = v;
189370b324cSopenharmony_ci            return SZ_OK;
190370b324cSopenharmony_ci          }
191370b324cSopenharmony_ci        #if (NUM_ITERS > 1)
192370b324cSopenharmony_ci          ONE_ITER(0)
193370b324cSopenharmony_ci          src++;
194370b324cSopenharmony_ci        #endif
195370b324cSopenharmony_ci        }
196370b324cSopenharmony_ci
197370b324cSopenharmony_ci        {
198370b324cSopenharmony_ci          const SizeT num = (SizeT)(dest - p->dest);
199370b324cSopenharmony_ci          p->dest = dest; // p->dest += num;
200370b324cSopenharmony_ci          p->bufs[BCJ2_STREAM_MAIN] += num; // = src;
201370b324cSopenharmony_ci          p->ip += (UInt32)num;
202370b324cSopenharmony_ci        }
203370b324cSopenharmony_ci        {
204370b324cSopenharmony_ci          UInt32 bound, ttt;
205370b324cSopenharmony_ci          CBcj2Prob *prob; // unsigned index;
206370b324cSopenharmony_ci          /*
207370b324cSopenharmony_ci          prob = p->probs + (unsigned)((Byte)v == 0xe8 ?
208370b324cSopenharmony_ci              2 + (Byte)(v >> 8) :
209370b324cSopenharmony_ci              ((v >> 5) & 1));  // ((Byte)v < 0xe8 ? 0 : 1));
210370b324cSopenharmony_ci          */
211370b324cSopenharmony_ci          {
212370b324cSopenharmony_ci            const unsigned c = ((v + 0x17) >> 6) & 1;
213370b324cSopenharmony_ci            prob = p->probs + (unsigned)
214370b324cSopenharmony_ci                (((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1));
215370b324cSopenharmony_ci                // (Byte)
216370b324cSopenharmony_ci                // 8x->0     : e9->1     : xxe8->xx+2
217370b324cSopenharmony_ci                // 8x->0x100 : e9->0x101 : xxe8->xx
218370b324cSopenharmony_ci                // (((0x100 - (e & ~v)) & (0x100 | (v >> 8))) + (e & v));
219370b324cSopenharmony_ci                // (((0x101 + (~e | v)) & (0x100 | (v >> 8))) + (e & v));
220370b324cSopenharmony_ci          }
221370b324cSopenharmony_ci          ttt = *prob;
222370b324cSopenharmony_ci          bound = (p->range >> kNumBitModelTotalBits) * ttt;
223370b324cSopenharmony_ci          if (p->code < bound)
224370b324cSopenharmony_ci          {
225370b324cSopenharmony_ci            // bcj2_stats[prob - p->probs][0]++;
226370b324cSopenharmony_ci            p->range = bound;
227370b324cSopenharmony_ci            *prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
228370b324cSopenharmony_ci            continue;
229370b324cSopenharmony_ci          }
230370b324cSopenharmony_ci          {
231370b324cSopenharmony_ci            // bcj2_stats[prob - p->probs][1]++;
232370b324cSopenharmony_ci            p->range -= bound;
233370b324cSopenharmony_ci            p->code -= bound;
234370b324cSopenharmony_ci            *prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits));
235370b324cSopenharmony_ci          }
236370b324cSopenharmony_ci        }
237370b324cSopenharmony_ci      }
238370b324cSopenharmony_ci    }
239370b324cSopenharmony_ci    {
240370b324cSopenharmony_ci      /* (v == 0xe8 ? 0 : 1) uses setcc instruction with additional zero register usage in x64 MSVC. */
241370b324cSopenharmony_ci      // const unsigned cj = ((Byte)v == 0xe8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;
242370b324cSopenharmony_ci      const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL;
243370b324cSopenharmony_ci      const Byte *cur = p->bufs[cj];
244370b324cSopenharmony_ci      Byte *dest;
245370b324cSopenharmony_ci      SizeT rem;
246370b324cSopenharmony_ci      if (cur == p->lims[cj])
247370b324cSopenharmony_ci      {
248370b324cSopenharmony_ci        p->state = cj;
249370b324cSopenharmony_ci        break;
250370b324cSopenharmony_ci      }
251370b324cSopenharmony_ci      v = GetBe32a(cur);
252370b324cSopenharmony_ci      p->bufs[cj] = cur + 4;
253370b324cSopenharmony_ci      {
254370b324cSopenharmony_ci        const UInt32 ip = p->ip + 4;
255370b324cSopenharmony_ci        v -= ip;
256370b324cSopenharmony_ci        p->ip = ip;
257370b324cSopenharmony_ci      }
258370b324cSopenharmony_ci      dest = p->dest;
259370b324cSopenharmony_ci      rem = (SizeT)(p->destLim - dest);
260370b324cSopenharmony_ci      if (rem < 4)
261370b324cSopenharmony_ci      {
262370b324cSopenharmony_ci        if ((unsigned)rem > 0) { dest[0] = (Byte)v;  v >>= 8;
263370b324cSopenharmony_ci        if ((unsigned)rem > 1) { dest[1] = (Byte)v;  v >>= 8;
264370b324cSopenharmony_ci        if ((unsigned)rem > 2) { dest[2] = (Byte)v;  v >>= 8; }}}
265370b324cSopenharmony_ci        p->temp = v;
266370b324cSopenharmony_ci        p->dest = dest + rem;
267370b324cSopenharmony_ci        p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem;
268370b324cSopenharmony_ci        break;
269370b324cSopenharmony_ci      }
270370b324cSopenharmony_ci      SetUi32(dest, v)
271370b324cSopenharmony_ci      v >>= 24;
272370b324cSopenharmony_ci      p->dest = dest + 4;
273370b324cSopenharmony_ci    }
274370b324cSopenharmony_ci  }
275370b324cSopenharmony_ci
276370b324cSopenharmony_ci  if (p->range < kTopValue && p->bufs[BCJ2_STREAM_RC] != p->lims[BCJ2_STREAM_RC])
277370b324cSopenharmony_ci  {
278370b324cSopenharmony_ci    p->range <<= 8;
279370b324cSopenharmony_ci    p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
280370b324cSopenharmony_ci  }
281370b324cSopenharmony_ci  return SZ_OK;
282370b324cSopenharmony_ci}
283370b324cSopenharmony_ci
284370b324cSopenharmony_ci#undef NUM_ITERS
285370b324cSopenharmony_ci#undef ONE_ITER
286370b324cSopenharmony_ci#undef NUM_SHIFT_BITS
287370b324cSopenharmony_ci#undef kTopValue
288370b324cSopenharmony_ci#undef kNumBitModelTotalBits
289370b324cSopenharmony_ci#undef kBitModelTotal
290370b324cSopenharmony_ci#undef kNumMoveBits
291