xref: /third_party/lzma/C/Bcj2Enc.c (revision 370b324c)
/* Bcj2Enc.c -- BCJ2 Encoder converter for x86 code (Branch CALL/JUMP variant2)
2023-04-02 : Igor Pavlov : Public domain */
3370b324cSopenharmony_ci
4370b324cSopenharmony_ci#include "Precomp.h"
5370b324cSopenharmony_ci
/* Debug tracing switch: define SHOW_STAT to make PRF2() print the encoder state. */
/* #define SHOW_STAT */
#ifdef SHOW_STAT
#include <stdio.h>
/* PRF2(s) : prints the tag (s) followed by p->ip64 (truncated to unsigned),
   p->tempPos and the number of unread source bytes (p->srcLim - p->src).
   It expects a (CBcj2Enc *p) variable to be in scope at the point of use. */
#define PRF2(s) printf("%s ip=%8x  tempPos=%d  src= %8x\n", s, (unsigned)p->ip64, p->tempPos, (unsigned)(p->srcLim - p->src));
#else
/* tracing is disabled: PRF2() expands to nothing */
#define PRF2(s)
#endif
13370b324cSopenharmony_ci
14370b324cSopenharmony_ci#include "Bcj2.h"
15370b324cSopenharmony_ci#include "CpuArch.h"
16370b324cSopenharmony_ci
/* Range coder parameters. */
/* (range) is renormalized (shifted left by 8 bits) whenever it drops below kTopValue */
#define kTopValue ((UInt32)1 << 24)
/* probabilities are fixed-point values with kNumBitModelTotalBits fractional bits */
#define kNumBitModelTotalBits 11
/* fixed-point representation of probability 1.0; the initial probability is (kBitModelTotal >> 1) */
#define kBitModelTotal (1 << kNumBitModelTotalBits)
/* adaptation speed: each coded bit moves the probability by 1 / (1 << kNumMoveBits) of the remaining distance */
#define kNumMoveBits 5
21370b324cSopenharmony_ci
22370b324cSopenharmony_civoid Bcj2Enc_Init(CBcj2Enc *p)
23370b324cSopenharmony_ci{
24370b324cSopenharmony_ci  unsigned i;
25370b324cSopenharmony_ci  p->state = BCJ2_ENC_STATE_ORIG;
26370b324cSopenharmony_ci  p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;
27370b324cSopenharmony_ci  p->context = 0;
28370b324cSopenharmony_ci  p->flushRem = 5;
29370b324cSopenharmony_ci  p->isFlushState = 0;
30370b324cSopenharmony_ci  p->cache = 0;
31370b324cSopenharmony_ci  p->range = 0xffffffff;
32370b324cSopenharmony_ci  p->low = 0;
33370b324cSopenharmony_ci  p->cacheSize = 1;
34370b324cSopenharmony_ci  p->ip64 = 0;
35370b324cSopenharmony_ci  p->fileIp64 = 0;
36370b324cSopenharmony_ci  p->fileSize64_minus1 = BCJ2_ENC_FileSizeField_UNLIMITED;
37370b324cSopenharmony_ci  p->relatLimit = BCJ2_ENC_RELAT_LIMIT_DEFAULT;
38370b324cSopenharmony_ci  // p->relatExcludeBits = 0;
39370b324cSopenharmony_ci  p->tempPos = 0;
40370b324cSopenharmony_ci  for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)
41370b324cSopenharmony_ci    p->probs[i] = kBitModelTotal >> 1;
42370b324cSopenharmony_ci}
43370b324cSopenharmony_ci
44370b324cSopenharmony_ci// Z7_NO_INLINE
45370b324cSopenharmony_ciZ7_FORCE_INLINE
46370b324cSopenharmony_cistatic BoolInt Bcj2_RangeEnc_ShiftLow(CBcj2Enc *p)
47370b324cSopenharmony_ci{
48370b324cSopenharmony_ci  const UInt32 low = (UInt32)p->low;
49370b324cSopenharmony_ci  const unsigned high = (unsigned)
50370b324cSopenharmony_ci    #if defined(Z7_MSC_VER_ORIGINAL) \
51370b324cSopenharmony_ci        && defined(MY_CPU_X86) \
52370b324cSopenharmony_ci        && defined(MY_CPU_LE) \
53370b324cSopenharmony_ci        && !defined(MY_CPU_64BIT)
54370b324cSopenharmony_ci      // we try to rid of __aullshr() call in MSVS-x86
55370b324cSopenharmony_ci      (((const UInt32 *)&p->low)[1]); // [1] : for little-endian only
56370b324cSopenharmony_ci    #else
57370b324cSopenharmony_ci      (p->low >> 32);
58370b324cSopenharmony_ci    #endif
59370b324cSopenharmony_ci  if (low < (UInt32)0xff000000 || high != 0)
60370b324cSopenharmony_ci  {
61370b324cSopenharmony_ci    Byte *buf = p->bufs[BCJ2_STREAM_RC];
62370b324cSopenharmony_ci    do
63370b324cSopenharmony_ci    {
64370b324cSopenharmony_ci      if (buf == p->lims[BCJ2_STREAM_RC])
65370b324cSopenharmony_ci      {
66370b324cSopenharmony_ci        p->state = BCJ2_STREAM_RC;
67370b324cSopenharmony_ci        p->bufs[BCJ2_STREAM_RC] = buf;
68370b324cSopenharmony_ci        return True;
69370b324cSopenharmony_ci      }
70370b324cSopenharmony_ci      *buf++ = (Byte)(p->cache + high);
71370b324cSopenharmony_ci      p->cache = 0xff;
72370b324cSopenharmony_ci    }
73370b324cSopenharmony_ci    while (--p->cacheSize);
74370b324cSopenharmony_ci    p->bufs[BCJ2_STREAM_RC] = buf;
75370b324cSopenharmony_ci    p->cache = (Byte)(low >> 24);
76370b324cSopenharmony_ci  }
77370b324cSopenharmony_ci  p->cacheSize++;
78370b324cSopenharmony_ci  p->low = low << 8;
79370b324cSopenharmony_ci  return False;
80370b324cSopenharmony_ci}
81370b324cSopenharmony_ci
82370b324cSopenharmony_ci
/*
We can use 2 alternative versions of code:
1) non-marker version:
  Byte CBcj2Enc::context
  Byte temp[8];
  Last byte of marker (e8/e9/[0f]8x) can be written to temp[] buffer.
  Encoder writes last byte of marker (e8/e9/[0f]8x) to dest, only in conjunction
  with writing branch symbol to range coder in same Bcj2Enc_Encode_2() call.

2) marker version:
  UInt32 CBcj2Enc::context
  Byte CBcj2Enc::temp[4];
  MARKER_FLAG in CBcj2Enc::context shows that CBcj2Enc::context contains
  the marker that was found.
  It's allowed that
    one call of Bcj2Enc_Encode_2() writes last byte of marker (e8/e9/[0f]8x) to dest,
    and another call of Bcj2Enc_Encode_2() does offset conversion.
    So different values of (fileIp) and (fileSize) are possible
    in these different Bcj2Enc_Encode_2() calls.

Also, the marker version requires an additional if ((v & MARKER_FLAG) == 0) check in main loop.
So we use the non-marker version.
*/
105370b324cSopenharmony_ci
/*
  Corner cases with overlap in multi-block.
  Before v23: there was one corner case, where a converted instruction
    could start in one sub-stream and finish in the next sub-stream.
  If multi-block (solid) encoding is used,
    and BCJ2_ENC_FINISH_MODE_END_BLOCK is used for each sub-stream,
    and (0f) is the last byte of the previous sub-stream,
    and (8x) is the first byte of the current sub-stream,
  then the (0f 8x) pair is treated as a marker by BCJ2 encoder and decoder.
  BCJ2 encoder can convert the 32-bit offset for that (0f 8x) sequence,
  if that offset meets limit requirements.
  If encoder allows 32-bit offset conversion for such overlap case,
  then the data in 3 uncompressed BCJ2 streams for some sub-stream
  can depend on data of the previous sub-stream.
  That corner case is not a big problem, and it's a rare case.
  Since v23.00 we do an additional check to prevent conversions in such overlap cases.
*/
123370b324cSopenharmony_ci
/*
  Bcj2Enc_Encode_2() output variables at exit:
  {
    if (Bcj2Enc_Encode_2() exits with (p->state == BCJ2_ENC_STATE_ORIG))
    {
      it means that encoder needs more input data.
      if (p->srcLim == p->src) at exit, then
      {
        (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM)
        all input data were read and processed, and we are ready for
        new input data.
      }
      else
      {
        (p->srcLim != p->src)
        (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)
          The encoder has found e8/e9/0f_8x marker,
          and p->src points to last byte of that marker,
          Bcj2Enc_Encode_2() needs more input data to get 5 bytes
          in total (last byte of marker and 32-bit branch offset)
          as continuous array starting from p->src.
        (p->srcLim - p->src < 5) requirement is met after exit.
          So non-processed residue from p->src to p->srcLim is always less than 5 bytes.
      }
    }
  }
*/
151370b324cSopenharmony_ci
/*
  Bcj2Enc_Encode_2() : one resumable encoding pass over [p->src, p->srcLim).

  Source bytes are copied to the MAIN stream. After each e8 / e9 / (0f 8x) marker
  one adaptive bit is written to the range coder (RC stream):
    bit 0 : no conversion. The bytes after the marker are processed later
            as ordinary data.
    bit 1 : conversion. The 4 bytes after the marker (32-bit relative offset) are
            consumed without being copied to MAIN; (ip + relat) is stored as a
            32-bit value to the CALL stream (after e8) or to the JUMP stream
            (after e9 / 0f 8x).
            NOTE(review): SetBe32a() is defined outside this file; its name suggests
            a big-endian store to an aligned address.

  The function returns as soon as it can't continue. p->state is the reason:
    BCJ2_STREAM_MAIN / BCJ2_STREAM_CALL / BCJ2_STREAM_JUMP / BCJ2_STREAM_RC :
        that output buffer is full,
    BCJ2_ENC_STATE_ORIG :
        more input is required,
    BCJ2_ENC_STATE_FINISHED :
        (finishMode == BCJ2_ENC_FINISH_MODE_END_STREAM), all input was processed
        and the range coder was flushed.
  All progress is kept in (p), so the next call continues from the same point.
*/
Z7_NO_INLINE
static void Bcj2Enc_Encode_2(CBcj2Enc *p)
{
  if (!p->isFlushState)
  {
    const Byte *src;
    UInt32 v;
    {
      /* If the previous call stopped because the CALL or JUMP buffer was full,
         the converted value is still waiting in p->tempTarget: write it now,
         or return again with the same state if there is still no room. */
      const unsigned state = p->state;
      if (BCJ2_IS_32BIT_STREAM(state))
      {
        Byte *cur = p->bufs[state];
        if (cur == p->lims[state])
          return;
        SetBe32a(cur, p->tempTarget)
        p->bufs[state] = cur + 4;
      }
    }
    p->state = BCJ2_ENC_STATE_ORIG; // default exit reason: more input is needed
    src = p->src;
    v = p->context;

    /* (src) and (v) are kept in locals; these macros store them back to (p)
       before any return and before calls that read them from (p). */
    // #define WRITE_CONTEXT  p->context = v; // for marker version
    #define WRITE_CONTEXT           p->context = (Byte)v;
    #define WRITE_CONTEXT_AND_SRC   p->src = src;  WRITE_CONTEXT

    for (;;)
    {
      // const Byte *src;
      // UInt32 v;
      CBcj2Enc_ip_unsigned ip;
      /* range coder normalization */
      if (p->range < kTopValue)
      {
        // to reduce register pressure and code size: we save and restore local variables.
        WRITE_CONTEXT_AND_SRC
        if (Bcj2_RangeEnc_ShiftLow(p))
          return; // RC buffer is full: (p->state == BCJ2_STREAM_RC)
        p->range <<= 8;
        src = p->src;
        v = p->context;
      }
      // src = p->src;
      // #define MARKER_FLAG  ((UInt32)1 << 17)
      // if ((v & MARKER_FLAG) == 0) // for marker version
      {
        const Byte *srcLim;
        Byte *dest = p->bufs[BCJ2_STREAM_MAIN];
        {
          /* the scan is limited by both the remaining input
             and the remaining room in the MAIN buffer */
          const SizeT remSrc = (SizeT)(p->srcLim - src);
          SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest);
          if (rem >= remSrc)
            rem = remSrc;
          srcLim = src + rem;
        }
        /* p->context contains context of previous byte:
           bits [0 : 7]  : src[-1], if (src) was changed in this call
           bits [8 : 31] : are undefined for non-marker version
        */
        // v = p->context;
        /* ONE_ITER : copies one byte to (dest) and shifts it into (v), so that
             bits [NUM_SHIFT_BITS : NUM_SHIFT_BITS + 7] of (v) hold the previous byte
             and bits [0 : 7] hold the current byte.
           It leaves the enclosing loop:
             - if the byte is e8 or e9,
             - if the previous byte is 0f and the byte is in range [80 : 8f],
             - if the end of the scan window is reached (src == srcLim).
           In the two marker cases (src) still points to the marker byte,
           which is already copied to (dest). */
        #define NUM_SHIFT_BITS  24
        #define CONV_FLAG  ((UInt32)1 << 16)
        #define ONE_ITER { \
          b = src[0]; \
          *dest++ = (Byte)b; \
          v = (v << NUM_SHIFT_BITS) | b; \
          if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \
          if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \
              ((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \
          src++; if (src == srcLim) { break; } }

        if (src != srcLim)
        for (;;)
        {
          /* clang can generate inefficient code with setne instead of two jcc instructions.
             we can use 2 iterations and external (unsigned b) to avoid that inefficient code generation. */
          unsigned b;
          ONE_ITER
          ONE_ITER
        }

        /* advance the virtual position by the number of bytes copied to MAIN */
        ip = p->ip64 + (CBcj2Enc_ip_unsigned)(SizeT)(dest - p->bufs[BCJ2_STREAM_MAIN]);
        p->bufs[BCJ2_STREAM_MAIN] = dest;
        p->ip64 = ip;

        if (src == srcLim)
        {
          /* no marker was found in the scan window */
          WRITE_CONTEXT_AND_SRC
          if (src != p->srcLim)
          {
            /* the window was cut by the MAIN buffer, not by the end of input */
            p->state = BCJ2_STREAM_MAIN;
            return;
          }
          /* (p->src == p->srcLim)
          (p->state == BCJ2_ENC_STATE_ORIG) */
          if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM)
            return;
          /* (p->finishMode == BCJ2_ENC_FINISH_MODE_END_STREAM */
          // (p->flushRem == 5);
          p->isFlushState = 1;
          break; // go to the range coder flush below
        }
        src++; // step over the marker byte, which is already copied to MAIN
        // p->src = src;
      }
      // ip = p->ip; // for marker version
      /* marker was found */
      /* (v) contains marker that was found:
           bits [NUM_SHIFT_BITS : NUM_SHIFT_BITS + 7]
                         : value of src[-2] : xx/xx/0f
           bits [0 : 7]  : value of src[-1] : e8/e9/8x
      */
      {
        {
        #if NUM_SHIFT_BITS != 24
          v &= ~(UInt32)CONV_FLAG;
        #endif
          // UInt32 relat = 0;
          if ((SizeT)(p->srcLim - src) >= 4)
          {
            /*
            if (relat != 0 || (Byte)v != 0xe8)
            BoolInt isBigOffset = True;
            */
            /* the whole 32-bit offset is available: decide whether to convert it */
            const UInt32 relat = GetUi32(src);
            /*
            #define EXCLUDE_FLAG  ((UInt32)1 << 4)
            #define NEED_CONVERT(rel) ((((rel) + EXCLUDE_FLAG) & (0 - EXCLUDE_FLAG * 2)) != 0)
            if (p->relatExcludeBits != 0)
            {
              const UInt32 flag = (UInt32)1 << (p->relatExcludeBits - 1);
              isBigOffset = (((relat + flag) & (0 - flag * 2)) != 0);
            }
            // isBigOffset = False; // for debug
            */
            /* (ip) becomes the position after the marker, relative to the start of current file.
               The offset is converted only if all 3 checks pass:
                 1) the whole marker belongs to the current file:
                    (ip > 0) for e8/e9, (ip > 1) for (0f 8x),
                 2) the branch target (ip + 4 + relat) is not beyond p->fileSize64_minus1,
                 3) (relat) is in range [-relatLimit : relatLimit). */
            ip -= p->fileIp64;
            // Use the following if check, if (ip) is 64-bit:
            if (ip > (((v + 0x20) >> 5) & 1))  // 23.00 : we eliminate multi-block overlap for (0f 8x) and (e8/e9)
            if ((CBcj2Enc_ip_unsigned)((CBcj2Enc_ip_signed)ip + 4 + (Int32)relat) <= p->fileSize64_minus1)
            if (((UInt32)(relat + p->relatLimit) >> 1) < p->relatLimit)
              v |= CONV_FLAG;
          }
          else if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)
          {
            // (p->srcLim - src < 4)
            /* The offset is not fully available yet, and more input can follow.
               For non-marker version: we take the marker byte back (from the MAIN
               stream, from the position and from the context), so that the next call
               sees the marker again together with the whole offset. */
            p->ip64--; // p->ip = ip - 1;
            p->bufs[BCJ2_STREAM_MAIN]--;
            src--;
            v >>= NUM_SHIFT_BITS;
            // (0 < p->srcLim - p->src <= 4)
            // v |= MARKER_FLAG; // for marker version
            /* (p->state == BCJ2_ENC_STATE_ORIG) */
            WRITE_CONTEXT_AND_SRC
            return;
          }
          /* else: fewer than 4 bytes remain and no more input follows for this block:
             CONV_FLAG is not set, so the marker is coded as "not converted". */
          {
            /* probability model selection:
                 c == 1 only for e8,
                 index = 2 + (byte before marker) for e8, 1 for e9, 0 for (0f 8x) */
            const unsigned c = ((v + 0x17) >> 6) & 1;
            CBcj2Prob *prob = p->probs + (unsigned)
                (((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1));
            /*
                ((Byte)v == 0xe8 ? 2 + ((Byte)(v >> 8)) :
                ((Byte)v < 0xe8 ? 0 : 1));  // ((v >> 5) & 1));
            */
            const unsigned ttt = *prob;
            const UInt32 bound = (p->range >> kNumBitModelTotalBits) * ttt;
            if ((v & CONV_FLAG) == 0)
            {
              /* bit 0 : no conversion. The scan continues right after the marker. */
              // static int yyy = 0; yyy++; printf("\n!needConvert = %d\n", yyy);
              // v = (Byte)v; // for marker version
              p->range = bound;
              *prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
              // WRITE_CONTEXT_AND_SRC
              continue;
            }
            /* bit 1 : the offset is converted */
            p->low += bound;
            p->range -= bound;
            *prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits));
          }
          // p->context = src[3];
          {
            /* cj : BCJ2_STREAM_CALL for e8, (BCJ2_STREAM_CALL + 1) for e9 and (0f 8x) */
            // const unsigned cj = ((Byte)v == 0xe8 ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP);
            const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL;
            ip = p->ip64;
            v = GetUi32(src); // relat
            ip += 4; // the 4 offset bytes are consumed; they are not copied to MAIN
            p->ip64 = ip;
            src += 4;
            // p->src = src;
            {
              const UInt32 absol = (UInt32)ip + v;
              Byte *cur = p->bufs[cj];
              v >>= 24; // new context: the last byte of the offset (src[-1])
              // WRITE_CONTEXT
              if (cur == p->lims[cj])
              {
                /* no room in CALL/JUMP buffer: keep the value in p->tempTarget;
                   it will be written at the start of the next call */
                p->state = cj;
                p->tempTarget = absol;
                WRITE_CONTEXT_AND_SRC
                return;
              }
              SetBe32a(cur, absol)
              p->bufs[cj] = cur + 4;
            }
          }
        }
      }
    } // end of loop
  }

  /* flush state: write out the remaining range coder bytes
     (p->flushRem calls of ShiftLow; Bcj2Enc_Init() sets it to 5).
     If the RC buffer gets full, we return with (p->state == BCJ2_STREAM_RC)
     and continue the flush in the next call. */
  for (; p->flushRem != 0; p->flushRem--)
    if (Bcj2_RangeEnc_ShiftLow(p))
      return;
  p->state = BCJ2_ENC_STATE_FINISHED;
}
368370b324cSopenharmony_ci
369370b324cSopenharmony_ci
/*
BCJ2 encoder needs to look ahead up to 4 bytes in the (src) buffer.
So the base function Bcj2Enc_Encode_2()
  in BCJ2_ENC_FINISH_MODE_CONTINUE mode can return with
  (p->state == BCJ2_ENC_STATE_ORIG && p->src < p->srcLim)
Bcj2Enc_Encode() solves that look-ahead problem by using the p->temp[] buffer.
  So if (p->state == BCJ2_ENC_STATE_ORIG) after Bcj2Enc_Encode(),
    then (p->src == p->srcLim).
  And the caller's code is simpler with Bcj2Enc_Encode().
*/
380370b324cSopenharmony_ci
381370b324cSopenharmony_ciZ7_NO_INLINE
382370b324cSopenharmony_civoid Bcj2Enc_Encode(CBcj2Enc *p)
383370b324cSopenharmony_ci{
384370b324cSopenharmony_ci  PRF2("\n----")
385370b324cSopenharmony_ci  if (p->tempPos != 0)
386370b324cSopenharmony_ci  {
387370b324cSopenharmony_ci    /* extra: number of bytes that were copied from (src) to (temp) buffer in this call */
388370b324cSopenharmony_ci    unsigned extra = 0;
389370b324cSopenharmony_ci    /* We will touch only minimal required number of bytes in input (src) stream.
390370b324cSopenharmony_ci       So we will add input bytes from (src) stream to temp[] with step of 1 byte.
391370b324cSopenharmony_ci       We don't add new bytes to temp[] before Bcj2Enc_Encode_2() call
392370b324cSopenharmony_ci         in first loop iteration because
393370b324cSopenharmony_ci         - previous call of Bcj2Enc_Encode() could use another (finishMode),
394370b324cSopenharmony_ci         - previous call could finish with (p->state != BCJ2_ENC_STATE_ORIG).
395370b324cSopenharmony_ci       the case with full temp[] buffer (p->tempPos == 4) is possible here.
396370b324cSopenharmony_ci    */
397370b324cSopenharmony_ci    for (;;)
398370b324cSopenharmony_ci    {
399370b324cSopenharmony_ci      // (0 < p->tempPos <= 5) // in non-marker version
400370b324cSopenharmony_ci      /* p->src : the current src data position including extra bytes
401370b324cSopenharmony_ci                  that were copied to temp[] buffer in this call */
402370b324cSopenharmony_ci      const Byte *src = p->src;
403370b324cSopenharmony_ci      const Byte *srcLim = p->srcLim;
404370b324cSopenharmony_ci      const EBcj2Enc_FinishMode finishMode = p->finishMode;
405370b324cSopenharmony_ci      if (src != srcLim)
406370b324cSopenharmony_ci      {
407370b324cSopenharmony_ci        /* if there are some src data after the data copied to temp[],
408370b324cSopenharmony_ci           then we use MODE_CONTINUE for temp data */
409370b324cSopenharmony_ci        p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;
410370b324cSopenharmony_ci      }
411370b324cSopenharmony_ci      p->src = p->temp;
412370b324cSopenharmony_ci      p->srcLim = p->temp + p->tempPos;
413370b324cSopenharmony_ci      PRF2("    ")
414370b324cSopenharmony_ci      Bcj2Enc_Encode_2(p);
415370b324cSopenharmony_ci      {
416370b324cSopenharmony_ci        const unsigned num = (unsigned)(p->src - p->temp);
417370b324cSopenharmony_ci        const unsigned tempPos = p->tempPos - num;
418370b324cSopenharmony_ci        unsigned i;
419370b324cSopenharmony_ci        p->tempPos = tempPos;
420370b324cSopenharmony_ci        for (i = 0; i < tempPos; i++)
421370b324cSopenharmony_ci          p->temp[i] = p->temp[(SizeT)i + num];
422370b324cSopenharmony_ci        // tempPos : number of bytes in temp buffer
423370b324cSopenharmony_ci        p->src = src;
424370b324cSopenharmony_ci        p->srcLim = srcLim;
425370b324cSopenharmony_ci        p->finishMode = finishMode;
426370b324cSopenharmony_ci        if (p->state != BCJ2_ENC_STATE_ORIG)
427370b324cSopenharmony_ci        {
428370b324cSopenharmony_ci          // (p->tempPos <= 4) // in non-marker version
429370b324cSopenharmony_ci          /* if (the reason of exit from Bcj2Enc_Encode_2()
430370b324cSopenharmony_ci                 is not BCJ2_ENC_STATE_ORIG),
431370b324cSopenharmony_ci             then we exit from Bcj2Enc_Encode() with same reason */
432370b324cSopenharmony_ci          // optional code begin : we rollback (src) and tempPos, if it's possible:
433370b324cSopenharmony_ci          if (extra >= tempPos)
434370b324cSopenharmony_ci            extra = tempPos;
435370b324cSopenharmony_ci          p->src = src - extra;
436370b324cSopenharmony_ci          p->tempPos = tempPos - extra;
437370b324cSopenharmony_ci          // optional code end : rollback of (src) and tempPos
438370b324cSopenharmony_ci          return;
439370b324cSopenharmony_ci        }
440370b324cSopenharmony_ci        /* (p->tempPos <= 4)
441370b324cSopenharmony_ci           (p->state == BCJ2_ENC_STATE_ORIG)
442370b324cSopenharmony_ci             so encoder needs more data than in temp[] */
443370b324cSopenharmony_ci        if (src == srcLim)
444370b324cSopenharmony_ci          return; // src buffer has no more input data.
445370b324cSopenharmony_ci        /* (src != srcLim)
446370b324cSopenharmony_ci           so we can provide more input data from src for Bcj2Enc_Encode_2() */
447370b324cSopenharmony_ci        if (extra >= tempPos)
448370b324cSopenharmony_ci        {
449370b324cSopenharmony_ci          /* (extra >= tempPos) means that temp buffer contains
450370b324cSopenharmony_ci             only data from src buffer of this call.
451370b324cSopenharmony_ci             So now we can encode without temp buffer */
452370b324cSopenharmony_ci          p->src = src - tempPos; // rollback (src)
453370b324cSopenharmony_ci          p->tempPos = 0;
454370b324cSopenharmony_ci          break;
455370b324cSopenharmony_ci        }
456370b324cSopenharmony_ci        // we append one additional extra byte from (src) to temp[] buffer:
457370b324cSopenharmony_ci        p->temp[tempPos] = *src;
458370b324cSopenharmony_ci        p->tempPos = tempPos + 1;
459370b324cSopenharmony_ci        // (0 < p->tempPos <= 5) // in non-marker version
460370b324cSopenharmony_ci        p->src = src + 1;
461370b324cSopenharmony_ci        extra++;
462370b324cSopenharmony_ci      }
463370b324cSopenharmony_ci    }
464370b324cSopenharmony_ci  }
465370b324cSopenharmony_ci
466370b324cSopenharmony_ci  PRF2("++++")
467370b324cSopenharmony_ci  // (p->tempPos == 0)
468370b324cSopenharmony_ci  Bcj2Enc_Encode_2(p);
469370b324cSopenharmony_ci  PRF2("====")
470370b324cSopenharmony_ci
471370b324cSopenharmony_ci  if (p->state == BCJ2_ENC_STATE_ORIG)
472370b324cSopenharmony_ci  {
473370b324cSopenharmony_ci    const Byte *src = p->src;
474370b324cSopenharmony_ci    const Byte *srcLim = p->srcLim;
475370b324cSopenharmony_ci    const unsigned rem = (unsigned)(srcLim - src);
476370b324cSopenharmony_ci    /* (rem <= 4) here.
477370b324cSopenharmony_ci       if (p->src != p->srcLim), then
478370b324cSopenharmony_ci         - we copy non-processed bytes from (p->src) to temp[] buffer,
479370b324cSopenharmony_ci         - we set p->src equal to p->srcLim.
480370b324cSopenharmony_ci    */
481370b324cSopenharmony_ci    if (rem)
482370b324cSopenharmony_ci    {
483370b324cSopenharmony_ci      unsigned i = 0;
484370b324cSopenharmony_ci      p->src = srcLim;
485370b324cSopenharmony_ci      p->tempPos = rem;
486370b324cSopenharmony_ci      // (0 < p->tempPos <= 4)
487370b324cSopenharmony_ci      do
488370b324cSopenharmony_ci        p->temp[i] = src[i];
489370b324cSopenharmony_ci      while (++i != rem);
490370b324cSopenharmony_ci    }
491370b324cSopenharmony_ci    // (p->tempPos <= 4)
492370b324cSopenharmony_ci    // (p->src == p->srcLim)
493370b324cSopenharmony_ci  }
494370b324cSopenharmony_ci}
495370b324cSopenharmony_ci
/* Remove the macros that are local to this file.
   MARKER_FLAG is defined only in the commented-out marker version of the code above,
   so its #undef has no effect as long as that version is disabled. */
#undef PRF2
#undef CONV_FLAG
#undef MARKER_FLAG
#undef WRITE_CONTEXT
#undef WRITE_CONTEXT_AND_SRC
#undef ONE_ITER
#undef NUM_SHIFT_BITS
#undef kTopValue
#undef kNumBitModelTotalBits
#undef kBitModelTotal
#undef kNumMoveBits
507