1370b324cSopenharmony_ci/* Bcj2Enc.c -- BCJ2 Encoder converter for x86 code (Branch CALL/JUMP variant2) 2370b324cSopenharmony_ci2023-04-02 : Igor Pavlov : Public domain */ 3370b324cSopenharmony_ci 4370b324cSopenharmony_ci#include "Precomp.h" 5370b324cSopenharmony_ci 6370b324cSopenharmony_ci/* #define SHOW_STAT */ 7370b324cSopenharmony_ci#ifdef SHOW_STAT 8370b324cSopenharmony_ci#include <stdio.h> 9370b324cSopenharmony_ci#define PRF2(s) printf("%s ip=%8x tempPos=%d src= %8x\n", s, (unsigned)p->ip64, p->tempPos, (unsigned)(p->srcLim - p->src)); 10370b324cSopenharmony_ci#else 11370b324cSopenharmony_ci#define PRF2(s) 12370b324cSopenharmony_ci#endif 13370b324cSopenharmony_ci 14370b324cSopenharmony_ci#include "Bcj2.h" 15370b324cSopenharmony_ci#include "CpuArch.h" 16370b324cSopenharmony_ci 17370b324cSopenharmony_ci#define kTopValue ((UInt32)1 << 24) 18370b324cSopenharmony_ci#define kNumBitModelTotalBits 11 19370b324cSopenharmony_ci#define kBitModelTotal (1 << kNumBitModelTotalBits) 20370b324cSopenharmony_ci#define kNumMoveBits 5 21370b324cSopenharmony_ci 22370b324cSopenharmony_civoid Bcj2Enc_Init(CBcj2Enc *p) 23370b324cSopenharmony_ci{ 24370b324cSopenharmony_ci unsigned i; 25370b324cSopenharmony_ci p->state = BCJ2_ENC_STATE_ORIG; 26370b324cSopenharmony_ci p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE; 27370b324cSopenharmony_ci p->context = 0; 28370b324cSopenharmony_ci p->flushRem = 5; 29370b324cSopenharmony_ci p->isFlushState = 0; 30370b324cSopenharmony_ci p->cache = 0; 31370b324cSopenharmony_ci p->range = 0xffffffff; 32370b324cSopenharmony_ci p->low = 0; 33370b324cSopenharmony_ci p->cacheSize = 1; 34370b324cSopenharmony_ci p->ip64 = 0; 35370b324cSopenharmony_ci p->fileIp64 = 0; 36370b324cSopenharmony_ci p->fileSize64_minus1 = BCJ2_ENC_FileSizeField_UNLIMITED; 37370b324cSopenharmony_ci p->relatLimit = BCJ2_ENC_RELAT_LIMIT_DEFAULT; 38370b324cSopenharmony_ci // p->relatExcludeBits = 0; 39370b324cSopenharmony_ci p->tempPos = 0; 40370b324cSopenharmony_ci for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++) 41370b324cSopenharmony_ci p->probs[i] = kBitModelTotal >> 1; 42370b324cSopenharmony_ci} 43370b324cSopenharmony_ci 44370b324cSopenharmony_ci// Z7_NO_INLINE 45370b324cSopenharmony_ciZ7_FORCE_INLINE 46370b324cSopenharmony_cistatic BoolInt Bcj2_RangeEnc_ShiftLow(CBcj2Enc *p) 47370b324cSopenharmony_ci{ 48370b324cSopenharmony_ci const UInt32 low = (UInt32)p->low; 49370b324cSopenharmony_ci const unsigned high = (unsigned) 50370b324cSopenharmony_ci #if defined(Z7_MSC_VER_ORIGINAL) \ 51370b324cSopenharmony_ci && defined(MY_CPU_X86) \ 52370b324cSopenharmony_ci && defined(MY_CPU_LE) \ 53370b324cSopenharmony_ci && !defined(MY_CPU_64BIT) 54370b324cSopenharmony_ci // we try to rid of __aullshr() call in MSVS-x86 55370b324cSopenharmony_ci (((const UInt32 *)&p->low)[1]); // [1] : for little-endian only 56370b324cSopenharmony_ci #else 57370b324cSopenharmony_ci (p->low >> 32); 58370b324cSopenharmony_ci #endif 59370b324cSopenharmony_ci if (low < (UInt32)0xff000000 || high != 0) 60370b324cSopenharmony_ci { 61370b324cSopenharmony_ci Byte *buf = p->bufs[BCJ2_STREAM_RC]; 62370b324cSopenharmony_ci do 63370b324cSopenharmony_ci { 64370b324cSopenharmony_ci if (buf == p->lims[BCJ2_STREAM_RC]) 65370b324cSopenharmony_ci { 66370b324cSopenharmony_ci p->state = BCJ2_STREAM_RC; 67370b324cSopenharmony_ci p->bufs[BCJ2_STREAM_RC] = buf; 68370b324cSopenharmony_ci return True; 69370b324cSopenharmony_ci } 70370b324cSopenharmony_ci *buf++ = (Byte)(p->cache + high); 71370b324cSopenharmony_ci p->cache = 0xff; 72370b324cSopenharmony_ci } 73370b324cSopenharmony_ci while (--p->cacheSize); 74370b324cSopenharmony_ci p->bufs[BCJ2_STREAM_RC] = buf; 75370b324cSopenharmony_ci p->cache = (Byte)(low >> 24); 76370b324cSopenharmony_ci } 77370b324cSopenharmony_ci p->cacheSize++; 78370b324cSopenharmony_ci p->low = low << 8; 79370b324cSopenharmony_ci return False; 80370b324cSopenharmony_ci} 81370b324cSopenharmony_ci 82370b324cSopenharmony_ci 83370b324cSopenharmony_ci/* 84370b324cSopenharmony_ciWe can use 2 alternative versions of code: 85370b324cSopenharmony_ci1) non-marker version: 86370b324cSopenharmony_ci Byte CBcj2Enc::context 87370b324cSopenharmony_ci Byte temp[8]; 88370b324cSopenharmony_ci Last byte of marker (e8/e9/[0f]8x) can be written to temp[] buffer. 89370b324cSopenharmony_ci Encoder writes last byte of marker (e8/e9/[0f]8x) to dest, only in conjunction 90370b324cSopenharmony_ci with writing branch symbol to range coder in same Bcj2Enc_Encode_2() call. 91370b324cSopenharmony_ci 92370b324cSopenharmony_ci2) marker version: 93370b324cSopenharmony_ci UInt32 CBcj2Enc::context 94370b324cSopenharmony_ci Byte CBcj2Enc::temp[4]; 95370b324cSopenharmony_ci MARKER_FLAG in CBcj2Enc::context shows that CBcj2Enc::context contains finded marker. 96370b324cSopenharmony_ci it's allowed that 97370b324cSopenharmony_ci one call of Bcj2Enc_Encode_2() writes last byte of marker (e8/e9/[0f]8x) to dest, 98370b324cSopenharmony_ci and another call of Bcj2Enc_Encode_2() does offset conversion. 99370b324cSopenharmony_ci So different values of (fileIp) and (fileSize) are possible 100370b324cSopenharmony_ci in these different Bcj2Enc_Encode_2() calls. 101370b324cSopenharmony_ci 102370b324cSopenharmony_ciAlso marker version requires additional if((v & MARKER_FLAG) == 0) check in main loop. 103370b324cSopenharmony_ciSo we use non-marker version. 104370b324cSopenharmony_ci*/ 105370b324cSopenharmony_ci 106370b324cSopenharmony_ci/* 107370b324cSopenharmony_ci Corner cases with overlap in multi-block. 108370b324cSopenharmony_ci before v23: there was one corner case, where converted instruction 109370b324cSopenharmony_ci could start in one sub-stream and finish in next sub-stream. 110370b324cSopenharmony_ci If multi-block (solid) encoding is used, 111370b324cSopenharmony_ci and BCJ2_ENC_FINISH_MODE_END_BLOCK is used for each sub-stream. 112370b324cSopenharmony_ci and (0f) is last byte of previous sub-stream 113370b324cSopenharmony_ci and (8x) is first byte of current sub-stream 114370b324cSopenharmony_ci then (0f 8x) pair is treated as marker by BCJ2 encoder and decoder. 115370b324cSopenharmony_ci BCJ2 encoder can converts 32-bit offset for that (0f 8x) cortage, 116370b324cSopenharmony_ci if that offset meets limit requirements. 117370b324cSopenharmony_ci If encoder allows 32-bit offset conversion for such overlap case, 118370b324cSopenharmony_ci then the data in 3 uncompressed BCJ2 streams for some sub-stream 119370b324cSopenharmony_ci can depend from data of previous sub-stream. 120370b324cSopenharmony_ci That corner case is not big problem, and it's rare case. 121370b324cSopenharmony_ci Since v23.00 we do additional check to prevent conversions in such overlap cases. 122370b324cSopenharmony_ci*/ 123370b324cSopenharmony_ci 124370b324cSopenharmony_ci/* 125370b324cSopenharmony_ci Bcj2Enc_Encode_2() output variables at exit: 126370b324cSopenharmony_ci { 127370b324cSopenharmony_ci if (Bcj2Enc_Encode_2() exits with (p->state == BCJ2_ENC_STATE_ORIG)) 128370b324cSopenharmony_ci { 129370b324cSopenharmony_ci it means that encoder needs more input data. 130370b324cSopenharmony_ci if (p->srcLim == p->src) at exit, then 131370b324cSopenharmony_ci { 132370b324cSopenharmony_ci (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM) 133370b324cSopenharmony_ci all input data were read and processed, and we are ready for 134370b324cSopenharmony_ci new input data. 135370b324cSopenharmony_ci } 136370b324cSopenharmony_ci else 137370b324cSopenharmony_ci { 138370b324cSopenharmony_ci (p->srcLim != p->src) 139370b324cSopenharmony_ci (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE) 140370b324cSopenharmony_ci The encoder have found e8/e9/0f_8x marker, 141370b324cSopenharmony_ci and p->src points to last byte of that marker, 142370b324cSopenharmony_ci Bcj2Enc_Encode_2() needs more input data to get totally 143370b324cSopenharmony_ci 5 bytes (last byte of marker and 32-bit branch offset) 144370b324cSopenharmony_ci as continuous array starting from p->src. 145370b324cSopenharmony_ci (p->srcLim - p->src < 5) requirement is met after exit. 146370b324cSopenharmony_ci So non-processed resedue from p->src to p->srcLim is always less than 5 bytes. 147370b324cSopenharmony_ci } 148370b324cSopenharmony_ci } 149370b324cSopenharmony_ci } 150370b324cSopenharmony_ci*/ 151370b324cSopenharmony_ci 152370b324cSopenharmony_ciZ7_NO_INLINE 153370b324cSopenharmony_cistatic void Bcj2Enc_Encode_2(CBcj2Enc *p) 154370b324cSopenharmony_ci{ 155370b324cSopenharmony_ci if (!p->isFlushState) 156370b324cSopenharmony_ci { 157370b324cSopenharmony_ci const Byte *src; 158370b324cSopenharmony_ci UInt32 v; 159370b324cSopenharmony_ci { 160370b324cSopenharmony_ci const unsigned state = p->state; 161370b324cSopenharmony_ci if (BCJ2_IS_32BIT_STREAM(state)) 162370b324cSopenharmony_ci { 163370b324cSopenharmony_ci Byte *cur = p->bufs[state]; 164370b324cSopenharmony_ci if (cur == p->lims[state]) 165370b324cSopenharmony_ci return; 166370b324cSopenharmony_ci SetBe32a(cur, p->tempTarget) 167370b324cSopenharmony_ci p->bufs[state] = cur + 4; 168370b324cSopenharmony_ci } 169370b324cSopenharmony_ci } 170370b324cSopenharmony_ci p->state = BCJ2_ENC_STATE_ORIG; // for main reason of exit 171370b324cSopenharmony_ci src = p->src; 172370b324cSopenharmony_ci v = p->context; 173370b324cSopenharmony_ci 174370b324cSopenharmony_ci // #define WRITE_CONTEXT p->context = v; // for marker version 175370b324cSopenharmony_ci #define WRITE_CONTEXT p->context = (Byte)v; 176370b324cSopenharmony_ci #define WRITE_CONTEXT_AND_SRC p->src = src; WRITE_CONTEXT 177370b324cSopenharmony_ci 178370b324cSopenharmony_ci for (;;) 179370b324cSopenharmony_ci { 180370b324cSopenharmony_ci // const Byte *src; 181370b324cSopenharmony_ci // UInt32 v; 182370b324cSopenharmony_ci CBcj2Enc_ip_unsigned ip; 183370b324cSopenharmony_ci if (p->range < kTopValue) 184370b324cSopenharmony_ci { 185370b324cSopenharmony_ci // to reduce register pressure and code size: we save and restore local variables. 186370b324cSopenharmony_ci WRITE_CONTEXT_AND_SRC 187370b324cSopenharmony_ci if (Bcj2_RangeEnc_ShiftLow(p)) 188370b324cSopenharmony_ci return; 189370b324cSopenharmony_ci p->range <<= 8; 190370b324cSopenharmony_ci src = p->src; 191370b324cSopenharmony_ci v = p->context; 192370b324cSopenharmony_ci } 193370b324cSopenharmony_ci // src = p->src; 194370b324cSopenharmony_ci // #define MARKER_FLAG ((UInt32)1 << 17) 195370b324cSopenharmony_ci // if ((v & MARKER_FLAG) == 0) // for marker version 196370b324cSopenharmony_ci { 197370b324cSopenharmony_ci const Byte *srcLim; 198370b324cSopenharmony_ci Byte *dest = p->bufs[BCJ2_STREAM_MAIN]; 199370b324cSopenharmony_ci { 200370b324cSopenharmony_ci const SizeT remSrc = (SizeT)(p->srcLim - src); 201370b324cSopenharmony_ci SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest); 202370b324cSopenharmony_ci if (rem >= remSrc) 203370b324cSopenharmony_ci rem = remSrc; 204370b324cSopenharmony_ci srcLim = src + rem; 205370b324cSopenharmony_ci } 206370b324cSopenharmony_ci /* p->context contains context of previous byte: 207370b324cSopenharmony_ci bits [0 : 7] : src[-1], if (src) was changed in this call 208370b324cSopenharmony_ci bits [8 : 31] : are undefined for non-marker version 209370b324cSopenharmony_ci */ 210370b324cSopenharmony_ci // v = p->context; 211370b324cSopenharmony_ci #define NUM_SHIFT_BITS 24 212370b324cSopenharmony_ci #define CONV_FLAG ((UInt32)1 << 16) 213370b324cSopenharmony_ci #define ONE_ITER { \ 214370b324cSopenharmony_ci b = src[0]; \ 215370b324cSopenharmony_ci *dest++ = (Byte)b; \ 216370b324cSopenharmony_ci v = (v << NUM_SHIFT_BITS) | b; \ 217370b324cSopenharmony_ci if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \ 218370b324cSopenharmony_ci if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \ 219370b324cSopenharmony_ci ((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \ 220370b324cSopenharmony_ci src++; if (src == srcLim) { break; } } 221370b324cSopenharmony_ci 222370b324cSopenharmony_ci if (src != srcLim) 223370b324cSopenharmony_ci for (;;) 224370b324cSopenharmony_ci { 225370b324cSopenharmony_ci /* clang can generate ineffective code with setne instead of two jcc instructions. 226370b324cSopenharmony_ci we can use 2 iterations and external (unsigned b) to avoid that ineffective code genaration. */ 227370b324cSopenharmony_ci unsigned b; 228370b324cSopenharmony_ci ONE_ITER 229370b324cSopenharmony_ci ONE_ITER 230370b324cSopenharmony_ci } 231370b324cSopenharmony_ci 232370b324cSopenharmony_ci ip = p->ip64 + (CBcj2Enc_ip_unsigned)(SizeT)(dest - p->bufs[BCJ2_STREAM_MAIN]); 233370b324cSopenharmony_ci p->bufs[BCJ2_STREAM_MAIN] = dest; 234370b324cSopenharmony_ci p->ip64 = ip; 235370b324cSopenharmony_ci 236370b324cSopenharmony_ci if (src == srcLim) 237370b324cSopenharmony_ci { 238370b324cSopenharmony_ci WRITE_CONTEXT_AND_SRC 239370b324cSopenharmony_ci if (src != p->srcLim) 240370b324cSopenharmony_ci { 241370b324cSopenharmony_ci p->state = BCJ2_STREAM_MAIN; 242370b324cSopenharmony_ci return; 243370b324cSopenharmony_ci } 244370b324cSopenharmony_ci /* (p->src == p->srcLim) 245370b324cSopenharmony_ci (p->state == BCJ2_ENC_STATE_ORIG) */ 246370b324cSopenharmony_ci if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM) 247370b324cSopenharmony_ci return; 248370b324cSopenharmony_ci /* (p->finishMode == BCJ2_ENC_FINISH_MODE_END_STREAM */ 249370b324cSopenharmony_ci // (p->flushRem == 5); 250370b324cSopenharmony_ci p->isFlushState = 1; 251370b324cSopenharmony_ci break; 252370b324cSopenharmony_ci } 253370b324cSopenharmony_ci src++; 254370b324cSopenharmony_ci // p->src = src; 255370b324cSopenharmony_ci } 256370b324cSopenharmony_ci // ip = p->ip; // for marker version 257370b324cSopenharmony_ci /* marker was found */ 258370b324cSopenharmony_ci /* (v) contains marker that was found: 259370b324cSopenharmony_ci bits [NUM_SHIFT_BITS : NUM_SHIFT_BITS + 7] 260370b324cSopenharmony_ci : value of src[-2] : xx/xx/0f 261370b324cSopenharmony_ci bits [0 : 7] : value of src[-1] : e8/e9/8x 262370b324cSopenharmony_ci */ 263370b324cSopenharmony_ci { 264370b324cSopenharmony_ci { 265370b324cSopenharmony_ci #if NUM_SHIFT_BITS != 24 266370b324cSopenharmony_ci v &= ~(UInt32)CONV_FLAG; 267370b324cSopenharmony_ci #endif 268370b324cSopenharmony_ci // UInt32 relat = 0; 269370b324cSopenharmony_ci if ((SizeT)(p->srcLim - src) >= 4) 270370b324cSopenharmony_ci { 271370b324cSopenharmony_ci /* 272370b324cSopenharmony_ci if (relat != 0 || (Byte)v != 0xe8) 273370b324cSopenharmony_ci BoolInt isBigOffset = True; 274370b324cSopenharmony_ci */ 275370b324cSopenharmony_ci const UInt32 relat = GetUi32(src); 276370b324cSopenharmony_ci /* 277370b324cSopenharmony_ci #define EXCLUDE_FLAG ((UInt32)1 << 4) 278370b324cSopenharmony_ci #define NEED_CONVERT(rel) ((((rel) + EXCLUDE_FLAG) & (0 - EXCLUDE_FLAG * 2)) != 0) 279370b324cSopenharmony_ci if (p->relatExcludeBits != 0) 280370b324cSopenharmony_ci { 281370b324cSopenharmony_ci const UInt32 flag = (UInt32)1 << (p->relatExcludeBits - 1); 282370b324cSopenharmony_ci isBigOffset = (((relat + flag) & (0 - flag * 2)) != 0); 283370b324cSopenharmony_ci } 284370b324cSopenharmony_ci // isBigOffset = False; // for debug 285370b324cSopenharmony_ci */ 286370b324cSopenharmony_ci ip -= p->fileIp64; 287370b324cSopenharmony_ci // Use the following if check, if (ip) is 64-bit: 288370b324cSopenharmony_ci if (ip > (((v + 0x20) >> 5) & 1)) // 23.00 : we eliminate milti-block overlap for (Of 80) and (e8/e9) 289370b324cSopenharmony_ci if ((CBcj2Enc_ip_unsigned)((CBcj2Enc_ip_signed)ip + 4 + (Int32)relat) <= p->fileSize64_minus1) 290370b324cSopenharmony_ci if (((UInt32)(relat + p->relatLimit) >> 1) < p->relatLimit) 291370b324cSopenharmony_ci v |= CONV_FLAG; 292370b324cSopenharmony_ci } 293370b324cSopenharmony_ci else if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE) 294370b324cSopenharmony_ci { 295370b324cSopenharmony_ci // (p->srcLim - src < 4) 296370b324cSopenharmony_ci // /* 297370b324cSopenharmony_ci // for non-marker version 298370b324cSopenharmony_ci p->ip64--; // p->ip = ip - 1; 299370b324cSopenharmony_ci p->bufs[BCJ2_STREAM_MAIN]--; 300370b324cSopenharmony_ci src--; 301370b324cSopenharmony_ci v >>= NUM_SHIFT_BITS; 302370b324cSopenharmony_ci // (0 < p->srcLim - p->src <= 4) 303370b324cSopenharmony_ci // */ 304370b324cSopenharmony_ci // v |= MARKER_FLAG; // for marker version 305370b324cSopenharmony_ci /* (p->state == BCJ2_ENC_STATE_ORIG) */ 306370b324cSopenharmony_ci WRITE_CONTEXT_AND_SRC 307370b324cSopenharmony_ci return; 308370b324cSopenharmony_ci } 309370b324cSopenharmony_ci { 310370b324cSopenharmony_ci const unsigned c = ((v + 0x17) >> 6) & 1; 311370b324cSopenharmony_ci CBcj2Prob *prob = p->probs + (unsigned) 312370b324cSopenharmony_ci (((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1)); 313370b324cSopenharmony_ci /* 314370b324cSopenharmony_ci ((Byte)v == 0xe8 ? 2 + ((Byte)(v >> 8)) : 315370b324cSopenharmony_ci ((Byte)v < 0xe8 ? 0 : 1)); // ((v >> 5) & 1)); 316370b324cSopenharmony_ci */ 317370b324cSopenharmony_ci const unsigned ttt = *prob; 318370b324cSopenharmony_ci const UInt32 bound = (p->range >> kNumBitModelTotalBits) * ttt; 319370b324cSopenharmony_ci if ((v & CONV_FLAG) == 0) 320370b324cSopenharmony_ci { 321370b324cSopenharmony_ci // static int yyy = 0; yyy++; printf("\n!needConvert = %d\n", yyy); 322370b324cSopenharmony_ci // v = (Byte)v; // for marker version 323370b324cSopenharmony_ci p->range = bound; 324370b324cSopenharmony_ci *prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); 325370b324cSopenharmony_ci // WRITE_CONTEXT_AND_SRC 326370b324cSopenharmony_ci continue; 327370b324cSopenharmony_ci } 328370b324cSopenharmony_ci p->low += bound; 329370b324cSopenharmony_ci p->range -= bound; 330370b324cSopenharmony_ci *prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits)); 331370b324cSopenharmony_ci } 332370b324cSopenharmony_ci // p->context = src[3]; 333370b324cSopenharmony_ci { 334370b324cSopenharmony_ci // const unsigned cj = ((Byte)v == 0xe8 ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP); 335370b324cSopenharmony_ci const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL; 336370b324cSopenharmony_ci ip = p->ip64; 337370b324cSopenharmony_ci v = GetUi32(src); // relat 338370b324cSopenharmony_ci ip += 4; 339370b324cSopenharmony_ci p->ip64 = ip; 340370b324cSopenharmony_ci src += 4; 341370b324cSopenharmony_ci // p->src = src; 342370b324cSopenharmony_ci { 343370b324cSopenharmony_ci const UInt32 absol = (UInt32)ip + v; 344370b324cSopenharmony_ci Byte *cur = p->bufs[cj]; 345370b324cSopenharmony_ci v >>= 24; 346370b324cSopenharmony_ci // WRITE_CONTEXT 347370b324cSopenharmony_ci if (cur == p->lims[cj]) 348370b324cSopenharmony_ci { 349370b324cSopenharmony_ci p->state = cj; 350370b324cSopenharmony_ci p->tempTarget = absol; 351370b324cSopenharmony_ci WRITE_CONTEXT_AND_SRC 352370b324cSopenharmony_ci return; 353370b324cSopenharmony_ci } 354370b324cSopenharmony_ci SetBe32a(cur, absol) 355370b324cSopenharmony_ci p->bufs[cj] = cur + 4; 356370b324cSopenharmony_ci } 357370b324cSopenharmony_ci } 358370b324cSopenharmony_ci } 359370b324cSopenharmony_ci } 360370b324cSopenharmony_ci } // end of loop 361370b324cSopenharmony_ci } 362370b324cSopenharmony_ci 363370b324cSopenharmony_ci for (; p->flushRem != 0; p->flushRem--) 364370b324cSopenharmony_ci if (Bcj2_RangeEnc_ShiftLow(p)) 365370b324cSopenharmony_ci return; 366370b324cSopenharmony_ci p->state = BCJ2_ENC_STATE_FINISHED; 367370b324cSopenharmony_ci} 368370b324cSopenharmony_ci 369370b324cSopenharmony_ci 370370b324cSopenharmony_ci/* 371370b324cSopenharmony_ciBCJ2 encoder needs look ahead for up to 4 bytes in (src) buffer. 372370b324cSopenharmony_ciSo base function Bcj2Enc_Encode_2() 373370b324cSopenharmony_ci in BCJ2_ENC_FINISH_MODE_CONTINUE mode can return with 374370b324cSopenharmony_ci (p->state == BCJ2_ENC_STATE_ORIG && p->src < p->srcLim) 375370b324cSopenharmony_ciBcj2Enc_Encode() solves that look ahead problem by using p->temp[] buffer. 376370b324cSopenharmony_ci so if (p->state == BCJ2_ENC_STATE_ORIG) after Bcj2Enc_Encode(), 377370b324cSopenharmony_ci then (p->src == p->srcLim). 378370b324cSopenharmony_ci And the caller's code is simpler with Bcj2Enc_Encode(). 379370b324cSopenharmony_ci*/ 380370b324cSopenharmony_ci 381370b324cSopenharmony_ciZ7_NO_INLINE 382370b324cSopenharmony_civoid Bcj2Enc_Encode(CBcj2Enc *p) 383370b324cSopenharmony_ci{ 384370b324cSopenharmony_ci PRF2("\n----") 385370b324cSopenharmony_ci if (p->tempPos != 0) 386370b324cSopenharmony_ci { 387370b324cSopenharmony_ci /* extra: number of bytes that were copied from (src) to (temp) buffer in this call */ 388370b324cSopenharmony_ci unsigned extra = 0; 389370b324cSopenharmony_ci /* We will touch only minimal required number of bytes in input (src) stream. 390370b324cSopenharmony_ci So we will add input bytes from (src) stream to temp[] with step of 1 byte. 391370b324cSopenharmony_ci We don't add new bytes to temp[] before Bcj2Enc_Encode_2() call 392370b324cSopenharmony_ci in first loop iteration because 393370b324cSopenharmony_ci - previous call of Bcj2Enc_Encode() could use another (finishMode), 394370b324cSopenharmony_ci - previous call could finish with (p->state != BCJ2_ENC_STATE_ORIG). 395370b324cSopenharmony_ci the case with full temp[] buffer (p->tempPos == 4) is possible here. 396370b324cSopenharmony_ci */ 397370b324cSopenharmony_ci for (;;) 398370b324cSopenharmony_ci { 399370b324cSopenharmony_ci // (0 < p->tempPos <= 5) // in non-marker version 400370b324cSopenharmony_ci /* p->src : the current src data position including extra bytes 401370b324cSopenharmony_ci that were copied to temp[] buffer in this call */ 402370b324cSopenharmony_ci const Byte *src = p->src; 403370b324cSopenharmony_ci const Byte *srcLim = p->srcLim; 404370b324cSopenharmony_ci const EBcj2Enc_FinishMode finishMode = p->finishMode; 405370b324cSopenharmony_ci if (src != srcLim) 406370b324cSopenharmony_ci { 407370b324cSopenharmony_ci /* if there are some src data after the data copied to temp[], 408370b324cSopenharmony_ci then we use MODE_CONTINUE for temp data */ 409370b324cSopenharmony_ci p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE; 410370b324cSopenharmony_ci } 411370b324cSopenharmony_ci p->src = p->temp; 412370b324cSopenharmony_ci p->srcLim = p->temp + p->tempPos; 413370b324cSopenharmony_ci PRF2(" ") 414370b324cSopenharmony_ci Bcj2Enc_Encode_2(p); 415370b324cSopenharmony_ci { 416370b324cSopenharmony_ci const unsigned num = (unsigned)(p->src - p->temp); 417370b324cSopenharmony_ci const unsigned tempPos = p->tempPos - num; 418370b324cSopenharmony_ci unsigned i; 419370b324cSopenharmony_ci p->tempPos = tempPos; 420370b324cSopenharmony_ci for (i = 0; i < tempPos; i++) 421370b324cSopenharmony_ci p->temp[i] = p->temp[(SizeT)i + num]; 422370b324cSopenharmony_ci // tempPos : number of bytes in temp buffer 423370b324cSopenharmony_ci p->src = src; 424370b324cSopenharmony_ci p->srcLim = srcLim; 425370b324cSopenharmony_ci p->finishMode = finishMode; 426370b324cSopenharmony_ci if (p->state != BCJ2_ENC_STATE_ORIG) 427370b324cSopenharmony_ci { 428370b324cSopenharmony_ci // (p->tempPos <= 4) // in non-marker version 429370b324cSopenharmony_ci /* if (the reason of exit from Bcj2Enc_Encode_2() 430370b324cSopenharmony_ci is not BCJ2_ENC_STATE_ORIG), 431370b324cSopenharmony_ci then we exit from Bcj2Enc_Encode() with same reason */ 432370b324cSopenharmony_ci // optional code begin : we rollback (src) and tempPos, if it's possible: 433370b324cSopenharmony_ci if (extra >= tempPos) 434370b324cSopenharmony_ci extra = tempPos; 435370b324cSopenharmony_ci p->src = src - extra; 436370b324cSopenharmony_ci p->tempPos = tempPos - extra; 437370b324cSopenharmony_ci // optional code end : rollback of (src) and tempPos 438370b324cSopenharmony_ci return; 439370b324cSopenharmony_ci } 440370b324cSopenharmony_ci /* (p->tempPos <= 4) 441370b324cSopenharmony_ci (p->state == BCJ2_ENC_STATE_ORIG) 442370b324cSopenharmony_ci so encoder needs more data than in temp[] */ 443370b324cSopenharmony_ci if (src == srcLim) 444370b324cSopenharmony_ci return; // src buffer has no more input data. 445370b324cSopenharmony_ci /* (src != srcLim) 446370b324cSopenharmony_ci so we can provide more input data from src for Bcj2Enc_Encode_2() */ 447370b324cSopenharmony_ci if (extra >= tempPos) 448370b324cSopenharmony_ci { 449370b324cSopenharmony_ci /* (extra >= tempPos) means that temp buffer contains 450370b324cSopenharmony_ci only data from src buffer of this call. 451370b324cSopenharmony_ci So now we can encode without temp buffer */ 452370b324cSopenharmony_ci p->src = src - tempPos; // rollback (src) 453370b324cSopenharmony_ci p->tempPos = 0; 454370b324cSopenharmony_ci break; 455370b324cSopenharmony_ci } 456370b324cSopenharmony_ci // we append one additional extra byte from (src) to temp[] buffer: 457370b324cSopenharmony_ci p->temp[tempPos] = *src; 458370b324cSopenharmony_ci p->tempPos = tempPos + 1; 459370b324cSopenharmony_ci // (0 < p->tempPos <= 5) // in non-marker version 460370b324cSopenharmony_ci p->src = src + 1; 461370b324cSopenharmony_ci extra++; 462370b324cSopenharmony_ci } 463370b324cSopenharmony_ci } 464370b324cSopenharmony_ci } 465370b324cSopenharmony_ci 466370b324cSopenharmony_ci PRF2("++++") 467370b324cSopenharmony_ci // (p->tempPos == 0) 468370b324cSopenharmony_ci Bcj2Enc_Encode_2(p); 469370b324cSopenharmony_ci PRF2("====") 470370b324cSopenharmony_ci 471370b324cSopenharmony_ci if (p->state == BCJ2_ENC_STATE_ORIG) 472370b324cSopenharmony_ci { 473370b324cSopenharmony_ci const Byte *src = p->src; 474370b324cSopenharmony_ci const Byte *srcLim = p->srcLim; 475370b324cSopenharmony_ci const unsigned rem = (unsigned)(srcLim - src); 476370b324cSopenharmony_ci /* (rem <= 4) here. 477370b324cSopenharmony_ci if (p->src != p->srcLim), then 478370b324cSopenharmony_ci - we copy non-processed bytes from (p->src) to temp[] buffer, 479370b324cSopenharmony_ci - we set p->src equal to p->srcLim. 480370b324cSopenharmony_ci */ 481370b324cSopenharmony_ci if (rem) 482370b324cSopenharmony_ci { 483370b324cSopenharmony_ci unsigned i = 0; 484370b324cSopenharmony_ci p->src = srcLim; 485370b324cSopenharmony_ci p->tempPos = rem; 486370b324cSopenharmony_ci // (0 < p->tempPos <= 4) 487370b324cSopenharmony_ci do 488370b324cSopenharmony_ci p->temp[i] = src[i]; 489370b324cSopenharmony_ci while (++i != rem); 490370b324cSopenharmony_ci } 491370b324cSopenharmony_ci // (p->tempPos <= 4) 492370b324cSopenharmony_ci // (p->src == p->srcLim) 493370b324cSopenharmony_ci } 494370b324cSopenharmony_ci} 495370b324cSopenharmony_ci 496370b324cSopenharmony_ci#undef PRF2 497370b324cSopenharmony_ci#undef CONV_FLAG 498370b324cSopenharmony_ci#undef MARKER_FLAG 499370b324cSopenharmony_ci#undef WRITE_CONTEXT 500370b324cSopenharmony_ci#undef WRITE_CONTEXT_AND_SRC 501370b324cSopenharmony_ci#undef ONE_ITER 502370b324cSopenharmony_ci#undef NUM_SHIFT_BITS 503370b324cSopenharmony_ci#undef kTopValue 504370b324cSopenharmony_ci#undef kNumBitModelTotalBits 505370b324cSopenharmony_ci#undef kBitModelTotal 506370b324cSopenharmony_ci#undef kNumMoveBits 507