1370b324cSopenharmony_ci/* Bcj2.c -- BCJ2 Decoder (Converter for x86 code) 2370b324cSopenharmony_ci2023-03-01 : Igor Pavlov : Public domain */ 3370b324cSopenharmony_ci 4370b324cSopenharmony_ci#include "Precomp.h" 5370b324cSopenharmony_ci 6370b324cSopenharmony_ci#include "Bcj2.h" 7370b324cSopenharmony_ci#include "CpuArch.h" 8370b324cSopenharmony_ci 9370b324cSopenharmony_ci#define kTopValue ((UInt32)1 << 24) 10370b324cSopenharmony_ci#define kNumBitModelTotalBits 11 11370b324cSopenharmony_ci#define kBitModelTotal (1 << kNumBitModelTotalBits) 12370b324cSopenharmony_ci#define kNumMoveBits 5 13370b324cSopenharmony_ci 14370b324cSopenharmony_ci// UInt32 bcj2_stats[256 + 2][2]; 15370b324cSopenharmony_ci 16370b324cSopenharmony_civoid Bcj2Dec_Init(CBcj2Dec *p) 17370b324cSopenharmony_ci{ 18370b324cSopenharmony_ci unsigned i; 19370b324cSopenharmony_ci p->state = BCJ2_STREAM_RC; // BCJ2_DEC_STATE_OK; 20370b324cSopenharmony_ci p->ip = 0; 21370b324cSopenharmony_ci p->temp = 0; 22370b324cSopenharmony_ci p->range = 0; 23370b324cSopenharmony_ci p->code = 0; 24370b324cSopenharmony_ci for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++) 25370b324cSopenharmony_ci p->probs[i] = kBitModelTotal >> 1; 26370b324cSopenharmony_ci} 27370b324cSopenharmony_ci 28370b324cSopenharmony_ciSRes Bcj2Dec_Decode(CBcj2Dec *p) 29370b324cSopenharmony_ci{ 30370b324cSopenharmony_ci UInt32 v = p->temp; 31370b324cSopenharmony_ci // const Byte *src; 32370b324cSopenharmony_ci if (p->range <= 5) 33370b324cSopenharmony_ci { 34370b324cSopenharmony_ci UInt32 code = p->code; 35370b324cSopenharmony_ci p->state = BCJ2_DEC_STATE_ERROR; /* for case if we return SZ_ERROR_DATA; */ 36370b324cSopenharmony_ci for (; p->range != 5; p->range++) 37370b324cSopenharmony_ci { 38370b324cSopenharmony_ci if (p->range == 1 && code != 0) 39370b324cSopenharmony_ci return SZ_ERROR_DATA; 40370b324cSopenharmony_ci if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC]) 41370b324cSopenharmony_ci { 42370b324cSopenharmony_ci p->state = BCJ2_STREAM_RC; 43370b324cSopenharmony_ci return SZ_OK; 44370b324cSopenharmony_ci } 45370b324cSopenharmony_ci code = (code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; 46370b324cSopenharmony_ci p->code = code; 47370b324cSopenharmony_ci } 48370b324cSopenharmony_ci if (code == 0xffffffff) 49370b324cSopenharmony_ci return SZ_ERROR_DATA; 50370b324cSopenharmony_ci p->range = 0xffffffff; 51370b324cSopenharmony_ci } 52370b324cSopenharmony_ci // else 53370b324cSopenharmony_ci { 54370b324cSopenharmony_ci unsigned state = p->state; 55370b324cSopenharmony_ci // we check BCJ2_IS_32BIT_STREAM() here instead of check in the main loop 56370b324cSopenharmony_ci if (BCJ2_IS_32BIT_STREAM(state)) 57370b324cSopenharmony_ci { 58370b324cSopenharmony_ci const Byte *cur = p->bufs[state]; 59370b324cSopenharmony_ci if (cur == p->lims[state]) 60370b324cSopenharmony_ci return SZ_OK; 61370b324cSopenharmony_ci p->bufs[state] = cur + 4; 62370b324cSopenharmony_ci { 63370b324cSopenharmony_ci const UInt32 ip = p->ip + 4; 64370b324cSopenharmony_ci v = GetBe32a(cur) - ip; 65370b324cSopenharmony_ci p->ip = ip; 66370b324cSopenharmony_ci } 67370b324cSopenharmony_ci state = BCJ2_DEC_STATE_ORIG_0; 68370b324cSopenharmony_ci } 69370b324cSopenharmony_ci if ((unsigned)(state - BCJ2_DEC_STATE_ORIG_0) < 4) 70370b324cSopenharmony_ci { 71370b324cSopenharmony_ci Byte *dest = p->dest; 72370b324cSopenharmony_ci for (;;) 73370b324cSopenharmony_ci { 74370b324cSopenharmony_ci if (dest == p->destLim) 75370b324cSopenharmony_ci { 76370b324cSopenharmony_ci p->state = state; 77370b324cSopenharmony_ci p->temp = v; 78370b324cSopenharmony_ci return SZ_OK; 79370b324cSopenharmony_ci } 80370b324cSopenharmony_ci *dest++ = (Byte)v; 81370b324cSopenharmony_ci p->dest = dest; 82370b324cSopenharmony_ci if (++state == BCJ2_DEC_STATE_ORIG_3 + 1) 83370b324cSopenharmony_ci break; 84370b324cSopenharmony_ci v >>= 8; 85370b324cSopenharmony_ci } 86370b324cSopenharmony_ci } 87370b324cSopenharmony_ci } 88370b324cSopenharmony_ci 89370b324cSopenharmony_ci // src = p->bufs[BCJ2_STREAM_MAIN]; 90370b324cSopenharmony_ci for (;;) 91370b324cSopenharmony_ci { 92370b324cSopenharmony_ci /* 93370b324cSopenharmony_ci if (BCJ2_IS_32BIT_STREAM(p->state)) 94370b324cSopenharmony_ci p->state = BCJ2_DEC_STATE_OK; 95370b324cSopenharmony_ci else 96370b324cSopenharmony_ci */ 97370b324cSopenharmony_ci { 98370b324cSopenharmony_ci if (p->range < kTopValue) 99370b324cSopenharmony_ci { 100370b324cSopenharmony_ci if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC]) 101370b324cSopenharmony_ci { 102370b324cSopenharmony_ci p->state = BCJ2_STREAM_RC; 103370b324cSopenharmony_ci p->temp = v; 104370b324cSopenharmony_ci return SZ_OK; 105370b324cSopenharmony_ci } 106370b324cSopenharmony_ci p->range <<= 8; 107370b324cSopenharmony_ci p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; 108370b324cSopenharmony_ci } 109370b324cSopenharmony_ci { 110370b324cSopenharmony_ci const Byte *src = p->bufs[BCJ2_STREAM_MAIN]; 111370b324cSopenharmony_ci const Byte *srcLim; 112370b324cSopenharmony_ci Byte *dest = p->dest; 113370b324cSopenharmony_ci { 114370b324cSopenharmony_ci const SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src); 115370b324cSopenharmony_ci SizeT num = (SizeT)(p->destLim - dest); 116370b324cSopenharmony_ci if (num >= rem) 117370b324cSopenharmony_ci num = rem; 118370b324cSopenharmony_ci #define NUM_ITERS 4 119370b324cSopenharmony_ci #if (NUM_ITERS & (NUM_ITERS - 1)) == 0 120370b324cSopenharmony_ci num &= ~((SizeT)NUM_ITERS - 1); // if (NUM_ITERS == (1 << x)) 121370b324cSopenharmony_ci #else 122370b324cSopenharmony_ci num -= num % NUM_ITERS; // if (NUM_ITERS != (1 << x)) 123370b324cSopenharmony_ci #endif 124370b324cSopenharmony_ci srcLim = src + num; 125370b324cSopenharmony_ci } 126370b324cSopenharmony_ci 127370b324cSopenharmony_ci #define NUM_SHIFT_BITS 24 128370b324cSopenharmony_ci #define ONE_ITER(indx) { \ 129370b324cSopenharmony_ci const unsigned b = src[indx]; \ 130370b324cSopenharmony_ci *dest++ = (Byte)b; \ 131370b324cSopenharmony_ci v = (v << NUM_SHIFT_BITS) | b; \ 132370b324cSopenharmony_ci if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \ 133370b324cSopenharmony_ci if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \ 134370b324cSopenharmony_ci ((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \ 135370b324cSopenharmony_ci /* ++dest */; /* v = b; */ } 136370b324cSopenharmony_ci 137370b324cSopenharmony_ci if (src != srcLim) 138370b324cSopenharmony_ci for (;;) 139370b324cSopenharmony_ci { 140370b324cSopenharmony_ci /* The dependency chain of 2-cycle for (v) calculation is not big problem here. 141370b324cSopenharmony_ci But we can remove dependency chain with v = b in the end of loop. */ 142370b324cSopenharmony_ci ONE_ITER(0) 143370b324cSopenharmony_ci #if (NUM_ITERS > 1) 144370b324cSopenharmony_ci ONE_ITER(1) 145370b324cSopenharmony_ci #if (NUM_ITERS > 2) 146370b324cSopenharmony_ci ONE_ITER(2) 147370b324cSopenharmony_ci #if (NUM_ITERS > 3) 148370b324cSopenharmony_ci ONE_ITER(3) 149370b324cSopenharmony_ci #if (NUM_ITERS > 4) 150370b324cSopenharmony_ci ONE_ITER(4) 151370b324cSopenharmony_ci #if (NUM_ITERS > 5) 152370b324cSopenharmony_ci ONE_ITER(5) 153370b324cSopenharmony_ci #if (NUM_ITERS > 6) 154370b324cSopenharmony_ci ONE_ITER(6) 155370b324cSopenharmony_ci #if (NUM_ITERS > 7) 156370b324cSopenharmony_ci ONE_ITER(7) 157370b324cSopenharmony_ci #endif 158370b324cSopenharmony_ci #endif 159370b324cSopenharmony_ci #endif 160370b324cSopenharmony_ci #endif 161370b324cSopenharmony_ci #endif 162370b324cSopenharmony_ci #endif 163370b324cSopenharmony_ci #endif 164370b324cSopenharmony_ci 165370b324cSopenharmony_ci src += NUM_ITERS; 166370b324cSopenharmony_ci if (src == srcLim) 167370b324cSopenharmony_ci break; 168370b324cSopenharmony_ci } 169370b324cSopenharmony_ci 170370b324cSopenharmony_ci if (src == srcLim) 171370b324cSopenharmony_ci #if (NUM_ITERS > 1) 172370b324cSopenharmony_ci for (;;) 173370b324cSopenharmony_ci #endif 174370b324cSopenharmony_ci { 175370b324cSopenharmony_ci #if (NUM_ITERS > 1) 176370b324cSopenharmony_ci if (src == p->lims[BCJ2_STREAM_MAIN] || dest == p->destLim) 177370b324cSopenharmony_ci #endif 178370b324cSopenharmony_ci { 179370b324cSopenharmony_ci const SizeT num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]); 180370b324cSopenharmony_ci p->bufs[BCJ2_STREAM_MAIN] = src; 181370b324cSopenharmony_ci p->dest = dest; 182370b324cSopenharmony_ci p->ip += (UInt32)num; 183370b324cSopenharmony_ci /* state BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */ 184370b324cSopenharmony_ci p->state = 185370b324cSopenharmony_ci src == p->lims[BCJ2_STREAM_MAIN] ? 186370b324cSopenharmony_ci (unsigned)BCJ2_STREAM_MAIN : 187370b324cSopenharmony_ci (unsigned)BCJ2_DEC_STATE_ORIG; 188370b324cSopenharmony_ci p->temp = v; 189370b324cSopenharmony_ci return SZ_OK; 190370b324cSopenharmony_ci } 191370b324cSopenharmony_ci #if (NUM_ITERS > 1) 192370b324cSopenharmony_ci ONE_ITER(0) 193370b324cSopenharmony_ci src++; 194370b324cSopenharmony_ci #endif 195370b324cSopenharmony_ci } 196370b324cSopenharmony_ci 197370b324cSopenharmony_ci { 198370b324cSopenharmony_ci const SizeT num = (SizeT)(dest - p->dest); 199370b324cSopenharmony_ci p->dest = dest; // p->dest += num; 200370b324cSopenharmony_ci p->bufs[BCJ2_STREAM_MAIN] += num; // = src; 201370b324cSopenharmony_ci p->ip += (UInt32)num; 202370b324cSopenharmony_ci } 203370b324cSopenharmony_ci { 204370b324cSopenharmony_ci UInt32 bound, ttt; 205370b324cSopenharmony_ci CBcj2Prob *prob; // unsigned index; 206370b324cSopenharmony_ci /* 207370b324cSopenharmony_ci prob = p->probs + (unsigned)((Byte)v == 0xe8 ? 208370b324cSopenharmony_ci 2 + (Byte)(v >> 8) : 209370b324cSopenharmony_ci ((v >> 5) & 1)); // ((Byte)v < 0xe8 ? 0 : 1)); 210370b324cSopenharmony_ci */ 211370b324cSopenharmony_ci { 212370b324cSopenharmony_ci const unsigned c = ((v + 0x17) >> 6) & 1; 213370b324cSopenharmony_ci prob = p->probs + (unsigned) 214370b324cSopenharmony_ci (((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1)); 215370b324cSopenharmony_ci // (Byte) 216370b324cSopenharmony_ci // 8x->0 : e9->1 : xxe8->xx+2 217370b324cSopenharmony_ci // 8x->0x100 : e9->0x101 : xxe8->xx 218370b324cSopenharmony_ci // (((0x100 - (e & ~v)) & (0x100 | (v >> 8))) + (e & v)); 219370b324cSopenharmony_ci // (((0x101 + (~e | v)) & (0x100 | (v >> 8))) + (e & v)); 220370b324cSopenharmony_ci } 221370b324cSopenharmony_ci ttt = *prob; 222370b324cSopenharmony_ci bound = (p->range >> kNumBitModelTotalBits) * ttt; 223370b324cSopenharmony_ci if (p->code < bound) 224370b324cSopenharmony_ci { 225370b324cSopenharmony_ci // bcj2_stats[prob - p->probs][0]++; 226370b324cSopenharmony_ci p->range = bound; 227370b324cSopenharmony_ci *prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); 228370b324cSopenharmony_ci continue; 229370b324cSopenharmony_ci } 230370b324cSopenharmony_ci { 231370b324cSopenharmony_ci // bcj2_stats[prob - p->probs][1]++; 232370b324cSopenharmony_ci p->range -= bound; 233370b324cSopenharmony_ci p->code -= bound; 234370b324cSopenharmony_ci *prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits)); 235370b324cSopenharmony_ci } 236370b324cSopenharmony_ci } 237370b324cSopenharmony_ci } 238370b324cSopenharmony_ci } 239370b324cSopenharmony_ci { 240370b324cSopenharmony_ci /* (v == 0xe8 ? 0 : 1) uses setcc instruction with additional zero register usage in x64 MSVC. */ 241370b324cSopenharmony_ci // const unsigned cj = ((Byte)v == 0xe8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP; 242370b324cSopenharmony_ci const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL; 243370b324cSopenharmony_ci const Byte *cur = p->bufs[cj]; 244370b324cSopenharmony_ci Byte *dest; 245370b324cSopenharmony_ci SizeT rem; 246370b324cSopenharmony_ci if (cur == p->lims[cj]) 247370b324cSopenharmony_ci { 248370b324cSopenharmony_ci p->state = cj; 249370b324cSopenharmony_ci break; 250370b324cSopenharmony_ci } 251370b324cSopenharmony_ci v = GetBe32a(cur); 252370b324cSopenharmony_ci p->bufs[cj] = cur + 4; 253370b324cSopenharmony_ci { 254370b324cSopenharmony_ci const UInt32 ip = p->ip + 4; 255370b324cSopenharmony_ci v -= ip; 256370b324cSopenharmony_ci p->ip = ip; 257370b324cSopenharmony_ci } 258370b324cSopenharmony_ci dest = p->dest; 259370b324cSopenharmony_ci rem = (SizeT)(p->destLim - dest); 260370b324cSopenharmony_ci if (rem < 4) 261370b324cSopenharmony_ci { 262370b324cSopenharmony_ci if ((unsigned)rem > 0) { dest[0] = (Byte)v; v >>= 8; 263370b324cSopenharmony_ci if ((unsigned)rem > 1) { dest[1] = (Byte)v; v >>= 8; 264370b324cSopenharmony_ci if ((unsigned)rem > 2) { dest[2] = (Byte)v; v >>= 8; }}} 265370b324cSopenharmony_ci p->temp = v; 266370b324cSopenharmony_ci p->dest = dest + rem; 267370b324cSopenharmony_ci p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem; 268370b324cSopenharmony_ci break; 269370b324cSopenharmony_ci } 270370b324cSopenharmony_ci SetUi32(dest, v) 271370b324cSopenharmony_ci v >>= 24; 272370b324cSopenharmony_ci p->dest = dest + 4; 273370b324cSopenharmony_ci } 274370b324cSopenharmony_ci } 275370b324cSopenharmony_ci 276370b324cSopenharmony_ci if (p->range < kTopValue && p->bufs[BCJ2_STREAM_RC] != p->lims[BCJ2_STREAM_RC]) 277370b324cSopenharmony_ci { 278370b324cSopenharmony_ci p->range <<= 8; 279370b324cSopenharmony_ci p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; 280370b324cSopenharmony_ci } 281370b324cSopenharmony_ci return SZ_OK; 282370b324cSopenharmony_ci} 283370b324cSopenharmony_ci 284370b324cSopenharmony_ci#undef NUM_ITERS 285370b324cSopenharmony_ci#undef ONE_ITER 286370b324cSopenharmony_ci#undef NUM_SHIFT_BITS 287370b324cSopenharmony_ci#undef kTopValue 288370b324cSopenharmony_ci#undef kNumBitModelTotalBits 289370b324cSopenharmony_ci#undef kBitModelTotal 290370b324cSopenharmony_ci#undef kNumMoveBits 291