1370b324cSopenharmony_ci/* Ppmd.h -- PPMD codec common code
2370b324cSopenharmony_ci2023-03-05 : Igor Pavlov : Public domain
3370b324cSopenharmony_ciThis code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
4370b324cSopenharmony_ci
5370b324cSopenharmony_ci#ifndef ZIP7_INC_PPMD_H
6370b324cSopenharmony_ci#define ZIP7_INC_PPMD_H
7370b324cSopenharmony_ci
8370b324cSopenharmony_ci#include "CpuArch.h"
9370b324cSopenharmony_ci
10370b324cSopenharmony_ciEXTERN_C_BEGIN
11370b324cSopenharmony_ci
/*
   PPMD structures always keep their internal references into the main block
   in 32-bit fields:
     - PPMD_32BIT defined     : those fields hold real pointers.
     - PPMD_32BIT NOT defined : those fields hold UInt32 offsets.
   PPMD_32BIT is not allowed when the pointer size is 64-bit.
   When the pointer size is 32-bit, PPMD_32BIT is optional: it may be disabled,
   but the PPMD code works slightly faster with it enabled.
   It is enabled here only when MY_CPU_SIZEOF_POINTER is known and equals 4.
*/
#ifdef MY_CPU_SIZEOF_POINTER
  #if (MY_CPU_SIZEOF_POINTER == 4)
    #define PPMD_32BIT
  #endif
#endif
24370b324cSopenharmony_ci
/* Bit widths used by the probability macros below;
   PPMD_BIN_SCALE is 2^(PPMD_INT_BITS + PPMD_PERIOD_BITS). */
#define PPMD_INT_BITS     7
#define PPMD_PERIOD_BITS  7
#define PPMD_BIN_SCALE    (1 << (PPMD_INT_BITS + PPMD_PERIOD_BITS))

/* (total + 2^(sh - rnd)) >> sh : shifts total right by sh bits after adding a bias term. */
#define PPMD_GET_MEAN_SPEC(total, sh, rnd) \
    (((total) + (1 << ((sh) - (rnd)))) >> (sh))

/* PPMD_GET_MEAN_SPEC with shift = PPMD_PERIOD_BITS and round = 2. */
#define PPMD_GET_MEAN(total) \
    PPMD_GET_MEAN_SPEC(total, PPMD_PERIOD_BITS, 2)

/* prob + 2^PPMD_INT_BITS - mean(prob) */
#define PPMD_UPDATE_PROB_0(pr) \
    ((pr) + (1 << PPMD_INT_BITS) - PPMD_GET_MEAN(pr))

/* prob - mean(prob) */
#define PPMD_UPDATE_PROB_1(pr) \
    ((pr) - PPMD_GET_MEAN(pr))
33370b324cSopenharmony_ci
/* Group sizes N1..N3 are fixed; N4 is derived from them by the formula below.
   With N1 = N2 = N3 = 4 this gives N4 = 26 and PPMD_NUM_INDEXES = 38. */
#define PPMD_N1  4
#define PPMD_N2  4
#define PPMD_N3  4
#define PPMD_N4  ((128 + 3 - (1 * PPMD_N1 + 2 * PPMD_N2 + 3 * PPMD_N3)) / 4)

/* Sum of all four group sizes. */
#define PPMD_NUM_INDEXES  (PPMD_N1 + PPMD_N2 + PPMD_N3 + PPMD_N4)
39370b324cSopenharmony_ci
40370b324cSopenharmony_ciMY_CPU_pragma_pack_push_1
/* Most compilers work OK here even without #pragma pack(push, 1), but some GCC compilers need it. */
42370b324cSopenharmony_ci
43370b324cSopenharmony_ci/* SEE-contexts for PPM-contexts with masked symbols */
44370b324cSopenharmony_citypedef struct
45370b324cSopenharmony_ci{
46370b324cSopenharmony_ci  UInt16 Summ; /* Freq */
47370b324cSopenharmony_ci  Byte Shift;  /* Speed of Freq change; low Shift is for fast change */
48370b324cSopenharmony_ci  Byte Count;  /* Count to next change of Shift */
49370b324cSopenharmony_ci} CPpmd_See;
50370b324cSopenharmony_ci
/*
   Ppmd_See_UPDATE(p): p points to a record with Summ / Shift / Count fields.
   While Shift is below PPMD_PERIOD_BITS, Count is decremented; when it hits zero,
   Summ is doubled (truncated to 16 bits), Count is reloaded with (Byte)(3 << Shift),
   and only then Shift is incremented.
   The expansion is a braced block (not an expression), and (p) is evaluated several times.
*/
#define Ppmd_See_UPDATE(p) \
  { \
    if ((p)->Shift < PPMD_PERIOD_BITS) \
    { \
      if (--(p)->Count == 0) \
      { \
        (p)->Summ = (UInt16)((p)->Summ << 1); \
        (p)->Count = (Byte)(3 << (p)->Shift); \
        (p)->Shift++; \
      } \
    } \
  }
55370b324cSopenharmony_ci
56370b324cSopenharmony_ci
57370b324cSopenharmony_citypedef struct
58370b324cSopenharmony_ci{
59370b324cSopenharmony_ci  Byte Symbol;
60370b324cSopenharmony_ci  Byte Freq;
61370b324cSopenharmony_ci  UInt16 Successor_0;
62370b324cSopenharmony_ci  UInt16 Successor_1;
63370b324cSopenharmony_ci} CPpmd_State;
64370b324cSopenharmony_ci
65370b324cSopenharmony_citypedef struct CPpmd_State2_
66370b324cSopenharmony_ci{
67370b324cSopenharmony_ci  Byte Symbol;
68370b324cSopenharmony_ci  Byte Freq;
69370b324cSopenharmony_ci} CPpmd_State2;
70370b324cSopenharmony_ci
71370b324cSopenharmony_citypedef struct CPpmd_State4_
72370b324cSopenharmony_ci{
73370b324cSopenharmony_ci  UInt16 Successor_0;
74370b324cSopenharmony_ci  UInt16 Successor_1;
75370b324cSopenharmony_ci} CPpmd_State4;
76370b324cSopenharmony_ci
77370b324cSopenharmony_ciMY_CPU_pragma_pop
78370b324cSopenharmony_ci
/*
   PPMD code can write full CPpmd_State structure data to CPpmd*_Context
      at (byte offset = 2) instead of some fields of the original CPpmd*_Context structure.

   If we use pointers to different types that point to shared
   memory space, we can have an aliasing problem (strict aliasing).

   The XLC compiler in -O2 mode can change the order of memory write instructions
   in relation to read instructions, if we use pointers to different types.

   To solve that aliasing problem we use a combined CPpmd*_Context structure
   with unions that contain the fields from both structures:
   the original CPpmd*_Context and CPpmd_State.
   So we can access the fields from both structures via one pointer,
   and the compiler doesn't change the order of write instructions
   in relation to read instructions.

   If we don't use memory write instructions to shared memory in
   some local code, and we use only reading instructions (read only),
   then it's probably safe to use pointers to different types for reading.
*/
100370b324cSopenharmony_ci
101370b324cSopenharmony_ci
102370b324cSopenharmony_ci
/*
   Reference <-> pointer helpers. (ctx) must expose a Byte *Base member in offset mode.
     Ppmd_Ref_Type(type)              : type used to store a reference to (type)
     Ppmd_GetRef(ctx, addr)           : pointer   -> reference
     Ppmd_GetPtr(ctx, ref)            : reference -> pointer
     Ppmd_GetPtr_Type(ctx, ref, type) : reference -> (type *) pointer
*/
#ifndef PPMD_32BIT

  /* Offset mode: a reference is the UInt32 byte distance from (ctx)->Base. */
  #define Ppmd_Ref_Type(type)                UInt32
  #define Ppmd_GetRef(ctx, addr)             ((UInt32)((Byte *)(addr) - (ctx)->Base))
  #define Ppmd_GetPtr(ctx, ref)              ((void *)((ctx)->Base + (ref)))
  #define Ppmd_GetPtr_Type(ctx, ref, type)   ((type *)Ppmd_GetPtr(ctx, ref))

#else

  /* Pointer mode: a reference is the pointer itself; (ctx) and the type note are ignored. */
  #define Ppmd_Ref_Type(type)                      type *
  #define Ppmd_GetRef(ctx, addr)                   (addr)
  #define Ppmd_GetPtr(ctx, addr)                   (addr)
  #define Ppmd_GetPtr_Type(ctx, addr, note_type)   (addr)

#endif /* PPMD_32BIT */
118370b324cSopenharmony_ci
119370b324cSopenharmony_ci
120370b324cSopenharmony_citypedef Ppmd_Ref_Type(CPpmd_State) CPpmd_State_Ref;
121370b324cSopenharmony_citypedef Ppmd_Ref_Type(void)        CPpmd_Void_Ref;
122370b324cSopenharmony_citypedef Ppmd_Ref_Type(Byte)        CPpmd_Byte_Ref;
123370b324cSopenharmony_ci
124370b324cSopenharmony_ci
125370b324cSopenharmony_ci/*
126370b324cSopenharmony_ci#ifdef MY_CPU_LE_UNALIGN
127370b324cSopenharmony_ci// the unaligned 32-bit access latency can be too large, if the data is not in L1 cache.
128370b324cSopenharmony_ci#define Ppmd_GET_SUCCESSOR(p) ((CPpmd_Void_Ref)*(const UInt32 *)(const void *)&(p)->Successor_0)
129370b324cSopenharmony_ci#define Ppmd_SET_SUCCESSOR(p, v) *(UInt32 *)(void *)(void *)&(p)->Successor_0 = (UInt32)(v)
130370b324cSopenharmony_ci
131370b324cSopenharmony_ci#else
132370b324cSopenharmony_ci*/
133370b324cSopenharmony_ci
/*
   We can write the 16-bit halves of the 32-bit (Successor) field in any selected order.
   But the native order is the more consistent way.
   So we use the native order, if LE/BE order can be detected here at compile time.
*/
139370b324cSopenharmony_ci
/*
   Ppmd_GET_SUCCESSOR(p)      : combines (p)->Successor_0 and (p)->Successor_1 into one
                                32-bit value and casts it to CPpmd_Void_Ref.
   Ppmd_SET_SUCCESSOR(p, ref) : splits (UInt32)(ref) into the two 16-bit fields.
                                Expands to a braced block; (ref) is evaluated twice.
   Which field holds the low half depends on MY_CPU_BE.
*/
#ifndef MY_CPU_BE

  /* MY_CPU_BE not defined: Successor_0 = low 16 bits, Successor_1 = high 16 bits. */
  #define Ppmd_GET_SUCCESSOR(p) \
    ( (CPpmd_Void_Ref) (((UInt32)(p)->Successor_1 << 16) | (p)->Successor_0) )

  #define Ppmd_SET_SUCCESSOR(p, ref) \
    { \
      (p)->Successor_0 = (UInt16)((UInt32)(ref)); \
      (p)->Successor_1 = (UInt16)((UInt32)(ref) >> 16); \
    }

#else

  /* MY_CPU_BE defined: Successor_0 = high 16 bits, Successor_1 = low 16 bits. */
  #define Ppmd_GET_SUCCESSOR(p) \
    ( (CPpmd_Void_Ref) ((p)->Successor_1 | ((UInt32)(p)->Successor_0 << 16)) )

  #define Ppmd_SET_SUCCESSOR(p, ref) \
    { \
      (p)->Successor_0 = (UInt16)((UInt32)(ref) >> 16); \
      (p)->Successor_1 = (UInt16)((UInt32)(ref)); \
    }

#endif
159370b324cSopenharmony_ci
160370b324cSopenharmony_ci// #endif
161370b324cSopenharmony_ci
162370b324cSopenharmony_ci
/*
   PPMD_SetAllBitsIn256Bytes(p): stores ~(size_t)0 into the first
   (256 / sizeof((p)[0])) elements of p, eight elements per loop iteration.
   The element count is therefore expected to be a multiple of 8
   (true for size_t elements of 4 or 8 bytes).
   (p) is parenthesized everywhere, so an expression such as (buf + 8) is a valid argument.
   Expands to a braced block that declares a local named z; (p) is evaluated several times.
*/
#define PPMD_SetAllBitsIn256Bytes(p) \
  { size_t z; for (z = 0; z < 256 / sizeof((p)[0]); z += 8) { \
  (p)[z+7] = (p)[z+6] = (p)[z+5] = (p)[z+4] = (p)[z+3] = (p)[z+2] = (p)[z+1] = (p)[z+0] = ~(size_t)0; }}
166370b324cSopenharmony_ci
167370b324cSopenharmony_ciEXTERN_C_END
168370b324cSopenharmony_ci
169370b324cSopenharmony_ci#endif
170