1diff --git a/cdjpeg.h b/cdjpeg.h
2index 082687c..840cd2d 100644
3--- a/cdjpeg.h
4+++ b/cdjpeg.h
5@@ -103,39 +103,39 @@ typedef struct cdjpeg_progress_mgr *cd_progress_ptr;
6 
7 /* Module selection routines for I/O modules. */
8 
9-EXTERN(cjpeg_source_ptr) jinit_read_bmp(j_compress_ptr cinfo,
10+HIDE(EXTERN)(cjpeg_source_ptr) jinit_read_bmp(j_compress_ptr cinfo,
11                                         boolean use_inversion_array);
12-EXTERN(djpeg_dest_ptr) jinit_write_bmp(j_decompress_ptr cinfo, boolean is_os2,
13+HIDE(EXTERN)(djpeg_dest_ptr) jinit_write_bmp(j_decompress_ptr cinfo, boolean is_os2,
14                                        boolean use_inversion_array);
15-EXTERN(cjpeg_source_ptr) jinit_read_gif(j_compress_ptr cinfo);
16-EXTERN(djpeg_dest_ptr) jinit_write_gif(j_decompress_ptr cinfo, boolean is_lzw);
17-EXTERN(cjpeg_source_ptr) jinit_read_ppm(j_compress_ptr cinfo);
18-EXTERN(djpeg_dest_ptr) jinit_write_ppm(j_decompress_ptr cinfo);
19-EXTERN(cjpeg_source_ptr) jinit_read_targa(j_compress_ptr cinfo);
20-EXTERN(djpeg_dest_ptr) jinit_write_targa(j_decompress_ptr cinfo);
21+HIDE(EXTERN)(cjpeg_source_ptr) jinit_read_gif(j_compress_ptr cinfo);
22+HIDE(EXTERN)(djpeg_dest_ptr) jinit_write_gif(j_decompress_ptr cinfo, boolean is_lzw);
23+HIDE(EXTERN)(cjpeg_source_ptr) jinit_read_ppm(j_compress_ptr cinfo);
24+HIDE(EXTERN)(djpeg_dest_ptr) jinit_write_ppm(j_decompress_ptr cinfo);
25+HIDE(EXTERN)(cjpeg_source_ptr) jinit_read_targa(j_compress_ptr cinfo);
26+HIDE(EXTERN)(djpeg_dest_ptr) jinit_write_targa(j_decompress_ptr cinfo);
27 
28 /* cjpeg support routines (in rdswitch.c) */
29 
30-EXTERN(boolean) read_quant_tables(j_compress_ptr cinfo, char *filename,
31+HIDE(EXTERN)(boolean) read_quant_tables(j_compress_ptr cinfo, char *filename,
32                                   boolean force_baseline);
33-EXTERN(boolean) read_scan_script(j_compress_ptr cinfo, char *filename);
34-EXTERN(boolean) set_quality_ratings(j_compress_ptr cinfo, char *arg,
35+HIDE(EXTERN)(boolean) read_scan_script(j_compress_ptr cinfo, char *filename);
36+HIDE(EXTERN)(boolean) set_quality_ratings(j_compress_ptr cinfo, char *arg,
37                                     boolean force_baseline);
38-EXTERN(boolean) set_quant_slots(j_compress_ptr cinfo, char *arg);
39-EXTERN(boolean) set_sample_factors(j_compress_ptr cinfo, char *arg);
40+HIDE(EXTERN)(boolean) set_quant_slots(j_compress_ptr cinfo, char *arg);
41+HIDE(EXTERN)(boolean) set_sample_factors(j_compress_ptr cinfo, char *arg);
42 
43 /* djpeg support routines (in rdcolmap.c) */
44 
45-EXTERN(void) read_color_map(j_decompress_ptr cinfo, FILE *infile);
46+HIDE(EXTERN)(void) read_color_map(j_decompress_ptr cinfo, FILE *infile);
47 
48 /* common support routines (in cdjpeg.c) */
49 
50-EXTERN(void) start_progress_monitor(j_common_ptr cinfo,
51+HIDE(EXTERN)(void) start_progress_monitor(j_common_ptr cinfo,
52                                     cd_progress_ptr progress);
53-EXTERN(void) end_progress_monitor(j_common_ptr cinfo);
54-EXTERN(boolean) keymatch(char *arg, const char *keyword, int minchars);
55-EXTERN(FILE *) read_stdin(void);
56-EXTERN(FILE *) write_stdout(void);
57+HIDE(EXTERN)(void) end_progress_monitor(j_common_ptr cinfo);
58+HIDE(EXTERN)(boolean) keymatch(char *arg, const char *keyword, int minchars);
59+HIDE(EXTERN)(FILE *) read_stdin(void);
60+HIDE(EXTERN)(FILE *) write_stdout(void);
61 
62 /* miscellaneous useful macros */
63 
64diff --git a/jchuff.h b/jchuff.h
65index da7809a..57e9f8c 100644
66--- a/jchuff.h
67+++ b/jchuff.h
68@@ -42,9 +42,9 @@ typedef struct {
69 } c_derived_tbl;
70 
71 /* Expand a Huffman table definition into the derived format */
72-EXTERN(void) jpeg_make_c_derived_tbl(j_compress_ptr cinfo, boolean isDC,
73+HIDE(EXTERN)(void) jpeg_make_c_derived_tbl(j_compress_ptr cinfo, boolean isDC,
74                                      int tblno, c_derived_tbl **pdtbl);
75 
76 /* Generate an optimal table definition given the specified counts */
77-EXTERN(void) jpeg_gen_optimal_table(j_compress_ptr cinfo, JHUFF_TBL *htbl,
78+HIDE(EXTERN)(void) jpeg_gen_optimal_table(j_compress_ptr cinfo, JHUFF_TBL *htbl,
79                                     long freq[]);
80diff --git a/jdatadst-tj.c b/jdatadst-tj.c
81index fdaa2de..dc53970 100644
82--- a/jdatadst-tj.c
83+++ b/jdatadst-tj.c
84@@ -27,7 +27,7 @@
85 extern void *malloc(size_t size);
86 extern void free(void *ptr);
87 #endif
88-void jpeg_mem_dest_tj(j_compress_ptr cinfo, unsigned char **outbuffer,
89+HIDE(void) jpeg_mem_dest_tj(j_compress_ptr cinfo, unsigned char **outbuffer,
90                       unsigned long *outsize, boolean alloc);
91 
92 
93diff --git a/jdatasrc-tj.c b/jdatasrc-tj.c
94index 69fb5ea..1808dd9 100644
95--- a/jdatasrc-tj.c
96+++ b/jdatasrc-tj.c
97@@ -23,7 +23,7 @@
98 #include "jpeglib.h"
99 #include "jerror.h"
100 
101-void jpeg_mem_src_tj(j_decompress_ptr cinfo, const unsigned char *inbuffer,
102+HIDE(void) jpeg_mem_src_tj(j_decompress_ptr cinfo, const unsigned char *inbuffer,
103                      unsigned long insize);
104 
105 
106diff --git a/jdct.h b/jdct.h
107index 66d1718..7273ec0 100644
108--- a/jdct.h
109+++ b/jdct.h
110@@ -90,62 +90,62 @@ typedef FAST_FLOAT FLOAT_MULT_TYPE;  /* preferred floating type */
111 
112 /* Extern declarations for the forward and inverse DCT routines. */
113 
114-EXTERN(void) jpeg_fdct_islow(DCTELEM *data);
115-EXTERN(void) jpeg_fdct_ifast(DCTELEM *data);
116-EXTERN(void) jpeg_fdct_float(FAST_FLOAT *data);
117+HIDE(EXTERN)(void) jpeg_fdct_islow(DCTELEM *data);
118+HIDE(EXTERN)(void) jpeg_fdct_ifast(DCTELEM *data);
119+HIDE(EXTERN)(void) jpeg_fdct_float(FAST_FLOAT *data);
120 
121-EXTERN(void) jpeg_idct_islow(j_decompress_ptr cinfo,
122+HIDE(EXTERN)(void) jpeg_idct_islow(j_decompress_ptr cinfo,
123                              jpeg_component_info *compptr, JCOEFPTR coef_block,
124                              JSAMPARRAY output_buf, JDIMENSION output_col);
125-EXTERN(void) jpeg_idct_ifast(j_decompress_ptr cinfo,
126+HIDE(EXTERN)(void) jpeg_idct_ifast(j_decompress_ptr cinfo,
127                              jpeg_component_info *compptr, JCOEFPTR coef_block,
128                              JSAMPARRAY output_buf, JDIMENSION output_col);
129-EXTERN(void) jpeg_idct_float(j_decompress_ptr cinfo,
130+HIDE(EXTERN)(void) jpeg_idct_float(j_decompress_ptr cinfo,
131                              jpeg_component_info *compptr, JCOEFPTR coef_block,
132                              JSAMPARRAY output_buf, JDIMENSION output_col);
133-EXTERN(void) jpeg_idct_7x7(j_decompress_ptr cinfo,
134+HIDE(EXTERN)(void) jpeg_idct_7x7(j_decompress_ptr cinfo,
135                            jpeg_component_info *compptr, JCOEFPTR coef_block,
136                            JSAMPARRAY output_buf, JDIMENSION output_col);
137-EXTERN(void) jpeg_idct_6x6(j_decompress_ptr cinfo,
138+HIDE(EXTERN)(void) jpeg_idct_6x6(j_decompress_ptr cinfo,
139                            jpeg_component_info *compptr, JCOEFPTR coef_block,
140                            JSAMPARRAY output_buf, JDIMENSION output_col);
141-EXTERN(void) jpeg_idct_5x5(j_decompress_ptr cinfo,
142+HIDE(EXTERN)(void) jpeg_idct_5x5(j_decompress_ptr cinfo,
143                            jpeg_component_info *compptr, JCOEFPTR coef_block,
144                            JSAMPARRAY output_buf, JDIMENSION output_col);
145-EXTERN(void) jpeg_idct_4x4(j_decompress_ptr cinfo,
146+HIDE(EXTERN)(void) jpeg_idct_4x4(j_decompress_ptr cinfo,
147                            jpeg_component_info *compptr, JCOEFPTR coef_block,
148                            JSAMPARRAY output_buf, JDIMENSION output_col);
149-EXTERN(void) jpeg_idct_3x3(j_decompress_ptr cinfo,
150+HIDE(EXTERN)(void) jpeg_idct_3x3(j_decompress_ptr cinfo,
151                            jpeg_component_info *compptr, JCOEFPTR coef_block,
152                            JSAMPARRAY output_buf, JDIMENSION output_col);
153-EXTERN(void) jpeg_idct_2x2(j_decompress_ptr cinfo,
154+HIDE(EXTERN)(void) jpeg_idct_2x2(j_decompress_ptr cinfo,
155                            jpeg_component_info *compptr, JCOEFPTR coef_block,
156                            JSAMPARRAY output_buf, JDIMENSION output_col);
157-EXTERN(void) jpeg_idct_1x1(j_decompress_ptr cinfo,
158+HIDE(EXTERN)(void) jpeg_idct_1x1(j_decompress_ptr cinfo,
159                            jpeg_component_info *compptr, JCOEFPTR coef_block,
160                            JSAMPARRAY output_buf, JDIMENSION output_col);
161-EXTERN(void) jpeg_idct_9x9(j_decompress_ptr cinfo,
162+HIDE(EXTERN)(void) jpeg_idct_9x9(j_decompress_ptr cinfo,
163                            jpeg_component_info *compptr, JCOEFPTR coef_block,
164                            JSAMPARRAY output_buf, JDIMENSION output_col);
165-EXTERN(void) jpeg_idct_10x10(j_decompress_ptr cinfo,
166+HIDE(EXTERN)(void) jpeg_idct_10x10(j_decompress_ptr cinfo,
167                              jpeg_component_info *compptr, JCOEFPTR coef_block,
168                              JSAMPARRAY output_buf, JDIMENSION output_col);
169-EXTERN(void) jpeg_idct_11x11(j_decompress_ptr cinfo,
170+HIDE(EXTERN)(void) jpeg_idct_11x11(j_decompress_ptr cinfo,
171                              jpeg_component_info *compptr, JCOEFPTR coef_block,
172                              JSAMPARRAY output_buf, JDIMENSION output_col);
173-EXTERN(void) jpeg_idct_12x12(j_decompress_ptr cinfo,
174+HIDE(EXTERN)(void) jpeg_idct_12x12(j_decompress_ptr cinfo,
175                              jpeg_component_info *compptr, JCOEFPTR coef_block,
176                              JSAMPARRAY output_buf, JDIMENSION output_col);
177-EXTERN(void) jpeg_idct_13x13(j_decompress_ptr cinfo,
178+HIDE(EXTERN)(void) jpeg_idct_13x13(j_decompress_ptr cinfo,
179                              jpeg_component_info *compptr, JCOEFPTR coef_block,
180                              JSAMPARRAY output_buf, JDIMENSION output_col);
181-EXTERN(void) jpeg_idct_14x14(j_decompress_ptr cinfo,
182+HIDE(EXTERN)(void) jpeg_idct_14x14(j_decompress_ptr cinfo,
183                              jpeg_component_info *compptr, JCOEFPTR coef_block,
184                              JSAMPARRAY output_buf, JDIMENSION output_col);
185-EXTERN(void) jpeg_idct_15x15(j_decompress_ptr cinfo,
186+HIDE(EXTERN)(void) jpeg_idct_15x15(j_decompress_ptr cinfo,
187                              jpeg_component_info *compptr, JCOEFPTR coef_block,
188                              JSAMPARRAY output_buf, JDIMENSION output_col);
189-EXTERN(void) jpeg_idct_16x16(j_decompress_ptr cinfo,
190+HIDE(EXTERN)(void) jpeg_idct_16x16(j_decompress_ptr cinfo,
191                              jpeg_component_info *compptr, JCOEFPTR coef_block,
192                              JSAMPARRAY output_buf, JDIMENSION output_col);
193 
194diff --git a/jdhuff.h b/jdhuff.h
195index cfa0b7f..6cc2344 100644
196--- a/jdhuff.h
197+++ b/jdhuff.h
198@@ -16,10 +16,55 @@
199 
200 #include "jconfigint.h"
201 
202-
203+#ifndef HUFF_DECODE_OPT
204+#define HUFF_LOOKAHEAD  8
205+#define HUFF_CODE_LARGE_LONG_ALIGNED 0
206+#else
207+// OH ISSUE: jpeg optimize
208 /* Derived data constructed for each Huffman table */
209-
210-#define HUFF_LOOKAHEAD  8       /* # of bits of lookahead */
211+#define MAX_HUFF_CODE_LEN 16
212+
213+#define HUFF_LOOKAHEAD  10      /* # of bits of lookahead  9-13 maybe */
214+#define HUFF_AC_SYMBOLS 192
215+
216+#define HUFF_L_REM (16 - HUFF_LOOKAHEAD)
217+#define HUFF_L_DUP ((1 << HUFF_L_REM) - (HUFF_L_REM + 1))
218+#define HUFF_L_UNUSED ((1 << HUFF_L_REM) - (1 << ((HUFF_L_REM)/2)) - (1 << ((HUFF_L_REM + 1)/2)) + 1)
219+#define HUFF_L_SIZE (HUFF_AC_SYMBOLS + HUFF_L_DUP + HUFF_L_UNUSED)
220+#define HUFF_CODE_LARGE_LONG_ALIGNED (HUFF_L_SIZE + (-HUFF_L_SIZE & 0xf))
221+
222+#define COEF_BITS_OFFSET 0
223+#define COEF_BITS_BITS 4
224+#define ZERO_NUM1_OFFSET 4
225+#define ZERO_NUM_BITS 7
226+#define NB_OFFSET 11
227+#define NB_BITS 5
228+#define COEF1_OFFSET 16
229+#define COEF_VALUE_BITS 16
230+
231+#define EXTRA_BITS_OFFSET COEF_BITS_OFFSET  // 2nd table offset bits
232+#define EXTRA_BITS_BITS COEF_BITS_BITS
233+
234+#define SYM_OFFSET COEF_BITS_OFFSET
235+
236+#define MAKE_BITS(x, s)  ((x) << (s))  /* whole expansion parenthesized: safe next to +, -, == */
237+#define GETS_BITS(x, s, l)  (((x) >> (s)) & ((0x1L << (l)) - 1))  /* extract the l-bit field at bit s */
238+
239+#define MAKE_ZERO_NUM1(x) MAKE_BITS((x), ZERO_NUM1_OFFSET)
240+#define MAKE_COEF_BITS(x) MAKE_BITS((x), COEF_BITS_OFFSET)
241+#define MAKE_SYM(x) MAKE_BITS((x), SYM_OFFSET)
242+#define MAKE_NB(x) MAKE_BITS((x), NB_OFFSET)
243+#define MAKE_COEF1(x) MAKE_BITS((unsigned long long)(UINT16)(x), COEF1_OFFSET)  /* widen before the shift */
244+#define MAKE_BASE(x) MAKE_BITS((x), COEF1_OFFSET)
245+#define MAKE_EXTRA_BITS(x) MAKE_BITS((x), EXTRA_BITS_OFFSET)
246+
247+#define GET_ZERO_NUM1(x) GETS_BITS((x), ZERO_NUM1_OFFSET, ZERO_NUM_BITS)
248+#define GET_COEF_BITS(x) GETS_BITS((x), COEF_BITS_OFFSET, COEF_BITS_BITS)
249+#define GET_NB(x) GETS_BITS((x), NB_OFFSET, NB_BITS)
250+#define GET_COEF1(x) GETS_BITS((x), COEF1_OFFSET, COEF_VALUE_BITS)
251+#define GET_BASE(x) GETS_BITS((x), COEF1_OFFSET, COEF_VALUE_BITS)
252+#define GET_EXTRA_BITS(x) GETS_BITS((x), EXTRA_BITS_OFFSET, EXTRA_BITS_BITS)
253+#endif
254 
255 typedef struct {
256   /* Basic tables: (element [0] of each array is unused) */
257@@ -44,11 +89,11 @@ typedef struct {
258    * if too long.  The next 8 bits of each entry contain the
259    * symbol.
260    */
261-  int lookup[1 << HUFF_LOOKAHEAD];
262+  int lookup[(1 << HUFF_LOOKAHEAD) + HUFF_CODE_LARGE_LONG_ALIGNED];
263 } d_derived_tbl;
264 
265 /* Expand a Huffman table definition into the derived format */
266-EXTERN(void) jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC,
267+HIDE(EXTERN)(void) jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC,
268                                      int tblno, d_derived_tbl **pdtbl);
269 
270 
271@@ -172,7 +217,7 @@ typedef struct {                /* Bitreading working state within an MCU */
272   (bits_left -= (nbits))
273 
274 /* Load up the bit buffer to a depth of at least nbits */
275-EXTERN(boolean) jpeg_fill_bit_buffer(bitread_working_state *state,
276+HIDE(EXTERN)(boolean) jpeg_fill_bit_buffer(bitread_working_state *state,
277                                      register bit_buf_type get_buffer,
278                                      register int bits_left, int nbits);
279 
280@@ -241,7 +286,7 @@ slowlabel: \
281   }
282 
283 /* Out-of-line case for Huffman code fetching */
284-EXTERN(int) jpeg_huff_decode(bitread_working_state *state,
285+HIDE(EXTERN)(int) jpeg_huff_decode(bitread_working_state *state,
286                              register bit_buf_type get_buffer,
287                              register int bits_left, d_derived_tbl *htbl,
288                              int min_bits);
289diff --git a/jdhuff_opt.c b/jdhuff_opt.c
290new file mode 100644
291index 0000000..5b1875c
292--- /dev/null
293+++ b/jdhuff_opt.c
294@@ -0,0 +1,1046 @@
295+/*
296+ * jdhuff_opt.c
297+ *
298+ * This file was part of the Independent JPEG Group's software:
299+ * Copyright (C) 1991-1997, Thomas G. Lane.
300+ * libjpeg-turbo Modifications:
301+ * Copyright (C) 2009-2011, 2016, 2018-2019, D. R. Commander.
302+ * Copyright (C) 2018, Matthias Räncker.
303+ * For conditions of distribution and use, see the accompanying README.ijg
304+ * file.
305+ *
306+ * This file contains Huffman entropy decoding routines.
307+ *
308+ * Much of the complexity here has to do with supporting input suspension.
309+ * If the data source module demands suspension, we want to be able to back
310+ * up to the start of the current MCU.  To do this, we copy state variables
311+ * into local working storage, and update them back to the permanent
312+ * storage only upon successful completion of an MCU.
313+ *
314+ * NOTE: All referenced figures are from
315+ * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994.
316+ */
317+
318+#define JPEG_INTERNALS
319+#include "jinclude.h"
320+#include "jpeglib.h"
321+#include "jdhuff.h"             /* Declarations shared with jdphuff.c */
322+#include "jpegcomp.h"
323+#include "jstdhuff.c"
324+
325+
326+/*
327+ * Expanded entropy decoder object for Huffman decoding.
328+ *
329+ * The savable_state subrecord contains fields that change within an MCU,
330+ * but must not be updated permanently until we complete the MCU.
331+ */
332+
333+typedef struct {
334+  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component; start_pass_huff_decoder resets it to 0 */
335+} savable_state;
336+
337+typedef struct {
338+  struct jpeg_entropy_decoder pub; /* public fields */
339+
340+  /* These fields are loaded into local variables at start of each MCU.
341+   * In case of suspension, we exit WITHOUT updating them.
342+   */
343+  bitread_perm_state bitstate;  /* Bit buffer at start of MCU */
344+  savable_state saved;          /* Other state at start of MCU */
345+
346+  /* These fields are NOT loaded into local working state. */
347+  unsigned int restarts_to_go;  /* MCUs left in this restart interval; set from cinfo->restart_interval by start_pass */
348+
349+  /* Pointers to derived tables (these workspaces have image lifespan) */
350+  d_derived_tbl *dc_derived_tbls[NUM_HUFF_TBLS];  /* indexed by DC table number */
351+  d_derived_tbl *ac_derived_tbls[NUM_HUFF_TBLS];  /* indexed by AC table number */
352+
353+  /* Precalculated info set up by start_pass for use in decode_mcu: */
354+
355+  /* Pointers to derived tables to be used for each block within an MCU */
356+  d_derived_tbl *dc_cur_tbls[D_MAX_BLOCKS_IN_MCU];  /* indexed by block number within the MCU */
357+  d_derived_tbl *ac_cur_tbls[D_MAX_BLOCKS_IN_MCU];  /* indexed by block number within the MCU */
358+  /* Whether we care about the DC and AC coefficient values for each block */
359+  boolean dc_needed[D_MAX_BLOCKS_IN_MCU];  /* TRUE when the block's component is needed */
360+  boolean ac_needed[D_MAX_BLOCKS_IN_MCU];  /* also requires _DCT_scaled_size > 1 */
361+} huff_entropy_decoder;
362+
363+typedef huff_entropy_decoder *huff_entropy_ptr;  /* cinfo->entropy is cast to this type */
364+
365+/*
366+ * Figure F.12: extend sign bit.  With AVOID_TABLES, s must be >= 1 (s == 0 would shift by -1).
367+ * On some machines, a shift and add will be faster than a table lookup.
368+ */
369+
370+#define AVOID_TABLES
371+#ifdef AVOID_TABLES
372+
373+#define NEG_1  ((unsigned int)-1)
374+#define HUFF_EXTEND(x, s) \
375+  ((x) + ((((x) - (1 << ((s) - 1))) >> 31) & (((NEG_1) << (s)) + 1)))
376+
377+#else
378+
379+#define HUFF_EXTEND(x, s) \
380+  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
381+
382+static const int extend_test[16] = {   /* entry n is 2**(n-1) */
383+  0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
384+  0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000
385+};
386+
387+static const int extend_offset[16] = { /* entry n is (-1 << n) + 1 */
388+  0, ((-1) << 1) + 1, ((-1) << 2) + 1, ((-1) << 3) + 1, ((-1) << 4) + 1,
389+  ((-1) << 5) + 1, ((-1) << 6) + 1, ((-1) << 7) + 1, ((-1) << 8) + 1,
390+  ((-1) << 9) + 1, ((-1) << 10) + 1, ((-1) << 11) + 1, ((-1) << 12) + 1,
391+  ((-1) << 13) + 1, ((-1) << 14) + 1, ((-1) << 15) + 1
392+};
393+
394+#endif /* AVOID_TABLES */
395+
396+/*
397+ * Initialize for a Huffman-compressed scan.
398+ */
399+
400+METHODDEF(void)
401+start_pass_huff_decoder(j_decompress_ptr cinfo)
402+{
403+  /* Per-scan setup: build the derived Huffman tables, pick the tables used by
404+   * each block of an MCU, and reset the bit reader and the restart counter. */
405+  huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
406+  jpeg_component_info *comp;
407+  int comp_idx, blk;
408+  int params_sequential;
409+
410+  /* Ss, Se, Ah/Al are expected to be 0, DCTSIZE2 - 1, 0, 0 for sequential
411+   * JPEG.  A mismatch only produces a warning, because there are some
412+   * baseline files out there with all zeroes in these bytes.
413+   */
414+  params_sequential = (cinfo->Ss == 0 && cinfo->Se == DCTSIZE2 - 1 &&
415+                       cinfo->Ah == 0 && cinfo->Al == 0);
416+  if (!params_sequential) {
417+    WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
418+  }
419+
420+  /* Build (or rebuild) the derived DC and AC tables of every scan component.
421+   * The same table may be processed more than once, but it's not expensive.
422+   */
423+  for (comp_idx = 0; comp_idx < cinfo->comps_in_scan; comp_idx++) {
424+    int dc_no, ac_no;
425+
426+    comp = cinfo->cur_comp_info[comp_idx];
427+    dc_no = comp->dc_tbl_no;
428+    ac_no = comp->ac_tbl_no;
429+    jpeg_make_d_derived_tbl(cinfo, TRUE, dc_no,
430+                            (d_derived_tbl **)(entropy->dc_derived_tbls) + dc_no);
431+    jpeg_make_d_derived_tbl(cinfo, FALSE, ac_no,
432+                            (d_derived_tbl **)(entropy->ac_derived_tbls) + ac_no);
433+    entropy->saved.last_dc_val[comp_idx] = 0;   /* DC prediction starts at 0 */
434+  }
435+
436+  /* Record, for each block of an MCU, its tables and which coefs are wanted */
437+  for (blk = 0; blk < cinfo->blocks_in_MCU; blk++) {
438+    boolean wanted;
439+
440+    comp = cinfo->cur_comp_info[cinfo->MCU_membership[blk]];
441+    wanted = comp->component_needed;
442+    entropy->dc_cur_tbls[blk] = entropy->dc_derived_tbls[comp->dc_tbl_no];
443+    entropy->ac_cur_tbls[blk] = entropy->ac_derived_tbls[comp->ac_tbl_no];
444+    entropy->dc_needed[blk] = wanted ? TRUE : FALSE;
445+    /* the ACs are not needed when producing a 1/8th-size image */
446+    entropy->ac_needed[blk] = wanted ? (comp->_DCT_scaled_size > 1) : FALSE;
447+  }
448+
449+  /* Reset the bit reader; clearing get_buffer is unnecessary, but keeps Purify quiet */
450+  entropy->bitstate.bits_left = 0;
451+  entropy->bitstate.get_buffer = 0;
452+  entropy->pub.insufficient_data = FALSE;
453+  entropy->restarts_to_go = cinfo->restart_interval;   /* restart counter */
454+}
455+
456+/*
457+ * Build the AC decoding tables of dtbl from htbl and its codes huffcode[].
458+ * lookup[0 .. (1 << HUFF_LOOKAHEAD) - 1] is indexed by HUFF_LOOKAHEAD bits;
459+ * second-level tables for longer codes follow it in the same array.
460+ * Returns FALSE instead of writing past the end of lookup[], else TRUE.
461+ */
462+
463+LOCAL(boolean)
464+jpeg_make_d_ac_derived_tbl(JHUFF_TBL *htbl, d_derived_tbl *dtbl, const unsigned int* huffcode)
465+{
466+  /* htbl->bits[l]: number of codes of length l; htbl->huffval[]: the symbols,
467+   * in the same order as huffcode[]. */
468+  const int lookup_len = (int)(sizeof(dtbl->lookup) / sizeof(dtbl->lookup[0]));
469+  int p = 0;                    /* index into huffcode[] and huffval[] */
470+  int i, l, lookbits, ctr, coef0;
471+
472+  /* Codes of at most HUFF_LOOKAHEAD bits: first-level entries only */
473+  for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
474+    for (i = 1; i <= (int)htbl->bits[l]; i++, p++) {
475+      UINT8 rs = htbl->huffval[p];      /* zero run (high 4 bits) + coef bits */
476+      UINT8 coef_bits = rs & 0x0F;
477+      if ((l + coef_bits) <= HUFF_LOOKAHEAD) {
478+        /* Code and coefficient bits both fit, so pre-decode the coefficient.
479+         * Symbols without coefficient bits, except 0xF0, get run 63 (per the
480+         * original note, so that decoding leaves its loop). */
481+        int run = (coef_bits == 0 && (rs >> 4) != 0xF) ? 63 : (rs >> 4);
482+        for (coef0 = 0; coef0 < (1 << coef_bits); coef0++) {
483+          /* HUFF_EXTEND(x, 0) would shift by -1; with 0 bits the value is 0 */
484+          INT16 coef_value = coef_bits ? HUFF_EXTEND(coef0, coef_bits) : 0;
485+          lookbits = (huffcode[p] << (HUFF_LOOKAHEAD - l)) |
486+                     (coef0 << (HUFF_LOOKAHEAD - l - coef_bits));
487+          for (ctr = 1 << (HUFF_LOOKAHEAD - l - coef_bits); ctr > 0; ctr--) {
488+            dtbl->lookup[lookbits] = MAKE_COEF1(coef_value) |
489+                                     MAKE_NB(l + coef_bits) | MAKE_ZERO_NUM1(run);
490+            lookbits++;
491+          }
492+        }
493+      } else {
494+        /* Only the code fits: store its length and symbol */
495+        lookbits = huffcode[p] << (HUFF_LOOKAHEAD - l);
496+        for (ctr = 1 << (HUFF_LOOKAHEAD - l); ctr > 0; ctr--) {
497+          dtbl->lookup[lookbits] = MAKE_NB(l) | MAKE_SYM(rs);
498+          lookbits++;
499+        }
500+      }
501+    }
502+  }
503+
504+  /* Codes longer than HUFF_LOOKAHEAD bits: codes sharing their first
505+   * HUFF_LOOKAHEAD bits share one second-level table. */
506+  int offset = 0;               /* entries written to the current 2nd table */
507+  int short_tbl_index = -1;     /* first-level index owning the 2nd table */
508+  int cur_long_tbl_base = 1 << HUFF_LOOKAHEAD;  /* start of the 2nd table */
509+  int left = 0;                 /* free slots at the current index width */
510+  int offset_bit = 0;           /* index width of the current 2nd table */
511+  int first = p;                /* index of the first code of length l */
512+  int max_code_len;             /* longest code length present in htbl */
513+  for (max_code_len = MAX_HUFF_CODE_LEN; max_code_len >= 1; max_code_len--) {
514+    if (htbl->bits[max_code_len])
515+      break;
516+  }
517+  for (l = HUFF_LOOKAHEAD + 1; l <= MAX_HUFF_CODE_LEN; l++) {
518+    for (i = 1; i <= (int)htbl->bits[l]; i++, p++) {
519+      UINT8 rs = htbl->huffval[p];      /* zero run (high 4 bits) + coef bits */
520+      int entry = MAKE_NB(l) | MAKE_SYM(rs);
521+      if ((rs & 0x0F) == 0)     /* no coefficient bits: tag the entry with 0xF */
522+        entry |= MAKE_COEF_BITS(0xF);
523+      lookbits = huffcode[p] >> (l - HUFF_LOOKAHEAD);   /* first-level index */
524+      if (lookbits != short_tbl_index) {
525+        /* New prefix: start a 2nd table after the previous one, then widen
526+         * its index while free slots remain and longer codes exist. */
527+        short_tbl_index = lookbits;
528+        cur_long_tbl_base += offset;
529+        offset = 0;
530+        offset_bit = l - HUFF_LOOKAHEAD;
531+        left = (1 << offset_bit) - (htbl->bits[l] - (p - first));
532+        while (offset_bit + HUFF_LOOKAHEAD < max_code_len && left > 0) {
533+          offset_bit++;
534+          left = (left << 1) - htbl->bits[offset_bit + HUFF_LOOKAHEAD];
535+        }
536+      }
537+      dtbl->lookup[lookbits] = MAKE_BASE(cur_long_tbl_base) | MAKE_NB(l) |
538+                               MAKE_EXTRA_BITS(offset_bit);
539+      for (ctr = 0; ctr < (1 << (offset_bit - (l - HUFF_LOOKAHEAD))); ctr++) {
540+        /* lookup[] has a fixed size; a table with many long codes can need more */
541+        if (cur_long_tbl_base + offset >= lookup_len)
542+          return FALSE;
543+        dtbl->lookup[cur_long_tbl_base + offset] = entry;
544+        offset++;
545+      }
546+    }
547+    first = p;
548+  }
549+  return TRUE;
550+}
551+
552+/*
553+ * Compute the derived values for a Huffman table.  The table is also
554+ * validated: a missing or malformed table, or an AC table whose lookup
555+ * tables would not fit, is reported through ERREXIT.
556+ * Note this is also used by jdphuff.c.
557+ */
558+
559+GLOBAL(void)
560+jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC, int tblno,
561+                        d_derived_tbl **pdtbl)
562+{
563+  JHUFF_TBL *htbl;
564+  d_derived_tbl *dtbl;
565+  int p, i, l, si, numsymbols;
566+  int lookbits, ctr;
567+  char huffsize[257];
568+  unsigned int huffcode[257];
569+  unsigned int code;
570+
571+  /* Note that huffsize[] and huffcode[] are filled in code-length order,
572+   * paralleling the order of the symbols themselves in htbl->huffval[].
573+   */
574+
575+  /* Find the input Huffman table */
576+  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
577+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
578+  htbl =
579+    isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
580+  if (htbl == NULL)
581+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
582+
583+  /* Allocate a workspace if we haven't already done so. */
584+  if (*pdtbl == NULL)
585+    *pdtbl = (d_derived_tbl *)
586+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
587+                                  sizeof(d_derived_tbl));
588+  dtbl = *pdtbl;
589+  dtbl->pub = htbl;             /* fill in back link */
590+
591+  /* Figure C.1: make table of Huffman code length for each symbol */
592+  p = 0;
593+  for (l = 1; l <= MAX_HUFF_CODE_LEN; l++) {
594+    i = (int)htbl->bits[l];
595+    if (i < 0 || p + i > 256)   /* protect against table overrun, 256 is the max number of symbols */
596+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
597+    while (i--)
598+      huffsize[p++] = (char)l;
599+  }
600+  huffsize[p] = 0;
601+  numsymbols = p;
602+
603+  /* Figure C.2: generate the codes themselves */
604+  /* We also validate that the counts represent a legal Huffman code tree. */
605+  code = 0;
606+  si = huffsize[0];
607+  p = 0;
608+  while (huffsize[p]) {
609+    while (((int)huffsize[p]) == si) {
610+      huffcode[p++] = code;
611+      code++;
612+    }
613+    /* code is now 1 more than the last code used for codelength si; but
614+     * it must still fit in si bits, since no code is allowed to be all ones.
615+     */
616+    if (((JLONG)code) >= (((JLONG)1) << si))
617+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
618+    code <<= 1;
619+    si++;
620+  }
621+
622+  /* Figure F.15: generate decoding tables for bit-sequential decoding */
623+  p = 0;
624+  for (l = 1; l <= MAX_HUFF_CODE_LEN; l++) {
625+    if (htbl->bits[l]) {
626+      /* valoffset[l] = huffval[] index of 1st symbol of code length l,
627+       * minus the minimum code of length l
628+       */
629+      dtbl->valoffset[l] = (JLONG)p - (JLONG)huffcode[p];
630+      p += htbl->bits[l];
631+      dtbl->maxcode[l] = huffcode[p - 1]; /* maximum code of length l */
632+    } else {
633+      dtbl->maxcode[l] = -1;    /* -1 if no codes of this length */
634+    }
635+  }
636+  dtbl->valoffset[17] = 0;  /* entry 17 is one past MAX_HUFF_CODE_LEN (16), the longest code length */
637+  dtbl->maxcode[17] = 0xFFFFFL; /* ensures jpeg_huff_decode terminates */
638+
639+  /* Compute lookahead tables to speed up decoding.
640+   * First every first-level entry is set to the "too long" value
641+   * (HUFF_LOOKAHEAD + 1) << HUFF_LOOKAHEAD; then the codes that are short
642+   * enough overwrite all the entries that correspond to bit sequences
643+   * starting with that code (jpeg_make_d_ac_derived_tbl does so for AC).
644+   */
645+
646+  for (i = 0; i < (1 << HUFF_LOOKAHEAD); i++) {
647+    dtbl->lookup[i] = (HUFF_LOOKAHEAD + 1) << HUFF_LOOKAHEAD;
648+  }
649+  if (!isDC) {
650+    if (!jpeg_make_d_ac_derived_tbl(htbl, dtbl, huffcode))
651+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);  /* AC tables do not fit in lookup[] */
652+  } else {
653+    p = 0;
654+    for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
655+      for (i = 1; i <= (int)htbl->bits[l]; i++, p++) {
656+        /* l = current code's length, p = its index in huffcode[] & huffval[]. */
657+        /* Generate left-justified code followed by all possible bit sequences */
658+        lookbits = huffcode[p] << (HUFF_LOOKAHEAD - l);
659+        for (ctr = 1 << (HUFF_LOOKAHEAD - l); ctr > 0; ctr--) {
660+          dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p];
661+          lookbits++;
662+        }
663+      }
664+    }
665+  }
666+
667+  /* Validate symbols as being reasonable.
668+   * For AC tables, we make no check, but accept all byte values 0..255.
669+   * For DC tables, we require the symbols to be in range 0..15.
670+   * (Tighter bounds could be applied depending on the data depth and mode,
671+   * but this is sufficient to ensure safe decoding.)
672+   */
673+  if (isDC) {
674+    for (i = 0; i < numsymbols; i++) {
675+      int sym = htbl->huffval[i];
676+      if (sym < 0 || sym > 15)  // 15 is the max value of DC symbol
677+        ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
678+    }
679+  }
680+}
681+
682+
683+/*
684+ * Out-of-line code for bit fetching (shared with jdphuff.c).
685+ * See jdhuff.h for info about usage.
686+ * Note: current values of get_buffer and bits_left are passed as parameters,
687+ * but are returned in the corresponding fields of the state struct.
688+ *
689+ * On most machines MIN_GET_BITS should be 25 to allow the full 32-bit width
690+ * of get_buffer to be used.  (On machines with wider words, an even larger
691+ * buffer could be used.)  However, on some machines 32-bit shifts are
692+ * quite slow and take time proportional to the number of places shifted.
693+ * (This is true with most PC compilers, for instance.)  In this case it may
694+ * be a win to set MIN_GET_BITS to the minimum value of 15.  This reduces the
695+ * average shift distance at the cost of more calls to jpeg_fill_bit_buffer.
696+ */
697+
698+#ifdef SLOW_SHIFT_32
699+#define MIN_GET_BITS  15        /* minimum allowable value */
700+#else
701+#define MIN_GET_BITS  (BIT_BUF_SIZE - 7)
702+#endif
703+
704+
705+GLOBAL(boolean)
706+jpeg_fill_bit_buffer(bitread_working_state *state,
707+                     register bit_buf_type get_buffer, register int bits_left,
708+                     int nbits)
709+/* Load up the bit buffer to a depth of at least nbits.  Returns FALSE only when fill_input_buffer reports failure (the locals are then not written back to *state); returns TRUE otherwise. */
710+{
711+  /* Copy heavily used state fields into locals (hopefully registers) */
712+  register const JOCTET *next_input_byte = state->next_input_byte;
713+  register size_t bytes_in_buffer = state->bytes_in_buffer;
714+  j_decompress_ptr cinfo = state->cinfo;
715+
716+  /* Attempt to load at least MIN_GET_BITS bits into get_buffer. */
717+  /* (It is assumed that no request will be for more than that many bits.) */
718+  /* We fail to do so only if we hit a marker or are forced to suspend. */
719+
720+  if (cinfo->unread_marker == 0) {      /* cannot advance past a marker */
721+    while (bits_left < MIN_GET_BITS) {
722+      register int c;
723+
724+      /* Attempt to read a byte */
725+      if (bytes_in_buffer == 0) {
726+        if (!(*cinfo->src->fill_input_buffer) (cinfo))
727+          return FALSE;
728+        next_input_byte = cinfo->src->next_input_byte;
729+        bytes_in_buffer = cinfo->src->bytes_in_buffer;
730+      }
731+      bytes_in_buffer--;
732+      c = *next_input_byte++;
733+
734+      /* If it's 0xFF, check and discard stuffed zero byte */
735+      if (c == 0xFF) {
736+        /* Loop here to discard any padding FF's on terminating marker,
737+         * so that we can save a valid unread_marker value.  NOTE: we will
738+         * accept multiple FF's followed by a 0 as meaning a single FF data
739+         * byte.  This data pattern is not valid according to the standard.
740+         */
741+        do {
742+          if (bytes_in_buffer == 0) {
743+            if (!(*cinfo->src->fill_input_buffer) (cinfo))
744+              return FALSE;
745+            next_input_byte = cinfo->src->next_input_byte;
746+            bytes_in_buffer = cinfo->src->bytes_in_buffer;
747+          }
748+          bytes_in_buffer--;
749+          c = *next_input_byte++;
750+        } while (c == 0xFF);
751+
752+        if (c == 0) {
753+          /* Found FF/00, which represents an FF data byte */
754+          c = 0xFF;
755+        } else {
756+          /* Oops, it's actually a marker indicating end of compressed data.
757+           * Save the marker code for later use.
758+           * Fine point: it might appear that we should save the marker into
759+           * bitread working state, not straight into permanent state.  But
760+           * once we have hit a marker, we cannot need to suspend within the
761+           * current MCU, because we will read no more bytes from the data
762+           * source.  So it is OK to update permanent state right away.
763+           */
764+          cinfo->unread_marker = c;
765+          /* See if we need to insert some fake zero bits. */
766+          goto no_more_bytes;
767+        }
768+      }
769+
770+      /* OK, load c into get_buffer */
771+      get_buffer = (get_buffer << 8) | c; // append the new byte below the bits already held
772+      bits_left += 8; // one more byte means 8 more available bits
773+    } /* end while */
774+  } else {
775+no_more_bytes:
776+    /* We get here if we've read the marker that terminates the compressed
777+     * data segment.  There should be enough bits in the buffer register
778+     * to satisfy the request; if so, no problem.
779+     */
780+    if (nbits > bits_left) {
781+      /* Uh-oh.  Report corrupted data to user and stuff zeroes into
782+       * the data stream, so that we can produce some kind of image.
783+       * We use a nonvolatile flag to ensure that only one warning message
784+       * appears per data segment.
785+       */
786+      if (!cinfo->entropy->insufficient_data) {
787+        WARNMS(cinfo, JWRN_HIT_MARKER);
788+        cinfo->entropy->insufficient_data = TRUE;
789+      }
790+      /* Fill the buffer with zero bits */
791+      get_buffer <<= MIN_GET_BITS - bits_left;
792+      bits_left = MIN_GET_BITS;
793+    }
794+  }
795+
796+  /* Unload the local registers */
797+  state->next_input_byte = next_input_byte;
798+  state->bytes_in_buffer = bytes_in_buffer;
799+  state->get_buffer = get_buffer;
800+  state->bits_left = bits_left;
801+
802+  return TRUE;
803+}
804+
805+
806+/* Macro version of the above, which performs much better but does not
807+   handle markers.  We have to hand off any blocks with markers to the
808+   slower routines. */
809+
810+#define GET_BYTE { \
811+  register int c0, c1; \
812+  c0 = *buffer++; /* consume one input byte */ \
813+  c1 = *buffer; /* peek at the following byte without consuming it */ \
814+  /* Pre-execute most common case */ \
815+  get_buffer = (get_buffer << 8) | c0; \
816+  bits_left += 8; \
817+  if (c0 == 0xFF) { \
818+    /* Pre-execute case of FF/00, which represents an FF data byte */ \
819+    buffer++; \
820+    if (c1 != 0) { \
821+      /* Oops, it's actually a marker indicating end of compressed data. */ \
822+      cinfo->unread_marker = c1; \
823+      /* Back out pre-execution and fill the buffer with zero bits */ \
824+      buffer -= 2; /* undo both increments so buffer points at the FF again */ \
825+      get_buffer &= ~0xFF; /* zero the 8 bits that were loaded from c0 */ \
826+    } \
827+  } \
828+}
829+
830+#if SIZEOF_SIZE_T == 8 || defined(_WIN64) || (defined(__x86_64__) && defined(__ILP32__))
831+
832+/* Pre-fetch 48 bits (six bytes), because the holding register is 64-bit */
833+#define FILL_BIT_BUFFER_FAST \
834+  if (bits_left <= 16) { \
835+    GET_BYTE GET_BYTE GET_BYTE GET_BYTE GET_BYTE GET_BYTE \
836+  }
837+
838+#else
839+
840+/* Pre-fetch 16 bits (two bytes), because the holding register is 32-bit */
841+#define FILL_BIT_BUFFER_FAST \
842+  if (bits_left <= 16) { \
843+    GET_BYTE GET_BYTE \
844+  }
845+
846+#endif
847+
848+
849+/*
850+ * Out-of-line code for Huffman code decoding.
851+ * See jdhuff.h for info about usage.
852+ */
853+
854+GLOBAL(int)
855+jpeg_huff_decode(bitread_working_state *state,
856+                 register bit_buf_type get_buffer, register int bits_left,
857+                 d_derived_tbl *htbl, int min_bits)
858+{
859+  register int l = min_bits; /* candidate code length; grows by one per loop pass */
860+  register JLONG code; /* code bits gathered so far */
861+
862+  /* HUFF_DECODE has determined that the code is at least min_bits */
863+  /* bits long, so fetch that many bits in one swoop. */
864+
865+  CHECK_BIT_BUFFER(*state, l, return -1); /* callers in this file treat a negative result as failure */
866+  code = GET_BITS(l);
867+
868+  /* Collect the rest of the Huffman code one bit at a time. */
869+  /* This is per Figure F.16. */
870+
871+  while (code > htbl->maxcode[l]) {
872+    code <<= 1;
873+    CHECK_BIT_BUFFER(*state, 1, return -1);
874+    code |= GET_BITS(1);
875+    l++;
876+  }
877+
878+  /* Unload the local registers */
879+  state->get_buffer = get_buffer;
880+  state->bits_left = bits_left;
881+
882+  /* With garbage input we may reach the sentinel value l = 17. */
883+
884+  if (l > MAX_HUFF_CODE_LEN) {
885+    WARNMS(state->cinfo, JWRN_HUFF_BAD_CODE);
886+    return 0;                   /* fake a zero as the safest result */
887+  }
888+
889+  return htbl->pub->huffval[(int)(code + htbl->valoffset[l])]; /* code + valoffset[l] is the symbol's index in huffval[] */
890+}
891+
892+/*
893+ * Check for a restart marker & resynchronize decoder.
894+ * Returns FALSE if must suspend.
895+ */
896+
897+LOCAL(boolean)
898+process_restart(j_decompress_ptr cinfo)
899+{
900+  huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
901+  int ci;
902+
903+  /* Throw away any unused bits remaining in bit buffer; */
904+  /* include any full bytes in next_marker's count of discarded bytes */
905+  cinfo->marker->discarded_bytes += entropy->bitstate.bits_left / 8; // whole bytes still buffered (8 bits per byte)
906+  entropy->bitstate.bits_left = 0;
907+
908+  /* Advance past the RSTn marker */
909+  if (!(*cinfo->marker->read_restart_marker) (cinfo))
910+    return FALSE;
911+
912+  /* Re-initialize DC predictions to 0 */
913+  for (ci = 0; ci < cinfo->comps_in_scan; ci++)
914+    entropy->saved.last_dc_val[ci] = 0;
915+
916+  /* Reset restart counter */
917+  entropy->restarts_to_go = cinfo->restart_interval;
918+
919+  /* Reset out-of-data flag, unless read_restart_marker left us smack up
920+   * against a marker.  In that case we will end up treating the next data
921+   * segment as empty, and we can avoid producing bogus output pixels by
922+   * leaving the flag set.
923+   */
924+  if (cinfo->unread_marker == 0)
925+    entropy->pub.insufficient_data = FALSE;
926+
927+  return TRUE;
928+}
929+
930+
931+#if defined(__has_feature)
932+#if __has_feature(undefined_behavior_sanitizer)
933+__attribute__((no_sanitize("signed-integer-overflow"),
934+               no_sanitize("unsigned-integer-overflow")))
935+#endif
936+#endif
937+LOCAL(boolean)
938+decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
939+{
940+  huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
941+  BITREAD_STATE_VARS;
942+  int blkn;
943+  savable_state state;
944+  /* Outer loop handles each block in the MCU */
945+
946+  /* Load up working state */
947+  BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
948+  state = entropy->saved;
949+
950+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
951+    JBLOCKROW block = MCU_data ? MCU_data[blkn] : NULL;
952+    d_derived_tbl *dctbl = entropy->dc_cur_tbls[blkn];
953+    d_derived_tbl *actbl = entropy->ac_cur_tbls[blkn];
954+    register int s, k, r;
955+
956+    /* Decode a single block's worth of coefficients */
957+
958+    /* Section F.2.2.1: decode the DC coefficient difference */
959+    HUFF_DECODE(s, br_state, dctbl, return FALSE, label1);
960+    if (s) {
961+      CHECK_BIT_BUFFER(br_state, s, return FALSE);
962+      r = GET_BITS(s);
963+      s = HUFF_EXTEND(r, s);
964+    }
965+
966+    if (entropy->dc_needed[blkn]) {
967+      /* Convert DC difference to actual value, update last_dc_val */
968+      int ci = cinfo->MCU_membership[blkn];
969+      /* Certain malformed JPEG images produce repeated DC coefficient
970+       * differences of 2047 or -2047, which causes state.last_dc_val[ci] to
971+       * grow until it overflows or underflows a 32-bit signed integer.  This
972+       * behavior is, to the best of our understanding, innocuous, and it is
973+       * unclear how to work around it without potentially affecting
974+       * performance.  Thus, we (hopefully temporarily) suppress UBSan integer
975+       * overflow errors for this function and decode_mcu_fast().
976+       */
977+      s += state.last_dc_val[ci];
978+      state.last_dc_val[ci] = s;
979+      if (block) {
980+        /* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */
981+        (*block)[0] = (JCOEF)s;
982+      }
983+    }
984+
985+    if (entropy->ac_needed[blkn] && block) {
986+      /* Section F.2.2.2: decode the AC coefficients */
987+      /* Since zeroes are skipped, output area must be cleared beforehand */
988+      for (k = 1; k < DCTSIZE2; k++) {
989+        register int nb, look;
990+        if (bits_left < HUFF_LOOKAHEAD) {
991+          if (!jpeg_fill_bit_buffer(&br_state, get_buffer, bits_left, 0)) {
992+            return FALSE;
993+          }
994+          get_buffer = br_state.get_buffer;
995+          bits_left = br_state.bits_left;
996+          if (bits_left < HUFF_LOOKAHEAD) {
997+            nb = 1;
998+            goto slowlabel;
999+          }
1000+        }
1001+        look = PEEK_BITS(HUFF_LOOKAHEAD);
1002+        r = actbl->lookup[look];
1003+        nb = GET_NB(r);
1004+        unsigned int zero_num;
1005+        unsigned int coef_bits = GET_COEF_BITS(r);
1006+        if (nb <= HUFF_LOOKAHEAD) {
1007+          DROP_BITS(nb);
1008+          s = actbl->lookup[look] & ((1 << HUFF_LOOKAHEAD) - 1); /* NOTE(review): overwritten in both branches below before it is read */
1009+          zero_num = GET_ZERO_NUM1(r);
1010+          k += zero_num;
1011+          if (coef_bits == 0) {
1012+            s = GET_COEF1(r);
1013+            (*block)[jpeg_natural_order[k]] = (JCOEF)s;
1014+          } else {
1015+            CHECK_BIT_BUFFER(br_state, (int)coef_bits, return FALSE);
1016+            r = GET_BITS(coef_bits);
1017+            s = HUFF_EXTEND(r, coef_bits);
1018+            (*block)[jpeg_natural_order[k]] = (JCOEF)s;
1019+          }
1020+        } else {
1021+        slowlabel:
1022+          nb = 1;
1023+          if ((s = jpeg_huff_decode(&br_state, get_buffer, bits_left, actbl, nb)) < 0) { return FALSE; }
1024+          get_buffer = br_state.get_buffer;
1025+          bits_left = br_state.bits_left;
1026+
1027+          r = s >> 4; // upper 4 bits of the symbol: the zero-run length
1028+          s &= 15;  // lower 4 bits of the symbol: the coefficient bit count
1029+
1030+          if (s) {
1031+            k += r;
1032+            CHECK_BIT_BUFFER(br_state, s, return FALSE);
1033+            r = GET_BITS(s);
1034+            s = HUFF_EXTEND(r, s);
1035+            /* Output coefficient in natural (dezigzagged) order.
1036+             * Note: the extra entries in jpeg_natural_order[] will save us
1037+             * if k >= DCTSIZE2, which could happen if the data is corrupted.
1038+             */
1039+            (*block)[jpeg_natural_order[k]] = (JCOEF)s;
1040+          } else {
1041+            if (r != 15)  // s == 0: only r == 15 (symbol 0xF0, a run of 16 zeros) continues; any other r ends the block
1042+              break;
1043+            k += 15;  // skip 15 zero coefficients; the loop's k++ accounts for the 16th
1044+          }
1045+        }
1046+      }
1047+    } else {
1048+      /* Section F.2.2.2: decode the AC coefficients */
1049+      /* In this path we just discard the values */
1050+      for (k = 1; k < DCTSIZE2; k++) {
1051+        register int nb, look;
1052+        if (bits_left < HUFF_LOOKAHEAD) {
1053+          if (!jpeg_fill_bit_buffer(&br_state, get_buffer, bits_left, 0)) {
1054+            return FALSE;
1055+          }
1056+          get_buffer = br_state.get_buffer;
1057+          bits_left = br_state.bits_left;
1058+          if (bits_left < HUFF_LOOKAHEAD) {
1059+            nb = 1;
1060+            goto slowlabel2;
1061+          }
1062+        }
1063+        look = PEEK_BITS(HUFF_LOOKAHEAD);
1064+        r = actbl->lookup[look];
1065+        nb = GET_NB(r);
1066+        unsigned int zero_num;
1067+        unsigned int coef_bits = GET_COEF_BITS(r);
1068+        if (nb <= HUFF_LOOKAHEAD) {
1069+          DROP_BITS(nb);
1070+          s = actbl->lookup[look] & ((1 << HUFF_LOOKAHEAD) - 1); /* NOTE(review): this value does not appear to be read in the discard path */
1071+          zero_num = GET_ZERO_NUM1(r);
1072+          k += zero_num;
1073+          if (coef_bits != 0) {
1074+            CHECK_BIT_BUFFER(br_state, (int)coef_bits, return FALSE);
1075+            DROP_BITS(coef_bits);
1076+          }
1077+        } else {
1078+        slowlabel2:
1079+          nb = 1;
1080+          if ((s = jpeg_huff_decode(&br_state, get_buffer, bits_left, actbl, nb)) < 0) { return FALSE; }
1081+          get_buffer = br_state.get_buffer;
1082+          bits_left = br_state.bits_left;
1083+
1084+          r = s >> 4; // upper 4 bits of the symbol: the zero-run length
1085+          s &= 15; // lower 4 bits of the symbol: the coefficient bit count
1086+
1087+          if (s) {
1088+            k += r;
1089+            CHECK_BIT_BUFFER(br_state, s, return FALSE);
1090+            DROP_BITS(s);
1091+          } else {
1092+            if (r != 15) // s == 0: only r == 15 (symbol 0xF0, a run of 16 zeros) continues; any other r ends the block
1093+              break;
1094+            k += 15;  // skip 15 zero coefficients; the loop's k++ accounts for the 16th
1095+          }
1096+        }
1097+      }
1098+    }
1099+  }
1100+
1101+  /* Completed MCU, so update state */
1102+  BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
1103+  entropy->saved = state;
1104+  return TRUE;
1105+}
1106+
1107+
1108+#if defined(__has_feature)
1109+#if __has_feature(undefined_behavior_sanitizer)
1110+__attribute__((no_sanitize("signed-integer-overflow"),
1111+               no_sanitize("unsigned-integer-overflow")))
1112+#endif
1113+#endif
1114+LOCAL(boolean)
1115+decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
1116+{
1117+  huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
1118+  BITREAD_STATE_VARS;
1119+  JOCTET *buffer;
1120+  int blkn;
1121+  savable_state state;
1122+  /* Outer loop handles each block in the MCU */
1123+
1124+  /* Load up working state */
1125+  BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
1126+  buffer = (JOCTET *)br_state.next_input_byte;
1127+  state = entropy->saved;
1128+
1129+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
1130+    JBLOCKROW block = MCU_data ? MCU_data[blkn] : NULL;
1131+    d_derived_tbl *dctbl = entropy->dc_cur_tbls[blkn];
1132+    d_derived_tbl *actbl = entropy->ac_cur_tbls[blkn];
1133+    register int s, k, r, l;
1134+
1135+    HUFF_DECODE_FAST(s, l, dctbl);
1136+    if (s) {
1137+      FILL_BIT_BUFFER_FAST
1138+      r = GET_BITS(s);
1139+      s = HUFF_EXTEND(r, s);
1140+    }
1141+
1142+    if (entropy->dc_needed[blkn]) {
1143+      int ci = cinfo->MCU_membership[blkn];
1144+      /* Refer to the comment in decode_mcu_slow() regarding the suppression of
1145+       * a UBSan integer overflow error in this line of code.
1146+       */
1147+      s += state.last_dc_val[ci];
1148+      state.last_dc_val[ci] = s;
1149+      if (block)
1150+        (*block)[0] = (JCOEF)s;
1151+    }
1152+
1153+    if (entropy->ac_needed[blkn] && block) {
1154+      for (k = 1; k < DCTSIZE2; k++) {
1155+        FILL_BIT_BUFFER_FAST;
1156+        r = PEEK_BITS(HUFF_LOOKAHEAD);    // peek at the next HUFF_LOOKAHEAD bits first
1157+        r = actbl->lookup[r];
1158+        l = GET_NB(r);
1159+        unsigned int zero_num;
1160+        unsigned int coef_bits = GET_COEF_BITS(r);
1161+
1162+        if (l <= HUFF_LOOKAHEAD) {
1163+          zero_num = GET_ZERO_NUM1(r);
1164+          DROP_BITS(l);
1165+          if (coef_bits == 0) {
1166+            s = GET_COEF1(r);
1167+            k += zero_num;
1168+            (*block)[jpeg_natural_order[k]] = (JCOEF)s;
1169+          } else {
1170+            FILL_BIT_BUFFER_FAST
1171+            r = GET_BITS(coef_bits);
1172+            s = HUFF_EXTEND(r, coef_bits);
1173+            k += zero_num;
1174+            (*block)[jpeg_natural_order[k]] = (JCOEF)s;
1175+          }
1176+        } else {
1177+          unsigned int base = GET_BASE(r);  // the upper 16 bits hold base
1178+          unsigned int offset_bits = GET_EXTRA_BITS(r);  // the low 8 bits hold offset_bits; l = nb is the maximum code length in the second-level table
1179+          r = PEEK_BITS(l); // the first HUFF_LOOKAHEAD bits are already used; only the low nb - HUFF_LOOKAHEAD bits matter, so the low offset_bits bits index the second-level table
1180+          s = actbl->lookup[base + (r & ((1 << offset_bits) - 1))];
1181+          l = GET_NB(s); // actual code length
1182+          coef_bits = GET_COEF_BITS(s);
1183+          zero_num = GET_ZERO_NUM1(s);
1184+          DROP_BITS(l);
1185+          if (coef_bits == 0xF) {
1186+            if (zero_num != 0xF) {
1187+              break;
1188+            } else {
1189+              k += 15; // skip 15 zero coefficients; the loop's k++ accounts for the 16th
1190+            }
1191+          } else {
1192+            FILL_BIT_BUFFER_FAST
1193+            r = GET_BITS(coef_bits);
1194+            s = HUFF_EXTEND(r, coef_bits);
1195+            k += zero_num;
1196+            (*block)[jpeg_natural_order[k]] = (JCOEF)s;
1197+          }
1198+        }
1199+      }
1200+    } else {
1201+      for (k = 1; k < DCTSIZE2; k++) {
1202+        FILL_BIT_BUFFER_FAST;
1203+        r = PEEK_BITS(HUFF_LOOKAHEAD);    // peek at the next HUFF_LOOKAHEAD bits first
1204+        r = actbl->lookup[r];
1205+        l = GET_NB(r);
1206+        unsigned int zero_num;
1207+        unsigned int coef_bits = GET_COEF_BITS(r);
1208+
1209+        if (l <= HUFF_LOOKAHEAD) {
1210+          zero_num = GET_ZERO_NUM1(r);
1211+          DROP_BITS(l);
1212+          if (coef_bits == 0) {
1213+            s = GET_COEF1(r);
1214+            k += zero_num;
1215+          } else {
1216+            FILL_BIT_BUFFER_FAST
1217+            DROP_BITS(coef_bits);
1218+            k += zero_num;
1219+          }
1220+        } else {
1221+          unsigned int base = GET_BASE(r);  // the upper 16 bits hold base
1222+          unsigned int offset_bits = GET_EXTRA_BITS(r);  // the low 8 bits hold offset_bits; l = nb is the maximum code length in the second-level table
1223+          r = PEEK_BITS(l); // the first HUFF_LOOKAHEAD bits are already used; only the low nb - HUFF_LOOKAHEAD bits matter, so the low offset_bits bits index the second-level table
1224+          s = actbl->lookup[base + (r & ((1 << offset_bits) - 1))];
1225+          l = GET_NB(s); // actual code length
1226+          coef_bits = GET_COEF_BITS(s);
1227+          zero_num = GET_ZERO_NUM1(s);
1228+          DROP_BITS(l);
1229+          if (coef_bits == 0xF) {
1230+            if (zero_num != 0xF) {
1231+              break;
1232+            } else {
1233+              k += 15; // skip 15 zero coefficients; the loop's k++ accounts for the 16th
1234+            }
1235+          } else {
1236+            FILL_BIT_BUFFER_FAST
1237+            DROP_BITS(coef_bits);
1238+            k += zero_num;
1239+          }
1240+        }
1241+      }
1242+    }
1243+  }
1244+
1245+  if (cinfo->unread_marker != 0) {
1246+    cinfo->unread_marker = 0;
1247+    return FALSE;
1248+  }
1249+
1250+  br_state.bytes_in_buffer -= (buffer - br_state.next_input_byte);
1251+  br_state.next_input_byte = buffer;
1252+  BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
1253+  entropy->saved = state;
1254+  return TRUE;
1255+}
1256+
1257+
1258+/*
1259+ * Decode and return one MCU's worth of Huffman-compressed coefficients.
1260+ * The coefficients are reordered from zigzag order into natural array order,
1261+ * but are not dequantized.
1262+ *
1263+ * The i'th block of the MCU is stored into the block pointed to by
1264+ * MCU_data[i].  WE ASSUME THIS AREA HAS BEEN ZEROED BY THE CALLER.
1265+ * (Wholesale zeroing is usually a little faster than retail...)
1266+ *
1267+ * Returns FALSE if data source requested suspension.  In that case no
1268+ * changes have been made to permanent state.  (Exception: some output
1269+ * coefficients may already have been assigned.  This is harmless for
1270+ * this module, since we'll just re-assign them on the next call.)
1271+ */
1272+
1273+#define BUFSIZE  (DCTSIZE2 * 8)
1274+
1275+METHODDEF(boolean)
1276+decode_mcu(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
1277+{
1278+  huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
1279+  int usefast = 1;
1280+
1281+  /* Process restart marker if needed; may have to suspend */
1282+  if (cinfo->restart_interval) {
1283+    if (entropy->restarts_to_go == 0)
1284+      if (!process_restart(cinfo))
1285+        return FALSE;
1286+    usefast = 0; /* the fast path is never taken when a restart interval is set */
1287+  }
1288+
1289+  if (cinfo->src->bytes_in_buffer < BUFSIZE * (size_t)cinfo->blocks_in_MCU ||
1290+      cinfo->unread_marker != 0)
1291+    usefast = 0; /* too little buffered input, or a marker is pending */
1292+
1293+  /* If we've run out of data, just leave the MCU set to zeroes.
1294+   * This way, we return uniform gray for the remainder of the segment.
1295+   */
1296+  if (!entropy->pub.insufficient_data) {
1297+    if (usefast) {
1298+      if (!decode_mcu_fast(cinfo, MCU_data)) goto use_slow; /* fast decoder returned FALSE: redo this MCU with the slow decoder */
1299+    } else {
1300+use_slow:
1301+      if (!decode_mcu_slow(cinfo, MCU_data)) return FALSE;
1302+    }
1303+  }
1304+
1305+  /* Account for restart interval (no-op if not using restarts) */
1306+  if (cinfo->restart_interval)
1307+    entropy->restarts_to_go--;
1308+
1309+  return TRUE;
1310+}
1311+
1312+
1313+/*
1314+ * Module initialization routine for Huffman entropy decoding.
1315+ */
1316+
1317+GLOBAL(void)
1318+jinit_huff_decoder(j_decompress_ptr cinfo)
1319+{
1320+  huff_entropy_ptr entropy;
1321+  int i;
1322+
1323+  /* Motion JPEG frames typically do not include the Huffman tables if they
1324+     are the default tables.  Thus, if the tables are not set by the time
1325+     the Huffman decoder is initialized (usually within the body of
1326+     jpeg_start_decompress()), we set them to default values. */
1327+  std_huff_tables((j_common_ptr)cinfo);
1328+
1329+  entropy = (huff_entropy_ptr)
1330+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
1331+                                sizeof(huff_entropy_decoder));
1332+  cinfo->entropy = (struct jpeg_entropy_decoder *)entropy;
1333+  entropy->pub.start_pass = start_pass_huff_decoder;
1334+  entropy->pub.decode_mcu = decode_mcu;
1335+
1336+  /* Mark tables unallocated: NULL every DC and AC derived-table slot */
1337+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
1338+    entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
1339+  }
1340+}
1341diff --git a/jdphuff.c b/jdphuff.c
1342index c6d82ca..25e3b1d 100644
1343--- a/jdphuff.c
1344+++ b/jdphuff.c
1345@@ -71,6 +71,129 @@ METHODDEF(boolean) decode_mcu_DC_refine(j_decompress_ptr cinfo,
1346 METHODDEF(boolean) decode_mcu_AC_refine(j_decompress_ptr cinfo,
1347                                         JBLOCKROW *MCU_data);
1348 
1349+#ifdef HUFF_DECODE_OPT
1350+// OH ISSUE: jpeg optimize -- derived-table builder used by the progressive decoder; fills maxcode/valoffset and a (length << HUFF_LOOKAHEAD) | symbol lookahead table
1351+LOCAL(void)
1352+jpeg_make_dp_derived_tbl(j_decompress_ptr cinfo, boolean isDC, int tblno,
1353+                        d_derived_tbl **pdtbl)
1354+{
1355+  JHUFF_TBL *htbl;
1356+  d_derived_tbl *dtbl;
1357+  int p, i, l, si, numsymbols;
1358+  int lookbits, ctr;
1359+  char huffsize[257];
1360+  unsigned int huffcode[257];
1361+  unsigned int code;
1362+
1363+  /* Note that huffsize[] and huffcode[] are filled in code-length order,
1364+   * paralleling the order of the symbols themselves in htbl->huffval[].
1365+   */
1366+
1367+  /* Find the input Huffman table */
1368+  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
1369+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
1370+  htbl =
1371+    isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
1372+  if (htbl == NULL)
1373+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
1374+
1375+  /* Allocate a workspace if we haven't already done so. */
1376+  if (*pdtbl == NULL)
1377+    *pdtbl = (d_derived_tbl *)
1378+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
1379+                                  sizeof(d_derived_tbl));
1380+  dtbl = *pdtbl;
1381+  dtbl->pub = htbl;             /* fill in back link */
1382+
1383+  /* Figure C.1: make table of Huffman code length for each symbol */
1384+
1385+  p = 0;
1386+  for (l = 1; l <= MAX_HUFF_CODE_LEN; l++) {
1387+    i = (int)htbl->bits[l];
1388+    if (i < 0 || p + i > 256)   /* protect against table overrun, 256 is the max number of symbols */
1389+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
1390+    while (i--)
1391+      huffsize[p++] = (char)l;
1392+  }
1393+  huffsize[p] = 0;
1394+  numsymbols = p;
1395+
1396+  /* Figure C.2: generate the codes themselves */
1397+  /* We also validate that the counts represent a legal Huffman code tree. */
1398+
1399+  code = 0;
1400+  si = huffsize[0];
1401+  p = 0;
1402+  while (huffsize[p]) {
1403+    while (((int)huffsize[p]) == si) {
1404+      huffcode[p++] = code;
1405+      code++;
1406+    }
1407+    /* code is now 1 more than the last code used for codelength si; but
1408+     * it must still fit in si bits, since no code is allowed to be all ones.
1409+     */
1410+    if (((JLONG)code) >= (((JLONG)1) << si))
1411+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
1412+    code <<= 1;
1413+    si++;
1414+  }
1415+
1416+  /* Figure F.15: generate decoding tables for bit-sequential decoding */
1417+
1418+  p = 0;
1419+  for (l = 1; l <= MAX_HUFF_CODE_LEN; l++) {
1420+    if (htbl->bits[l]) {
1421+      /* valoffset[l] = huffval[] index of 1st symbol of code length l,
1422+       * minus the minimum code of length l
1423+       */
1424+      dtbl->valoffset[l] = (JLONG)p - (JLONG)huffcode[p];
1425+      p += htbl->bits[l];
1426+      dtbl->maxcode[l] = huffcode[p - 1]; /* maximum code of length l */
1427+    } else {
1428+      dtbl->maxcode[l] = -1;    /* -1 if no codes of this length */
1429+    }
1430+  }
1431+  dtbl->valoffset[17] = 0; /* index 17 is the sentinel slot just past the code lengths filled by the loop above */
1432+  dtbl->maxcode[17] = 0xFFFFFL; /* ensures jpeg_huff_decode terminates; 17 is the same sentinel index as above */
1433+
1434+  /* Compute lookahead tables to speed up decoding.
1435+   * First we set every table entry to a code length of HUFF_LOOKAHEAD + 1, indicating "too long";
1436+   * then we iterate through the Huffman codes that are short enough and
1437+   * fill in all the entries that correspond to bit sequences starting
1438+   * with that code.
1439+   */
1440+
1441+  for (i = 0; i < (1 << HUFF_LOOKAHEAD); i++)
1442+    dtbl->lookup[i] = (HUFF_LOOKAHEAD + 1) << HUFF_LOOKAHEAD;
1443+
1444+  p = 0;
1445+  for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
1446+    for (i = 1; i <= (int)htbl->bits[l]; i++, p++) {
1447+      /* l = current code's length, p = its index in huffcode[] & huffval[]. */
1448+      /* Generate left-justified code followed by all possible bit sequences */
1449+      lookbits = huffcode[p] << (HUFF_LOOKAHEAD - l);
1450+      for (ctr = 1 << (HUFF_LOOKAHEAD - l); ctr > 0; ctr--) {
1451+        dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p];
1452+        lookbits++;
1453+      }
1454+    }
1455+  }
1456+
1457+  /* Validate symbols as being reasonable.
1458+   * For AC tables, we make no check, but accept all byte values 0..255.
1459+   * For DC tables, we require the symbols to be in range 0..15.
1460+   * (Tighter bounds could be applied depending on the data depth and mode,
1461+   * but this is sufficient to ensure safe decoding.)
1462+   */
1463+  if (isDC) {
1464+    for (i = 0; i < numsymbols; i++) {
1465+      int sym = htbl->huffval[i];
1466+      if (sym < 0 || sym > 15) // 15 is the max value of DC symbol
1467+        ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
1468+    }
1469+  }
1470+}
1471+#endif
1472 
1473 /*
1474  * Initialize for a Huffman-compressed scan.
1475@@ -163,12 +286,22 @@ start_pass_phuff_decoder(j_decompress_ptr cinfo)
1476       if (cinfo->Ah == 0) {     /* DC refinement needs no table */
1477         tbl = compptr->dc_tbl_no;
1478         pdtbl = (d_derived_tbl **)(entropy->derived_tbls) + tbl;
1479+#ifdef HUFF_DECODE_OPT
1480+        // OH ISSUE: jpeg optimize
1481+        jpeg_make_dp_derived_tbl(cinfo, TRUE, tbl, pdtbl);
1482+#else
1483         jpeg_make_d_derived_tbl(cinfo, TRUE, tbl, pdtbl);
1484+#endif
1485       }
1486     } else {
1487       tbl = compptr->ac_tbl_no;
1488       pdtbl = (d_derived_tbl **)(entropy->derived_tbls) + tbl;
1489+#ifdef HUFF_DECODE_OPT
1490+      // OH ISSUE: jpeg optimize
1491+      jpeg_make_dp_derived_tbl(cinfo, FALSE, tbl, pdtbl);
1492+#else
1493       jpeg_make_d_derived_tbl(cinfo, FALSE, tbl, pdtbl);
1494+#endif
1495       /* remember the single active table */
1496       entropy->ac_derived_tbl = entropy->derived_tbls[tbl];
1497     }
1498diff --git a/jmemsys.h b/jmemsys.h
1499index 9229550..a17f46c 100644
1500--- a/jmemsys.h
1501+++ b/jmemsys.h
1502@@ -31,8 +31,8 @@
1503  * size of the object being freed, just in case it's needed.
1504  */
1505 
1506-EXTERN(void *) jpeg_get_small(j_common_ptr cinfo, size_t sizeofobject);
1507-EXTERN(void) jpeg_free_small(j_common_ptr cinfo, void *object,
1508+HIDE(EXTERN)(void *) jpeg_get_small(j_common_ptr cinfo, size_t sizeofobject);
1509+HIDE(EXTERN)(void) jpeg_free_small(j_common_ptr cinfo, void *object,
1510                              size_t sizeofobject);
1511 
1512 /*
1513@@ -43,8 +43,8 @@ EXTERN(void) jpeg_free_small(j_common_ptr cinfo, void *object,
1514  * large chunks.
1515  */
1516 
1517-EXTERN(void *) jpeg_get_large(j_common_ptr cinfo, size_t sizeofobject);
1518-EXTERN(void) jpeg_free_large(j_common_ptr cinfo, void *object,
1519+HIDE(EXTERN)(void *) jpeg_get_large(j_common_ptr cinfo, size_t sizeofobject);
1520+HIDE(EXTERN)(void) jpeg_free_large(j_common_ptr cinfo, void *object,
1521                              size_t sizeofobject);
1522 
1523 /*
1524@@ -84,7 +84,7 @@ EXTERN(void) jpeg_free_large(j_common_ptr cinfo, void *object,
1525  * Conversely, zero may be returned to always use the minimum amount of memory.
1526  */
1527 
1528-EXTERN(size_t) jpeg_mem_available(j_common_ptr cinfo, size_t min_bytes_needed,
1529+HIDE(EXTERN)(size_t) jpeg_mem_available(j_common_ptr cinfo, size_t min_bytes_needed,
1530                                   size_t max_bytes_needed,
1531                                   size_t already_allocated);
1532 
1533@@ -157,7 +157,7 @@ typedef struct backing_store_struct {
1534  * just take an error exit.)
1535  */
1536 
1537-EXTERN(void) jpeg_open_backing_store(j_common_ptr cinfo,
1538+HIDE(EXTERN)(void) jpeg_open_backing_store(j_common_ptr cinfo,
1539                                      backing_store_ptr info,
1540                                      long total_bytes_needed);
1541 
1542@@ -174,5 +174,5 @@ EXTERN(void) jpeg_open_backing_store(j_common_ptr cinfo,
1543  * all opened backing-store objects have been closed.
1544  */
1545 
1546-EXTERN(long) jpeg_mem_init(j_common_ptr cinfo);
1547-EXTERN(void) jpeg_mem_term(j_common_ptr cinfo);
1548+HIDE(EXTERN)(long) jpeg_mem_init(j_common_ptr cinfo);
1549+HIDE(EXTERN)(void) jpeg_mem_term(j_common_ptr cinfo);
1550diff --git a/jmorecfg.h b/jmorecfg.h
1551index 1a63a14..f4b3903 100644
1552--- a/jmorecfg.h
1553+++ b/jmorecfg.h
1554@@ -178,7 +178,12 @@ typedef unsigned int JDIMENSION;
1555 #define GLOBAL(type)            type
1556 /* a reference to a GLOBAL function: */
1557 #define EXTERN(type)            extern type
1558-
1559+/* a function used only inside this library (hidden visibility if HIDE_INTERNAL): */
1560+#ifdef HIDE_INTERNAL
1561+  #define HIDE(type)            __attribute__((visibility("hidden"))) type
1562+#else
1563+  #define HIDE(type)            type
1564+#endif
1565 
1566 /* Originally, this macro was used as a way of defining function prototypes
1567  * for both modern compilers as well as older compilers that did not support
1568diff --git a/jpegint.h b/jpegint.h
1569index 8c85347..7798a89 100644
1570--- a/jpegint.h
1571+++ b/jpegint.h
1572@@ -318,53 +318,53 @@ struct jpeg_color_quantizer {
1573 
1574 
1575 /* Compression module initialization routines */
1576-EXTERN(void) jinit_compress_master(j_compress_ptr cinfo);
1577-EXTERN(void) jinit_c_master_control(j_compress_ptr cinfo,
1578+HIDE(EXTERN)(void) jinit_compress_master(j_compress_ptr cinfo);
1579+HIDE(EXTERN)(void) jinit_c_master_control(j_compress_ptr cinfo,
1580                                     boolean transcode_only);
1581-EXTERN(void) jinit_c_main_controller(j_compress_ptr cinfo,
1582+HIDE(EXTERN)(void) jinit_c_main_controller(j_compress_ptr cinfo,
1583                                      boolean need_full_buffer);
1584-EXTERN(void) jinit_c_prep_controller(j_compress_ptr cinfo,
1585+HIDE(EXTERN)(void) jinit_c_prep_controller(j_compress_ptr cinfo,
1586                                      boolean need_full_buffer);
1587-EXTERN(void) jinit_c_coef_controller(j_compress_ptr cinfo,
1588+HIDE(EXTERN)(void) jinit_c_coef_controller(j_compress_ptr cinfo,
1589                                      boolean need_full_buffer);
1590-EXTERN(void) jinit_color_converter(j_compress_ptr cinfo);
1591-EXTERN(void) jinit_downsampler(j_compress_ptr cinfo);
1592-EXTERN(void) jinit_forward_dct(j_compress_ptr cinfo);
1593-EXTERN(void) jinit_huff_encoder(j_compress_ptr cinfo);
1594-EXTERN(void) jinit_phuff_encoder(j_compress_ptr cinfo);
1595-EXTERN(void) jinit_arith_encoder(j_compress_ptr cinfo);
1596-EXTERN(void) jinit_marker_writer(j_compress_ptr cinfo);
1597+HIDE(EXTERN)(void) jinit_color_converter(j_compress_ptr cinfo);
1598+HIDE(EXTERN)(void) jinit_downsampler(j_compress_ptr cinfo);
1599+HIDE(EXTERN)(void) jinit_forward_dct(j_compress_ptr cinfo);
1600+HIDE(EXTERN)(void) jinit_huff_encoder(j_compress_ptr cinfo);
1601+HIDE(EXTERN)(void) jinit_phuff_encoder(j_compress_ptr cinfo);
1602+HIDE(EXTERN)(void) jinit_arith_encoder(j_compress_ptr cinfo);
1603+HIDE(EXTERN)(void) jinit_marker_writer(j_compress_ptr cinfo);
1604 /* Decompression module initialization routines */
1605-EXTERN(void) jinit_master_decompress(j_decompress_ptr cinfo);
1606-EXTERN(void) jinit_d_main_controller(j_decompress_ptr cinfo,
1607+HIDE(EXTERN)(void) jinit_master_decompress(j_decompress_ptr cinfo);
1608+HIDE(EXTERN)(void) jinit_d_main_controller(j_decompress_ptr cinfo,
1609                                      boolean need_full_buffer);
1610-EXTERN(void) jinit_d_coef_controller(j_decompress_ptr cinfo,
1611+HIDE(EXTERN)(void) jinit_d_coef_controller(j_decompress_ptr cinfo,
1612                                      boolean need_full_buffer);
1613-EXTERN(void) jinit_d_post_controller(j_decompress_ptr cinfo,
1614+HIDE(EXTERN)(void) jinit_d_post_controller(j_decompress_ptr cinfo,
1615                                      boolean need_full_buffer);
1616-EXTERN(void) jinit_input_controller(j_decompress_ptr cinfo);
1617-EXTERN(void) jinit_marker_reader(j_decompress_ptr cinfo);
1618-EXTERN(void) jinit_huff_decoder(j_decompress_ptr cinfo);
1619-EXTERN(void) jinit_phuff_decoder(j_decompress_ptr cinfo);
1620-EXTERN(void) jinit_arith_decoder(j_decompress_ptr cinfo);
1621-EXTERN(void) jinit_inverse_dct(j_decompress_ptr cinfo);
1622-EXTERN(void) jinit_upsampler(j_decompress_ptr cinfo);
1623-EXTERN(void) jinit_color_deconverter(j_decompress_ptr cinfo);
1624-EXTERN(void) jinit_1pass_quantizer(j_decompress_ptr cinfo);
1625-EXTERN(void) jinit_2pass_quantizer(j_decompress_ptr cinfo);
1626-EXTERN(void) jinit_merged_upsampler(j_decompress_ptr cinfo);
1627+HIDE(EXTERN)(void) jinit_input_controller(j_decompress_ptr cinfo);
1628+HIDE(EXTERN)(void) jinit_marker_reader(j_decompress_ptr cinfo);
1629+HIDE(EXTERN)(void) jinit_huff_decoder(j_decompress_ptr cinfo);
1630+HIDE(EXTERN)(void) jinit_phuff_decoder(j_decompress_ptr cinfo);
1631+HIDE(EXTERN)(void) jinit_arith_decoder(j_decompress_ptr cinfo);
1632+HIDE(EXTERN)(void) jinit_inverse_dct(j_decompress_ptr cinfo);
1633+HIDE(EXTERN)(void) jinit_upsampler(j_decompress_ptr cinfo);
1634+HIDE(EXTERN)(void) jinit_color_deconverter(j_decompress_ptr cinfo);
1635+HIDE(EXTERN)(void) jinit_1pass_quantizer(j_decompress_ptr cinfo);
1636+HIDE(EXTERN)(void) jinit_2pass_quantizer(j_decompress_ptr cinfo);
1637+HIDE(EXTERN)(void) jinit_merged_upsampler(j_decompress_ptr cinfo);
1638 /* Memory manager initialization */
1639-EXTERN(void) jinit_memory_mgr(j_common_ptr cinfo);
1640+HIDE(EXTERN)(void) jinit_memory_mgr(j_common_ptr cinfo);
1641 
1642 /* Utility routines in jutils.c */
1643-EXTERN(long) jdiv_round_up(long a, long b);
1644-EXTERN(long) jround_up(long a, long b);
1645-EXTERN(void) jcopy_sample_rows(JSAMPARRAY input_array, int source_row,
1646+HIDE(EXTERN)(long) jdiv_round_up(long a, long b);
1647+HIDE(EXTERN)(long) jround_up(long a, long b);
1648+HIDE(EXTERN)(void) jcopy_sample_rows(JSAMPARRAY input_array, int source_row,
1649                                JSAMPARRAY output_array, int dest_row,
1650                                int num_rows, JDIMENSION num_cols);
1651-EXTERN(void) jcopy_block_row(JBLOCKROW input_row, JBLOCKROW output_row,
1652+HIDE(EXTERN)(void) jcopy_block_row(JBLOCKROW input_row, JBLOCKROW output_row,
1653                              JDIMENSION num_blocks);
1654-EXTERN(void) jzero_far(void *target, size_t bytestozero);
1655+HIDE(EXTERN)(void) jzero_far(void *target, size_t bytestozero);
1656 /* Constant tables in jutils.c */
1657 #if 0                           /* This table is not actually needed in v6a */
1658 extern const int jpeg_zigzag_order[]; /* natural coef order to zigzag order */
1659diff --git a/jsimd.h b/jsimd.h
1660index f3a87ee..ea094f6 100644
1661--- a/jsimd.h
1662+++ b/jsimd.h
1663@@ -14,110 +14,110 @@
1664 
1665 #include "jchuff.h"             /* Declarations shared with jcphuff.c */
1666 
1667-EXTERN(int) jsimd_can_rgb_ycc(void);
1668-EXTERN(int) jsimd_can_rgb_gray(void);
1669-EXTERN(int) jsimd_can_ycc_rgb(void);
1670-EXTERN(int) jsimd_can_ycc_rgb565(void);
1671-EXTERN(int) jsimd_c_can_null_convert(void);
1672+HIDE(EXTERN)(int) jsimd_can_rgb_ycc(void);
1673+HIDE(EXTERN)(int) jsimd_can_rgb_gray(void);
1674+HIDE(EXTERN)(int) jsimd_can_ycc_rgb(void);
1675+HIDE(EXTERN)(int) jsimd_can_ycc_rgb565(void);
1676+HIDE(EXTERN)(int) jsimd_c_can_null_convert(void);
1677 
1678-EXTERN(void) jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
1679+HIDE(EXTERN)(void) jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
1680                                    JSAMPIMAGE output_buf,
1681                                    JDIMENSION output_row, int num_rows);
1682-EXTERN(void) jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
1683+HIDE(EXTERN)(void) jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
1684                                     JSAMPIMAGE output_buf,
1685                                     JDIMENSION output_row, int num_rows);
1686-EXTERN(void) jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,
1687+HIDE(EXTERN)(void) jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,
1688                                    JSAMPIMAGE input_buf, JDIMENSION input_row,
1689                                    JSAMPARRAY output_buf, int num_rows);
1690-EXTERN(void) jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,
1691+HIDE(EXTERN)(void) jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,
1692                                       JSAMPIMAGE input_buf,
1693                                       JDIMENSION input_row,
1694                                       JSAMPARRAY output_buf, int num_rows);
1695-EXTERN(void) jsimd_c_null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
1696+HIDE(EXTERN)(void) jsimd_c_null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
1697                                   JSAMPIMAGE output_buf, JDIMENSION output_row,
1698                                   int num_rows);
1699 
1700-EXTERN(int) jsimd_can_h2v2_downsample(void);
1701-EXTERN(int) jsimd_can_h2v1_downsample(void);
1702+HIDE(EXTERN)(int) jsimd_can_h2v2_downsample(void);
1703+HIDE(EXTERN)(int) jsimd_can_h2v1_downsample(void);
1704 
1705-EXTERN(void) jsimd_h2v2_downsample(j_compress_ptr cinfo,
1706+HIDE(EXTERN)(void) jsimd_h2v2_downsample(j_compress_ptr cinfo,
1707                                    jpeg_component_info *compptr,
1708                                    JSAMPARRAY input_data,
1709                                    JSAMPARRAY output_data);
1710 
1711-EXTERN(int) jsimd_can_h2v2_smooth_downsample(void);
1712+HIDE(EXTERN)(int) jsimd_can_h2v2_smooth_downsample(void);
1713 
1714-EXTERN(void) jsimd_h2v2_smooth_downsample(j_compress_ptr cinfo,
1715+HIDE(EXTERN)(void) jsimd_h2v2_smooth_downsample(j_compress_ptr cinfo,
1716                                           jpeg_component_info *compptr,
1717                                           JSAMPARRAY input_data,
1718                                           JSAMPARRAY output_data);
1719 
1720-EXTERN(void) jsimd_h2v1_downsample(j_compress_ptr cinfo,
1721+HIDE(EXTERN)(void) jsimd_h2v1_downsample(j_compress_ptr cinfo,
1722                                    jpeg_component_info *compptr,
1723                                    JSAMPARRAY input_data,
1724                                    JSAMPARRAY output_data);
1725 
1726-EXTERN(int) jsimd_can_h2v2_upsample(void);
1727-EXTERN(int) jsimd_can_h2v1_upsample(void);
1728-EXTERN(int) jsimd_can_int_upsample(void);
1729+HIDE(EXTERN)(int) jsimd_can_h2v2_upsample(void);
1730+HIDE(EXTERN)(int) jsimd_can_h2v1_upsample(void);
1731+HIDE(EXTERN)(int) jsimd_can_int_upsample(void);
1732 
1733-EXTERN(void) jsimd_h2v2_upsample(j_decompress_ptr cinfo,
1734+HIDE(EXTERN)(void) jsimd_h2v2_upsample(j_decompress_ptr cinfo,
1735                                  jpeg_component_info *compptr,
1736                                  JSAMPARRAY input_data,
1737                                  JSAMPARRAY *output_data_ptr);
1738-EXTERN(void) jsimd_h2v1_upsample(j_decompress_ptr cinfo,
1739+HIDE(EXTERN)(void) jsimd_h2v1_upsample(j_decompress_ptr cinfo,
1740                                  jpeg_component_info *compptr,
1741                                  JSAMPARRAY input_data,
1742                                  JSAMPARRAY *output_data_ptr);
1743-EXTERN(void) jsimd_int_upsample(j_decompress_ptr cinfo,
1744+HIDE(EXTERN)(void) jsimd_int_upsample(j_decompress_ptr cinfo,
1745                                 jpeg_component_info *compptr,
1746                                 JSAMPARRAY input_data,
1747                                 JSAMPARRAY *output_data_ptr);
1748 
1749-EXTERN(int) jsimd_can_h2v2_fancy_upsample(void);
1750-EXTERN(int) jsimd_can_h2v1_fancy_upsample(void);
1751-EXTERN(int) jsimd_can_h1v2_fancy_upsample(void);
1752+HIDE(EXTERN)(int) jsimd_can_h2v2_fancy_upsample(void);
1753+HIDE(EXTERN)(int) jsimd_can_h2v1_fancy_upsample(void);
1754+HIDE(EXTERN)(int) jsimd_can_h1v2_fancy_upsample(void);
1755 
1756-EXTERN(void) jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,
1757+HIDE(EXTERN)(void) jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,
1758                                        jpeg_component_info *compptr,
1759                                        JSAMPARRAY input_data,
1760                                        JSAMPARRAY *output_data_ptr);
1761-EXTERN(void) jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,
1762+HIDE(EXTERN)(void) jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,
1763                                        jpeg_component_info *compptr,
1764                                        JSAMPARRAY input_data,
1765                                        JSAMPARRAY *output_data_ptr);
1766-EXTERN(void) jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo,
1767+HIDE(EXTERN)(void) jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo,
1768                                        jpeg_component_info *compptr,
1769                                        JSAMPARRAY input_data,
1770                                        JSAMPARRAY *output_data_ptr);
1771 
1772-EXTERN(int) jsimd_can_h2v2_merged_upsample(void);
1773-EXTERN(int) jsimd_can_h2v1_merged_upsample(void);
1774+HIDE(EXTERN)(int) jsimd_can_h2v2_merged_upsample(void);
1775+HIDE(EXTERN)(int) jsimd_can_h2v1_merged_upsample(void);
1776 
1777-EXTERN(void) jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,
1778+HIDE(EXTERN)(void) jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,
1779                                         JSAMPIMAGE input_buf,
1780                                         JDIMENSION in_row_group_ctr,
1781                                         JSAMPARRAY output_buf);
1782-EXTERN(void) jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,
1783+HIDE(EXTERN)(void) jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,
1784                                         JSAMPIMAGE input_buf,
1785                                         JDIMENSION in_row_group_ctr,
1786                                         JSAMPARRAY output_buf);
1787 
1788-EXTERN(int) jsimd_can_huff_encode_one_block(void);
1789+HIDE(EXTERN)(int) jsimd_can_huff_encode_one_block(void);
1790 
1791-EXTERN(JOCTET *) jsimd_huff_encode_one_block(void *state, JOCTET *buffer,
1792+HIDE(EXTERN)(JOCTET *) jsimd_huff_encode_one_block(void *state, JOCTET *buffer,
1793                                              JCOEFPTR block, int last_dc_val,
1794                                              c_derived_tbl *dctbl,
1795                                              c_derived_tbl *actbl);
1796 
1797-EXTERN(int) jsimd_can_encode_mcu_AC_first_prepare(void);
1798+HIDE(EXTERN)(int) jsimd_can_encode_mcu_AC_first_prepare(void);
1799 
1800-EXTERN(void) jsimd_encode_mcu_AC_first_prepare
1801+HIDE(EXTERN)(void) jsimd_encode_mcu_AC_first_prepare
1802   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
1803    UJCOEF *values, size_t *zerobits);
1804 
1805-EXTERN(int) jsimd_can_encode_mcu_AC_refine_prepare(void);
1806+HIDE(EXTERN)(int) jsimd_can_encode_mcu_AC_refine_prepare(void);
1807 
1808-EXTERN(int) jsimd_encode_mcu_AC_refine_prepare
1809+HIDE(EXTERN)(int) jsimd_encode_mcu_AC_refine_prepare
1810   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
1811    UJCOEF *absvalues, size_t *bits);
1812diff --git a/jsimddct.h b/jsimddct.h
1813index 55ee8cf..c19fe41 100644
1814--- a/jsimddct.h
1815+++ b/jsimddct.h
1816@@ -9,62 +9,62 @@
1817  *
1818  */
1819 
1820-EXTERN(int) jsimd_can_convsamp(void);
1821-EXTERN(int) jsimd_can_convsamp_float(void);
1822+HIDE(EXTERN)(int) jsimd_can_convsamp(void);
1823+HIDE(EXTERN)(int) jsimd_can_convsamp_float(void);
1824 
1825-EXTERN(void) jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
1826+HIDE(EXTERN)(void) jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
1827                             DCTELEM *workspace);
1828-EXTERN(void) jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
1829+HIDE(EXTERN)(void) jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
1830                                   FAST_FLOAT *workspace);
1831 
1832-EXTERN(int) jsimd_can_fdct_islow(void);
1833-EXTERN(int) jsimd_can_fdct_ifast(void);
1834-EXTERN(int) jsimd_can_fdct_float(void);
1835+HIDE(EXTERN)(int) jsimd_can_fdct_islow(void);
1836+HIDE(EXTERN)(int) jsimd_can_fdct_ifast(void);
1837+HIDE(EXTERN)(int) jsimd_can_fdct_float(void);
1838 
1839-EXTERN(void) jsimd_fdct_islow(DCTELEM *data);
1840-EXTERN(void) jsimd_fdct_ifast(DCTELEM *data);
1841-EXTERN(void) jsimd_fdct_float(FAST_FLOAT *data);
1842+HIDE(EXTERN)(void) jsimd_fdct_islow(DCTELEM *data);
1843+HIDE(EXTERN)(void) jsimd_fdct_ifast(DCTELEM *data);
1844+HIDE(EXTERN)(void) jsimd_fdct_float(FAST_FLOAT *data);
1845 
1846-EXTERN(int) jsimd_can_quantize(void);
1847-EXTERN(int) jsimd_can_quantize_float(void);
1848+HIDE(EXTERN)(int) jsimd_can_quantize(void);
1849+HIDE(EXTERN)(int) jsimd_can_quantize_float(void);
1850 
1851-EXTERN(void) jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors,
1852+HIDE(EXTERN)(void) jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors,
1853                             DCTELEM *workspace);
1854-EXTERN(void) jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
1855+HIDE(EXTERN)(void) jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
1856                                   FAST_FLOAT *workspace);
1857 
1858-EXTERN(int) jsimd_can_idct_2x2(void);
1859-EXTERN(int) jsimd_can_idct_4x4(void);
1860-EXTERN(int) jsimd_can_idct_6x6(void);
1861-EXTERN(int) jsimd_can_idct_12x12(void);
1862+HIDE(EXTERN)(int) jsimd_can_idct_2x2(void);
1863+HIDE(EXTERN)(int) jsimd_can_idct_4x4(void);
1864+HIDE(EXTERN)(int) jsimd_can_idct_6x6(void);
1865+HIDE(EXTERN)(int) jsimd_can_idct_12x12(void);
1866 
1867-EXTERN(void) jsimd_idct_2x2(j_decompress_ptr cinfo,
1868+HIDE(EXTERN)(void) jsimd_idct_2x2(j_decompress_ptr cinfo,
1869                             jpeg_component_info *compptr, JCOEFPTR coef_block,
1870                             JSAMPARRAY output_buf, JDIMENSION output_col);
1871-EXTERN(void) jsimd_idct_4x4(j_decompress_ptr cinfo,
1872+HIDE(EXTERN)(void) jsimd_idct_4x4(j_decompress_ptr cinfo,
1873                             jpeg_component_info *compptr, JCOEFPTR coef_block,
1874                             JSAMPARRAY output_buf, JDIMENSION output_col);
1875-EXTERN(void) jsimd_idct_6x6(j_decompress_ptr cinfo,
1876+HIDE(EXTERN)(void) jsimd_idct_6x6(j_decompress_ptr cinfo,
1877                             jpeg_component_info *compptr, JCOEFPTR coef_block,
1878                             JSAMPARRAY output_buf, JDIMENSION output_col);
1879-EXTERN(void) jsimd_idct_12x12(j_decompress_ptr cinfo,
1880+HIDE(EXTERN)(void) jsimd_idct_12x12(j_decompress_ptr cinfo,
1881                               jpeg_component_info *compptr,
1882                               JCOEFPTR coef_block, JSAMPARRAY output_buf,
1883                               JDIMENSION output_col);
1884 
1885-EXTERN(int) jsimd_can_idct_islow(void);
1886-EXTERN(int) jsimd_can_idct_ifast(void);
1887-EXTERN(int) jsimd_can_idct_float(void);
1888+HIDE(EXTERN)(int) jsimd_can_idct_islow(void);
1889+HIDE(EXTERN)(int) jsimd_can_idct_ifast(void);
1890+HIDE(EXTERN)(int) jsimd_can_idct_float(void);
1891 
1892-EXTERN(void) jsimd_idct_islow(j_decompress_ptr cinfo,
1893+HIDE(EXTERN)(void) jsimd_idct_islow(j_decompress_ptr cinfo,
1894                               jpeg_component_info *compptr,
1895                               JCOEFPTR coef_block, JSAMPARRAY output_buf,
1896                               JDIMENSION output_col);
1897-EXTERN(void) jsimd_idct_ifast(j_decompress_ptr cinfo,
1898+HIDE(EXTERN)(void) jsimd_idct_ifast(j_decompress_ptr cinfo,
1899                               jpeg_component_info *compptr,
1900                               JCOEFPTR coef_block, JSAMPARRAY output_buf,
1901                               JDIMENSION output_col);
1902-EXTERN(void) jsimd_idct_float(j_decompress_ptr cinfo,
1903+HIDE(EXTERN)(void) jsimd_idct_float(j_decompress_ptr cinfo,
1904                               jpeg_component_info *compptr,
1905                               JCOEFPTR coef_block, JSAMPARRAY output_buf,
1906                               JDIMENSION output_col);
1907diff --git a/jutils.c b/jutils.c
1908index 5c5bb17..cd14e74 100644
1909--- a/jutils.c
1910+++ b/jutils.c
1911@@ -53,6 +53,27 @@ const int jpeg_zigzag_order[DCTSIZE2] = {
1912  * fake entries.
1913  */
1914 
1915+#ifdef HUFF_DECODE_OPT
1916+/* OH ISSUE: jpeg optimize -- variant of the table with 64 (not 16) padding entries */
1917+const int jpeg_natural_order[DCTSIZE2 + 64] = {
1918+  0,  1,  8, 16,  9,  2,  3, 10,
1919+ 17, 24, 32, 25, 18, 11,  4,  5,
1920+ 12, 19, 26, 33, 40, 48, 41, 34,
1921+ 27, 20, 13,  6,  7, 14, 21, 28,
1922+ 35, 42, 49, 56, 57, 50, 43, 36,
1923+ 29, 22, 15, 23, 30, 37, 44, 51,
1924+ 58, 59, 52, 45, 38, 31, 39, 46,
1925+ 53, 60, 61, 54, 47, 55, 62, 63,
1926+ 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
1927+ 63, 63, 63, 63, 63, 63, 63, 63,
1928+ 63, 63, 63, 63, 63, 63, 63, 63,
1929+ 63, 63, 63, 63, 63, 63, 63, 63,
1930+ 63, 63, 63, 63, 63, 63, 63, 63,
1931+ 63, 63, 63, 63, 63, 63, 63, 63,
1932+ 63, 63, 63, 63, 63, 63, 63, 63,
1933+ 63, 63, 63, 63, 63, 63, 63, 63
1934+};
1935+#else
1936 const int jpeg_natural_order[DCTSIZE2 + 16] = {
1937   0,  1,  8, 16,  9,  2,  3, 10,
1938  17, 24, 32, 25, 18, 11,  4,  5,
1939@@ -65,6 +86,7 @@ const int jpeg_natural_order[DCTSIZE2 + 16] = {
1940  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
1941  63, 63, 63, 63, 63, 63, 63, 63
1942 };
1943+#endif
1944 
1945 
1946 /*
1947diff --git a/simd/jsimd.h b/simd/jsimd.h
1948index abebbf4..1d45a56 100644
1949--- a/simd/jsimd.h
1950+++ b/simd/jsimd.h
1951@@ -102,34 +102,34 @@ EXTERN(void) jsimd_extxrgb_ycc_convert_avx2
1952   (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
1953    JDIMENSION output_row, int num_rows);
1954 
1955-EXTERN(void) jsimd_rgb_ycc_convert_neon
1956+HIDE(EXTERN)(void) jsimd_rgb_ycc_convert_neon
1957   (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
1958    JDIMENSION output_row, int num_rows);
1959-EXTERN(void) jsimd_extrgb_ycc_convert_neon
1960+HIDE(EXTERN)(void) jsimd_extrgb_ycc_convert_neon
1961   (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
1962    JDIMENSION output_row, int num_rows);
1963-EXTERN(void) jsimd_extrgbx_ycc_convert_neon
1964+HIDE(EXTERN)(void) jsimd_extrgbx_ycc_convert_neon
1965   (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
1966    JDIMENSION output_row, int num_rows);
1967-EXTERN(void) jsimd_extbgr_ycc_convert_neon
1968+HIDE(EXTERN)(void) jsimd_extbgr_ycc_convert_neon
1969   (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
1970    JDIMENSION output_row, int num_rows);
1971-EXTERN(void) jsimd_extbgrx_ycc_convert_neon
1972+HIDE(EXTERN)(void) jsimd_extbgrx_ycc_convert_neon
1973   (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
1974    JDIMENSION output_row, int num_rows);
1975-EXTERN(void) jsimd_extxbgr_ycc_convert_neon
1976+HIDE(EXTERN)(void) jsimd_extxbgr_ycc_convert_neon
1977   (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
1978    JDIMENSION output_row, int num_rows);
1979-EXTERN(void) jsimd_extxrgb_ycc_convert_neon
1980+HIDE(EXTERN)(void) jsimd_extxrgb_ycc_convert_neon
1981   (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
1982    JDIMENSION output_row, int num_rows);
1983 
1984 #ifndef NEON_INTRINSICS
1985 
1986-EXTERN(void) jsimd_extrgb_ycc_convert_neon_slowld3
1987+HIDE(EXTERN)(void) jsimd_extrgb_ycc_convert_neon_slowld3
1988   (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
1989    JDIMENSION output_row, int num_rows);
1990-EXTERN(void) jsimd_extbgr_ycc_convert_neon_slowld3
1991+HIDE(EXTERN)(void) jsimd_extbgr_ycc_convert_neon_slowld3
1992   (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
1993    JDIMENSION output_row, int num_rows);
1994 
1995@@ -270,25 +270,25 @@ EXTERN(void) jsimd_extxrgb_gray_convert_avx2
1996   (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
1997    JDIMENSION output_row, int num_rows);
1998 
1999-EXTERN(void) jsimd_rgb_gray_convert_neon
2000+HIDE(EXTERN)(void) jsimd_rgb_gray_convert_neon
2001   (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
2002    JDIMENSION output_row, int num_rows);
2003-EXTERN(void) jsimd_extrgb_gray_convert_neon
2004+HIDE(EXTERN)(void) jsimd_extrgb_gray_convert_neon
2005   (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
2006    JDIMENSION output_row, int num_rows);
2007-EXTERN(void) jsimd_extrgbx_gray_convert_neon
2008+HIDE(EXTERN)(void) jsimd_extrgbx_gray_convert_neon
2009   (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
2010    JDIMENSION output_row, int num_rows);
2011-EXTERN(void) jsimd_extbgr_gray_convert_neon
2012+HIDE(EXTERN)(void) jsimd_extbgr_gray_convert_neon
2013   (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
2014    JDIMENSION output_row, int num_rows);
2015-EXTERN(void) jsimd_extbgrx_gray_convert_neon
2016+HIDE(EXTERN)(void) jsimd_extbgrx_gray_convert_neon
2017   (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
2018    JDIMENSION output_row, int num_rows);
2019-EXTERN(void) jsimd_extxbgr_gray_convert_neon
2020+HIDE(EXTERN)(void) jsimd_extxbgr_gray_convert_neon
2021   (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
2022    JDIMENSION output_row, int num_rows);
2023-EXTERN(void) jsimd_extxrgb_gray_convert_neon
2024+HIDE(EXTERN)(void) jsimd_extxrgb_gray_convert_neon
2025   (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
2026    JDIMENSION output_row, int num_rows);
2027 
2028@@ -427,37 +427,37 @@ EXTERN(void) jsimd_ycc_extxrgb_convert_avx2
2029   (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
2030    JSAMPARRAY output_buf, int num_rows);
2031 
2032-EXTERN(void) jsimd_ycc_rgb_convert_neon
2033+HIDE(EXTERN)(void) jsimd_ycc_rgb_convert_neon
2034   (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
2035    JSAMPARRAY output_buf, int num_rows);
2036-EXTERN(void) jsimd_ycc_extrgb_convert_neon
2037+HIDE(EXTERN)(void) jsimd_ycc_extrgb_convert_neon
2038   (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
2039    JSAMPARRAY output_buf, int num_rows);
2040-EXTERN(void) jsimd_ycc_extrgbx_convert_neon
2041+HIDE(EXTERN)(void) jsimd_ycc_extrgbx_convert_neon
2042   (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
2043    JSAMPARRAY output_buf, int num_rows);
2044-EXTERN(void) jsimd_ycc_extbgr_convert_neon
2045+HIDE(EXTERN)(void) jsimd_ycc_extbgr_convert_neon
2046   (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
2047    JSAMPARRAY output_buf, int num_rows);
2048-EXTERN(void) jsimd_ycc_extbgrx_convert_neon
2049+HIDE(EXTERN)(void) jsimd_ycc_extbgrx_convert_neon
2050   (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
2051    JSAMPARRAY output_buf, int num_rows);
2052-EXTERN(void) jsimd_ycc_extxbgr_convert_neon
2053+HIDE(EXTERN)(void) jsimd_ycc_extxbgr_convert_neon
2054   (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
2055    JSAMPARRAY output_buf, int num_rows);
2056-EXTERN(void) jsimd_ycc_extxrgb_convert_neon
2057+HIDE(EXTERN)(void) jsimd_ycc_extxrgb_convert_neon
2058   (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
2059    JSAMPARRAY output_buf, int num_rows);
2060-EXTERN(void) jsimd_ycc_rgb565_convert_neon
2061+HIDE(EXTERN)(void) jsimd_ycc_rgb565_convert_neon
2062   (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
2063    JSAMPARRAY output_buf, int num_rows);
2064 
2065 #ifndef NEON_INTRINSICS
2066 
2067-EXTERN(void) jsimd_ycc_extrgb_convert_neon_slowst3
2068+HIDE(EXTERN)(void) jsimd_ycc_extrgb_convert_neon_slowst3
2069   (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
2070    JSAMPARRAY output_buf, int num_rows);
2071-EXTERN(void) jsimd_ycc_extbgr_convert_neon_slowst3
2072+HIDE(EXTERN)(void) jsimd_ycc_extbgr_convert_neon_slowst3
2073   (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
2074    JSAMPARRAY output_buf, int num_rows);
2075 
2076@@ -547,7 +547,7 @@ EXTERN(void) jsimd_h2v1_downsample_avx2
2077   (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor,
2078    JDIMENSION width_in_blocks, JSAMPARRAY input_data, JSAMPARRAY output_data);
2079 
2080-EXTERN(void) jsimd_h2v1_downsample_neon
2081+HIDE(EXTERN)(void) jsimd_h2v1_downsample_neon
2082   (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor,
2083    JDIMENSION width_in_blocks, JSAMPARRAY input_data, JSAMPARRAY output_data);
2084 
2085@@ -572,7 +572,7 @@ EXTERN(void) jsimd_h2v2_downsample_avx2
2086   (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor,
2087    JDIMENSION width_in_blocks, JSAMPARRAY input_data, JSAMPARRAY output_data);
2088 
2089-EXTERN(void) jsimd_h2v2_downsample_neon
2090+HIDE(EXTERN)(void) jsimd_h2v2_downsample_neon
2091   (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor,
2092    JDIMENSION width_in_blocks, JSAMPARRAY input_data, JSAMPARRAY output_data);
2093 
2094@@ -617,10 +617,10 @@ EXTERN(void) jsimd_h2v2_upsample_avx2
2095   (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data,
2096    JSAMPARRAY *output_data_ptr);
2097 
2098-EXTERN(void) jsimd_h2v1_upsample_neon
2099+HIDE(EXTERN)(void) jsimd_h2v1_upsample_neon
2100   (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data,
2101    JSAMPARRAY *output_data_ptr);
2102-EXTERN(void) jsimd_h2v2_upsample_neon
2103+HIDE(EXTERN)(void) jsimd_h2v2_upsample_neon
2104   (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data,
2105    JSAMPARRAY *output_data_ptr);
2106 
2107@@ -667,13 +667,13 @@ EXTERN(void) jsimd_h2v2_fancy_upsample_avx2
2108   (int max_v_samp_factor, JDIMENSION downsampled_width, JSAMPARRAY input_data,
2109    JSAMPARRAY *output_data_ptr);
2110 
2111-EXTERN(void) jsimd_h2v1_fancy_upsample_neon
2112+HIDE(EXTERN)(void) jsimd_h2v1_fancy_upsample_neon
2113   (int max_v_samp_factor, JDIMENSION downsampled_width, JSAMPARRAY input_data,
2114    JSAMPARRAY *output_data_ptr);
2115-EXTERN(void) jsimd_h2v2_fancy_upsample_neon
2116+HIDE(EXTERN)(void) jsimd_h2v2_fancy_upsample_neon
2117   (int max_v_samp_factor, JDIMENSION downsampled_width, JSAMPARRAY input_data,
2118    JSAMPARRAY *output_data_ptr);
2119-EXTERN(void) jsimd_h1v2_fancy_upsample_neon
2120+HIDE(EXTERN)(void) jsimd_h1v2_fancy_upsample_neon
2121   (int max_v_samp_factor, JDIMENSION downsampled_width, JSAMPARRAY input_data,
2122    JSAMPARRAY *output_data_ptr);
2123 
2124@@ -833,47 +833,47 @@ EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_avx2
2125   (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
2126    JSAMPARRAY output_buf);
2127 
2128-EXTERN(void) jsimd_h2v1_merged_upsample_neon
2129+HIDE(EXTERN)(void) jsimd_h2v1_merged_upsample_neon
2130   (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
2131    JSAMPARRAY output_buf);
2132-EXTERN(void) jsimd_h2v1_extrgb_merged_upsample_neon
2133+HIDE(EXTERN)(void) jsimd_h2v1_extrgb_merged_upsample_neon
2134   (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
2135    JSAMPARRAY output_buf);
2136-EXTERN(void) jsimd_h2v1_extrgbx_merged_upsample_neon
2137+HIDE(EXTERN)(void) jsimd_h2v1_extrgbx_merged_upsample_neon
2138   (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
2139    JSAMPARRAY output_buf);
2140-EXTERN(void) jsimd_h2v1_extbgr_merged_upsample_neon
2141+HIDE(EXTERN)(void) jsimd_h2v1_extbgr_merged_upsample_neon
2142   (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
2143    JSAMPARRAY output_buf);
2144-EXTERN(void) jsimd_h2v1_extbgrx_merged_upsample_neon
2145+HIDE(EXTERN)(void) jsimd_h2v1_extbgrx_merged_upsample_neon
2146   (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
2147    JSAMPARRAY output_buf);
2148-EXTERN(void) jsimd_h2v1_extxbgr_merged_upsample_neon
2149+HIDE(EXTERN)(void) jsimd_h2v1_extxbgr_merged_upsample_neon
2150   (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
2151    JSAMPARRAY output_buf);
2152-EXTERN(void) jsimd_h2v1_extxrgb_merged_upsample_neon
2153+HIDE(EXTERN)(void) jsimd_h2v1_extxrgb_merged_upsample_neon
2154   (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
2155    JSAMPARRAY output_buf);
2156 
2157-EXTERN(void) jsimd_h2v2_merged_upsample_neon
2158+HIDE(EXTERN)(void) jsimd_h2v2_merged_upsample_neon
2159   (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
2160    JSAMPARRAY output_buf);
2161-EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_neon
2162+HIDE(EXTERN)(void) jsimd_h2v2_extrgb_merged_upsample_neon
2163   (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
2164    JSAMPARRAY output_buf);
2165-EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_neon
2166+HIDE(EXTERN)(void) jsimd_h2v2_extrgbx_merged_upsample_neon
2167   (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
2168    JSAMPARRAY output_buf);
2169-EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_neon
2170+HIDE(EXTERN)(void) jsimd_h2v2_extbgr_merged_upsample_neon
2171   (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
2172    JSAMPARRAY output_buf);
2173-EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_neon
2174+HIDE(EXTERN)(void) jsimd_h2v2_extbgrx_merged_upsample_neon
2175   (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
2176    JSAMPARRAY output_buf);
2177-EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_neon
2178+HIDE(EXTERN)(void) jsimd_h2v2_extxbgr_merged_upsample_neon
2179   (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
2180    JSAMPARRAY output_buf);
2181-EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_neon
2182+HIDE(EXTERN)(void) jsimd_h2v2_extxrgb_merged_upsample_neon
2183   (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
2184    JSAMPARRAY output_buf);
2185 
2186@@ -1019,7 +1019,7 @@ EXTERN(void) jsimd_convsamp_sse2
2187 EXTERN(void) jsimd_convsamp_avx2
2188   (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace);
2189 
2190-EXTERN(void) jsimd_convsamp_neon
2191+HIDE(EXTERN)(void) jsimd_convsamp_neon
2192   (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace);
2193 
2194 EXTERN(void) jsimd_convsamp_dspr2
2195@@ -1050,7 +1050,7 @@ EXTERN(void) jsimd_fdct_islow_sse2(DCTELEM *data);
2196 extern const int jconst_fdct_islow_avx2[];
2197 EXTERN(void) jsimd_fdct_islow_avx2(DCTELEM *data);
2198 
2199-EXTERN(void) jsimd_fdct_islow_neon(DCTELEM *data);
2200+HIDE(EXTERN)(void) jsimd_fdct_islow_neon(DCTELEM *data);
2201 
2202 EXTERN(void) jsimd_fdct_islow_dspr2(DCTELEM *data);
2203 
2204@@ -1064,7 +1064,7 @@ EXTERN(void) jsimd_fdct_ifast_mmx(DCTELEM *data);
2205 extern const int jconst_fdct_ifast_sse2[];
2206 EXTERN(void) jsimd_fdct_ifast_sse2(DCTELEM *data);
2207 
2208-EXTERN(void) jsimd_fdct_ifast_neon(DCTELEM *data);
2209+HIDE(EXTERN)(void) jsimd_fdct_ifast_neon(DCTELEM *data);
2210 
2211 EXTERN(void) jsimd_fdct_ifast_dspr2(DCTELEM *data);
2212 
2213@@ -1088,7 +1088,7 @@ EXTERN(void) jsimd_quantize_sse2
2214 EXTERN(void) jsimd_quantize_avx2
2215   (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace);
2216 
2217-EXTERN(void) jsimd_quantize_neon
2218+HIDE(EXTERN)(void) jsimd_quantize_neon
2219   (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace);
2220 
2221 EXTERN(void) jsimd_quantize_dspr2
2222@@ -1129,10 +1129,10 @@ EXTERN(void) jsimd_idct_4x4_sse2
2223   (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
2224    JDIMENSION output_col);
2225 
2226-EXTERN(void) jsimd_idct_2x2_neon
2227+HIDE(EXTERN)(void) jsimd_idct_2x2_neon
2228   (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
2229    JDIMENSION output_col);
2230-EXTERN(void) jsimd_idct_4x4_neon
2231+HIDE(EXTERN)(void) jsimd_idct_4x4_neon
2232   (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
2233    JDIMENSION output_col);
2234 
2235@@ -1165,7 +1165,7 @@ EXTERN(void) jsimd_idct_islow_avx2
2236   (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
2237    JDIMENSION output_col);
2238 
2239-EXTERN(void) jsimd_idct_islow_neon
2240+HIDE(EXTERN)(void) jsimd_idct_islow_neon
2241   (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
2242    JDIMENSION output_col);
2243 
2244@@ -1190,7 +1190,7 @@ EXTERN(void) jsimd_idct_ifast_sse2
2245   (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
2246    JDIMENSION output_col);
2247 
2248-EXTERN(void) jsimd_idct_ifast_neon
2249+HIDE(EXTERN)(void) jsimd_idct_ifast_neon
2250   (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
2251    JDIMENSION output_col);
2252 
2253@@ -1230,13 +1230,13 @@ EXTERN(JOCTET *) jsimd_huff_encode_one_block_sse2
2254   (void *state, JOCTET *buffer, JCOEFPTR block, int last_dc_val,
2255    c_derived_tbl *dctbl, c_derived_tbl *actbl);
2256 
2257-EXTERN(JOCTET *) jsimd_huff_encode_one_block_neon
2258+HIDE(EXTERN)(JOCTET *) jsimd_huff_encode_one_block_neon
2259   (void *state, JOCTET *buffer, JCOEFPTR block, int last_dc_val,
2260    c_derived_tbl *dctbl, c_derived_tbl *actbl);
2261 
2262 #ifndef NEON_INTRINSICS
2263 
2264-EXTERN(JOCTET *) jsimd_huff_encode_one_block_neon_slowtbl
2265+HIDE(EXTERN)(JOCTET *) jsimd_huff_encode_one_block_neon_slowtbl
2266   (void *state, JOCTET *buffer, JCOEFPTR block, int last_dc_val,
2267    c_derived_tbl *dctbl, c_derived_tbl *actbl);
2268 
2269@@ -1247,7 +1247,7 @@ EXTERN(void) jsimd_encode_mcu_AC_first_prepare_sse2
2270   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
2271    UJCOEF *values, size_t *zerobits);
2272 
2273-EXTERN(void) jsimd_encode_mcu_AC_first_prepare_neon
2274+HIDE(EXTERN)(void) jsimd_encode_mcu_AC_first_prepare_neon
2275   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
2276    UJCOEF *values, size_t *zerobits);
2277 
2278@@ -1255,6 +1255,6 @@ EXTERN(int) jsimd_encode_mcu_AC_refine_prepare_sse2
2279   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
2280    UJCOEF *absvalues, size_t *bits);
2281 
2282-EXTERN(int) jsimd_encode_mcu_AC_refine_prepare_neon
2283+HIDE(EXTERN)(int) jsimd_encode_mcu_AC_refine_prepare_neon
2284   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
2285    UJCOEF *absvalues, size_t *bits);
2286