/*
 * This file is part of the Independent JPEG Group's software.
 *
 * The authors make NO WARRANTY or representation, either express or implied,
 * with respect to this software, its quality, accuracy, merchantability, or
 * fitness for a particular purpose.  This software is provided "AS IS", and
 * you, its user, assume the entire risk as to its quality and accuracy.
 *
 * This software is copyright (C) 1994-1996, Thomas G. Lane.
 * All Rights Reserved except as specified below.
 *
 * Permission is hereby granted to use, copy, modify, and distribute this
 * software (or portions thereof) for any purpose, without fee, subject to
 * these conditions:
 * (1) If any part of the source code for this software is distributed, then
 * this README file must be included, with this copyright and no-warranty
 * notice unaltered; and any additions, deletions, or changes to the original
 * files must be clearly indicated in accompanying documentation.
 * (2) If only executable code is distributed, then the accompanying
 * documentation must state that "this software is based in part on the work
 * of the Independent JPEG Group".
 * (3) Permission for use of this software is granted only if the user accepts
 * full responsibility for any undesirable consequences; the authors accept
 * NO LIABILITY for damages of any kind.
 *
 * These conditions apply to any software derived from or based on the IJG
 * code, not just to the unmodified library.  If you use our work, you ought
 * to acknowledge us.
 *
 * Permission is NOT granted for the use of any IJG author's name or company
 * name in advertising or publicity relating to this software or products
 * derived from it.  This software may be referred to only as "the Independent
 * JPEG Group's software".
 *
 * We specifically permit and encourage the use of this software as the basis
 * of commercial products, provided that all warranty or liability claims are
 * assumed by the product vendor.
 *
 * This file contains a fast, not so accurate integer implementation of the
 * forward DCT (Discrete Cosine Transform).
 *
 * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
 * on each column.  Direct algorithms are also available, but they are
 * much more complex and seem not to be any faster when reduced to code.
 *
 * This implementation is based on Arai, Agui, and Nakajima's algorithm for
 * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
 * Japanese, but the algorithm is described in the Pennebaker & Mitchell
 * JPEG textbook (see REFERENCES section in file README).  The following code
 * is based directly on figure 4-8 in P&M.
 * While an 8-point DCT cannot be done in less than 11 multiplies, it is
 * possible to arrange the computation so that many of the multiplies are
 * simple scalings of the final outputs.  These multiplies can then be
 * folded into the multiplications or divisions by the JPEG quantization
 * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
 * to be done in the DCT itself.
 * The primary disadvantage of this method is that with fixed-point math,
 * accuracy is lost due to imprecise representation of the scaled
 * quantization values.  The smaller the quantization table entry, the less
 * precise the scaled value, so this implementation does worse with high-
 * quality-setting files than with low-quality ones.
 */

/**
 * @file
 * Independent JPEG Group's fast AAN dct.
 */
68cabdff1aSopenharmony_ci
69cabdff1aSopenharmony_ci#include <stdint.h>
70cabdff1aSopenharmony_ci#include "libavutil/attributes.h"
71cabdff1aSopenharmony_ci#include "dct.h"
72cabdff1aSopenharmony_ci
/* Edge length of the square block handled by this module. */
#define DCTSIZE 8

/* Expands to its argument; used to spell the return type of the exported
 * functions below. */
#define GLOBAL(x) x

/* Plain right shift.  x may be negative here, and C leaves the result of
 * shifting a negative value implementation-defined, so this assumes the
 * compiler performs an arithmetic (sign-propagating) shift. */
#define RIGHT_SHIFT(x, n) ((x) >> (n))

/*
 * This module is specialized to the case DCTSIZE = 8.
 */

#if DCTSIZE != 8
#error "Sorry, this code only copes with 8x8 DCTs."
#endif
84cabdff1aSopenharmony_ci
85cabdff1aSopenharmony_ci
/* Scaling decisions are generally the same as in the LL&M algorithm;
 * see jfdctint.c for more details.  However, we choose to descale
 * (right shift) multiplication products as soon as they are formed,
 * rather than carrying additional fractional bits into subsequent additions.
 * This compromises accuracy slightly, but it lets us save a few shifts.
 * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
 * everywhere except in the multiplications proper; this saves a good deal
 * of work on 16-bit-int machines.
 *
 * Again to save a few shifts, the intermediate results between pass 1 and
 * pass 2 are not upscaled, but are represented only to integral precision.
 *
 * A final compromise is to represent the multiplicative constants to only
 * 8 fractional bits, rather than 13.  This saves some shifting work on some
 * machines, and may also reduce the cost of multiplication (since there
 * are fewer one-bits in the constants).
 */
103cabdff1aSopenharmony_ci
/* Number of fractional bits carried by the fixed-point multiplier constants. */
#define CONST_BITS  8


/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
 * causing a lot of useless floating-point operations at run time.
 * To get around this we use the following pre-calculated constants.
 * If you change CONST_BITS you may want to add appropriate values.
 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
 */

#if CONST_BITS == 8
#define FIX_0_382683433  ((int32_t)   98)       /* FIX(0.382683433) */
#define FIX_0_541196100  ((int32_t)  139)       /* FIX(0.541196100) */
#define FIX_0_707106781  ((int32_t)  181)       /* FIX(0.707106781) */
#define FIX_1_306562965  ((int32_t)  334)       /* FIX(1.306562965) */
#else
/* No pre-calculated table exists for this CONST_BITS, so the constants are
 * derived with FIX().  This file does not define FIX() anywhere else, so
 * supply the conversion (scale by 2^CONST_BITS and round to nearest) unless
 * an included header has already provided one.
 */
#ifndef FIX
#define FIX(x)  ((int32_t) ((x) * ((int32_t) 1 << CONST_BITS) + 0.5))
#endif
#define FIX_0_382683433  FIX(0.382683433)
#define FIX_0_541196100  FIX(0.541196100)
#define FIX_0_707106781  FIX(0.707106781)
#define FIX_1_306562965  FIX(1.306562965)
#endif
125cabdff1aSopenharmony_ci
126cabdff1aSopenharmony_ci
/* We can gain a little more speed, with a further compromise in accuracy,
 * by omitting the addition in a descaling shift.  This yields an incorrectly
 * rounded result half the time...
 *
 * That truncating variant is the default.  Defining USE_ACCURATE_ROUNDING
 * selects a round-to-nearest DESCALE instead (half of the divisor is added
 * before shifting); it is defined here only when no included header has
 * already supplied a DESCALE, so the accurate build does not depend on one.
 */

#ifndef USE_ACCURATE_ROUNDING
#undef DESCALE
#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
#elif !defined(DESCALE)
#define DESCALE(x,n)  RIGHT_SHIFT((x) + ((int32_t) 1 << ((n) - 1)), n)
#endif


/* Multiply an int16_t variable by an int32_t constant, and immediately
 * descale to yield an int16_t result.
 */

#define MULTIPLY(var,coef)  ((int16_t) DESCALE((var) * (coef), CONST_BITS))
143cabdff1aSopenharmony_ci
144cabdff1aSopenharmony_cistatic av_always_inline void row_fdct(int16_t * data){
145cabdff1aSopenharmony_ci  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
146cabdff1aSopenharmony_ci  int tmp10, tmp11, tmp12, tmp13;
147cabdff1aSopenharmony_ci  int z1, z2, z3, z4, z5, z11, z13;
148cabdff1aSopenharmony_ci  int16_t *dataptr;
149cabdff1aSopenharmony_ci  int ctr;
150cabdff1aSopenharmony_ci
151cabdff1aSopenharmony_ci  /* Pass 1: process rows. */
152cabdff1aSopenharmony_ci
153cabdff1aSopenharmony_ci  dataptr = data;
154cabdff1aSopenharmony_ci  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
155cabdff1aSopenharmony_ci    tmp0 = dataptr[0] + dataptr[7];
156cabdff1aSopenharmony_ci    tmp7 = dataptr[0] - dataptr[7];
157cabdff1aSopenharmony_ci    tmp1 = dataptr[1] + dataptr[6];
158cabdff1aSopenharmony_ci    tmp6 = dataptr[1] - dataptr[6];
159cabdff1aSopenharmony_ci    tmp2 = dataptr[2] + dataptr[5];
160cabdff1aSopenharmony_ci    tmp5 = dataptr[2] - dataptr[5];
161cabdff1aSopenharmony_ci    tmp3 = dataptr[3] + dataptr[4];
162cabdff1aSopenharmony_ci    tmp4 = dataptr[3] - dataptr[4];
163cabdff1aSopenharmony_ci
164cabdff1aSopenharmony_ci    /* Even part */
165cabdff1aSopenharmony_ci
166cabdff1aSopenharmony_ci    tmp10 = tmp0 + tmp3;        /* phase 2 */
167cabdff1aSopenharmony_ci    tmp13 = tmp0 - tmp3;
168cabdff1aSopenharmony_ci    tmp11 = tmp1 + tmp2;
169cabdff1aSopenharmony_ci    tmp12 = tmp1 - tmp2;
170cabdff1aSopenharmony_ci
171cabdff1aSopenharmony_ci    dataptr[0] = tmp10 + tmp11; /* phase 3 */
172cabdff1aSopenharmony_ci    dataptr[4] = tmp10 - tmp11;
173cabdff1aSopenharmony_ci
174cabdff1aSopenharmony_ci    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
175cabdff1aSopenharmony_ci    dataptr[2] = tmp13 + z1;    /* phase 5 */
176cabdff1aSopenharmony_ci    dataptr[6] = tmp13 - z1;
177cabdff1aSopenharmony_ci
178cabdff1aSopenharmony_ci    /* Odd part */
179cabdff1aSopenharmony_ci
180cabdff1aSopenharmony_ci    tmp10 = tmp4 + tmp5;        /* phase 2 */
181cabdff1aSopenharmony_ci    tmp11 = tmp5 + tmp6;
182cabdff1aSopenharmony_ci    tmp12 = tmp6 + tmp7;
183cabdff1aSopenharmony_ci
184cabdff1aSopenharmony_ci    /* The rotator is modified from fig 4-8 to avoid extra negations. */
185cabdff1aSopenharmony_ci    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
186cabdff1aSopenharmony_ci    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5;    /* c2-c6 */
187cabdff1aSopenharmony_ci    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5;    /* c2+c6 */
188cabdff1aSopenharmony_ci    z3 = MULTIPLY(tmp11, FIX_0_707106781);         /* c4 */
189cabdff1aSopenharmony_ci
190cabdff1aSopenharmony_ci    z11 = tmp7 + z3;            /* phase 5 */
191cabdff1aSopenharmony_ci    z13 = tmp7 - z3;
192cabdff1aSopenharmony_ci
193cabdff1aSopenharmony_ci    dataptr[5] = z13 + z2;      /* phase 6 */
194cabdff1aSopenharmony_ci    dataptr[3] = z13 - z2;
195cabdff1aSopenharmony_ci    dataptr[1] = z11 + z4;
196cabdff1aSopenharmony_ci    dataptr[7] = z11 - z4;
197cabdff1aSopenharmony_ci
198cabdff1aSopenharmony_ci    dataptr += DCTSIZE;         /* advance pointer to next row */
199cabdff1aSopenharmony_ci  }
200cabdff1aSopenharmony_ci}
201cabdff1aSopenharmony_ci
202cabdff1aSopenharmony_ci/*
203cabdff1aSopenharmony_ci * Perform the forward DCT on one block of samples.
204cabdff1aSopenharmony_ci */
205cabdff1aSopenharmony_ci
206cabdff1aSopenharmony_ciGLOBAL(void)
207cabdff1aSopenharmony_ciff_fdct_ifast (int16_t * data)
208cabdff1aSopenharmony_ci{
209cabdff1aSopenharmony_ci  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
210cabdff1aSopenharmony_ci  int tmp10, tmp11, tmp12, tmp13;
211cabdff1aSopenharmony_ci  int z1, z2, z3, z4, z5, z11, z13;
212cabdff1aSopenharmony_ci  int16_t *dataptr;
213cabdff1aSopenharmony_ci  int ctr;
214cabdff1aSopenharmony_ci
215cabdff1aSopenharmony_ci  row_fdct(data);
216cabdff1aSopenharmony_ci
217cabdff1aSopenharmony_ci  /* Pass 2: process columns. */
218cabdff1aSopenharmony_ci
219cabdff1aSopenharmony_ci  dataptr = data;
220cabdff1aSopenharmony_ci  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
221cabdff1aSopenharmony_ci    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
222cabdff1aSopenharmony_ci    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
223cabdff1aSopenharmony_ci    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
224cabdff1aSopenharmony_ci    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
225cabdff1aSopenharmony_ci    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
226cabdff1aSopenharmony_ci    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
227cabdff1aSopenharmony_ci    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
228cabdff1aSopenharmony_ci    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
229cabdff1aSopenharmony_ci
230cabdff1aSopenharmony_ci    /* Even part */
231cabdff1aSopenharmony_ci
232cabdff1aSopenharmony_ci    tmp10 = tmp0 + tmp3;        /* phase 2 */
233cabdff1aSopenharmony_ci    tmp13 = tmp0 - tmp3;
234cabdff1aSopenharmony_ci    tmp11 = tmp1 + tmp2;
235cabdff1aSopenharmony_ci    tmp12 = tmp1 - tmp2;
236cabdff1aSopenharmony_ci
237cabdff1aSopenharmony_ci    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
238cabdff1aSopenharmony_ci    dataptr[DCTSIZE*4] = tmp10 - tmp11;
239cabdff1aSopenharmony_ci
240cabdff1aSopenharmony_ci    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
241cabdff1aSopenharmony_ci    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
242cabdff1aSopenharmony_ci    dataptr[DCTSIZE*6] = tmp13 - z1;
243cabdff1aSopenharmony_ci
244cabdff1aSopenharmony_ci    /* Odd part */
245cabdff1aSopenharmony_ci
246cabdff1aSopenharmony_ci    tmp10 = tmp4 + tmp5;        /* phase 2 */
247cabdff1aSopenharmony_ci    tmp11 = tmp5 + tmp6;
248cabdff1aSopenharmony_ci    tmp12 = tmp6 + tmp7;
249cabdff1aSopenharmony_ci
250cabdff1aSopenharmony_ci    /* The rotator is modified from fig 4-8 to avoid extra negations. */
251cabdff1aSopenharmony_ci    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
252cabdff1aSopenharmony_ci    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
253cabdff1aSopenharmony_ci    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
254cabdff1aSopenharmony_ci    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
255cabdff1aSopenharmony_ci
256cabdff1aSopenharmony_ci    z11 = tmp7 + z3;            /* phase 5 */
257cabdff1aSopenharmony_ci    z13 = tmp7 - z3;
258cabdff1aSopenharmony_ci
259cabdff1aSopenharmony_ci    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
260cabdff1aSopenharmony_ci    dataptr[DCTSIZE*3] = z13 - z2;
261cabdff1aSopenharmony_ci    dataptr[DCTSIZE*1] = z11 + z4;
262cabdff1aSopenharmony_ci    dataptr[DCTSIZE*7] = z11 - z4;
263cabdff1aSopenharmony_ci
264cabdff1aSopenharmony_ci    dataptr++;                  /* advance pointer to next column */
265cabdff1aSopenharmony_ci  }
266cabdff1aSopenharmony_ci}
267cabdff1aSopenharmony_ci
268cabdff1aSopenharmony_ci/*
269cabdff1aSopenharmony_ci * Perform the forward 2-4-8 DCT on one block of samples.
270cabdff1aSopenharmony_ci */
271cabdff1aSopenharmony_ci
272cabdff1aSopenharmony_ciGLOBAL(void)
273cabdff1aSopenharmony_ciff_fdct_ifast248 (int16_t * data)
274cabdff1aSopenharmony_ci{
275cabdff1aSopenharmony_ci  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
276cabdff1aSopenharmony_ci  int tmp10, tmp11, tmp12, tmp13;
277cabdff1aSopenharmony_ci  int z1;
278cabdff1aSopenharmony_ci  int16_t *dataptr;
279cabdff1aSopenharmony_ci  int ctr;
280cabdff1aSopenharmony_ci
281cabdff1aSopenharmony_ci  row_fdct(data);
282cabdff1aSopenharmony_ci
283cabdff1aSopenharmony_ci  /* Pass 2: process columns. */
284cabdff1aSopenharmony_ci
285cabdff1aSopenharmony_ci  dataptr = data;
286cabdff1aSopenharmony_ci  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
287cabdff1aSopenharmony_ci    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
288cabdff1aSopenharmony_ci    tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
289cabdff1aSopenharmony_ci    tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
290cabdff1aSopenharmony_ci    tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
291cabdff1aSopenharmony_ci    tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
292cabdff1aSopenharmony_ci    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
293cabdff1aSopenharmony_ci    tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
294cabdff1aSopenharmony_ci    tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
295cabdff1aSopenharmony_ci
296cabdff1aSopenharmony_ci    /* Even part */
297cabdff1aSopenharmony_ci
298cabdff1aSopenharmony_ci    tmp10 = tmp0 + tmp3;
299cabdff1aSopenharmony_ci    tmp11 = tmp1 + tmp2;
300cabdff1aSopenharmony_ci    tmp12 = tmp1 - tmp2;
301cabdff1aSopenharmony_ci    tmp13 = tmp0 - tmp3;
302cabdff1aSopenharmony_ci
303cabdff1aSopenharmony_ci    dataptr[DCTSIZE*0] = tmp10 + tmp11;
304cabdff1aSopenharmony_ci    dataptr[DCTSIZE*4] = tmp10 - tmp11;
305cabdff1aSopenharmony_ci
306cabdff1aSopenharmony_ci    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
307cabdff1aSopenharmony_ci    dataptr[DCTSIZE*2] = tmp13 + z1;
308cabdff1aSopenharmony_ci    dataptr[DCTSIZE*6] = tmp13 - z1;
309cabdff1aSopenharmony_ci
310cabdff1aSopenharmony_ci    tmp10 = tmp4 + tmp7;
311cabdff1aSopenharmony_ci    tmp11 = tmp5 + tmp6;
312cabdff1aSopenharmony_ci    tmp12 = tmp5 - tmp6;
313cabdff1aSopenharmony_ci    tmp13 = tmp4 - tmp7;
314cabdff1aSopenharmony_ci
315cabdff1aSopenharmony_ci    dataptr[DCTSIZE*1] = tmp10 + tmp11;
316cabdff1aSopenharmony_ci    dataptr[DCTSIZE*5] = tmp10 - tmp11;
317cabdff1aSopenharmony_ci
318cabdff1aSopenharmony_ci    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
319cabdff1aSopenharmony_ci    dataptr[DCTSIZE*3] = tmp13 + z1;
320cabdff1aSopenharmony_ci    dataptr[DCTSIZE*7] = tmp13 - z1;
321cabdff1aSopenharmony_ci
322cabdff1aSopenharmony_ci    dataptr++;                        /* advance pointer to next column */
323cabdff1aSopenharmony_ci  }
324cabdff1aSopenharmony_ci}
325cabdff1aSopenharmony_ci
326cabdff1aSopenharmony_ci
/* Remove this file's private helper macros.  MULTIPLY expands to DESCALE and
 * CONST_BITS, so it is dropped together with them rather than left behind
 * referring to names that no longer exist, and all four FIX_* constants are
 * removed, not just two of them.
 */
#undef GLOBAL
#undef CONST_BITS
#undef DESCALE
#undef MULTIPLY
#undef FIX_0_382683433
#undef FIX_0_541196100
#undef FIX_0_707106781
#undef FIX_1_306562965
332