xref: /third_party/ffmpeg/libavcodec/snow_dwt.c (revision cabdff1a)
1/*
2 * Copyright (C) 2004-2010 Michael Niedermayer <michaelni@gmx.at>
3 * Copyright (C) 2008 David Conrad
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22#include "libavutil/attributes.h"
23#include "libavutil/avassert.h"
24#include "libavutil/common.h"
25#include "me_cmp.h"
26#include "snow_dwt.h"
27
28int ff_slice_buffer_init(slice_buffer *buf, int line_count,
29                         int max_allocated_lines, int line_width,
30                         IDWTELEM *base_buffer)
31{
32    int i;
33
34    buf->base_buffer = base_buffer;
35    buf->line_count  = line_count;
36    buf->line_width  = line_width;
37    buf->data_count  = max_allocated_lines;
38    buf->line        = av_calloc(line_count, sizeof(*buf->line));
39    if (!buf->line)
40        return AVERROR(ENOMEM);
41    buf->data_stack  = av_malloc_array(max_allocated_lines, sizeof(IDWTELEM *));
42    if (!buf->data_stack) {
43        av_freep(&buf->line);
44        return AVERROR(ENOMEM);
45    }
46
47    for (i = 0; i < max_allocated_lines; i++) {
48        buf->data_stack[i] = av_malloc_array(line_width, sizeof(IDWTELEM));
49        if (!buf->data_stack[i]) {
50            for (i--; i >=0; i--)
51                av_freep(&buf->data_stack[i]);
52            av_freep(&buf->data_stack);
53            av_freep(&buf->line);
54            return AVERROR(ENOMEM);
55        }
56    }
57
58    buf->data_stack_top = max_allocated_lines - 1;
59    return 0;
60}
61
62IDWTELEM *ff_slice_buffer_load_line(slice_buffer *buf, int line)
63{
64    IDWTELEM *buffer;
65
66    av_assert0(buf->data_stack_top >= 0);
67//  av_assert1(!buf->line[line]);
68    if (buf->line[line])
69        return buf->line[line];
70
71    buffer = buf->data_stack[buf->data_stack_top];
72    buf->data_stack_top--;
73    buf->line[line] = buffer;
74
75    return buffer;
76}
77
78void ff_slice_buffer_release(slice_buffer *buf, int line)
79{
80    IDWTELEM *buffer;
81
82    av_assert1(line >= 0 && line < buf->line_count);
83    av_assert1(buf->line[line]);
84
85    buffer = buf->line[line];
86    buf->data_stack_top++;
87    buf->data_stack[buf->data_stack_top] = buffer;
88    buf->line[line]                      = NULL;
89}
90
91void ff_slice_buffer_flush(slice_buffer *buf)
92{
93    int i;
94
95    if (!buf->line)
96        return;
97
98    for (i = 0; i < buf->line_count; i++)
99        if (buf->line[i])
100            ff_slice_buffer_release(buf, i);
101}
102
103void ff_slice_buffer_destroy(slice_buffer *buf)
104{
105    int i;
106    ff_slice_buffer_flush(buf);
107
108    if (buf->data_stack)
109        for (i = buf->data_count - 1; i >= 0; i--)
110            av_freep(&buf->data_stack[i]);
111    av_freep(&buf->data_stack);
112    av_freep(&buf->line);
113}
114
115static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
116                                  int dst_step, int src_step, int ref_step,
117                                  int width, int mul, int add, int shift,
118                                  int highpass, int inverse)
119{
120    const int mirror_left  = !highpass;
121    const int mirror_right = (width & 1) ^ highpass;
122    const int w            = (width >> 1) - 1 + (highpass & width);
123    int i;
124
125#define LIFT(src, ref, inv) ((src) + ((inv) ? -(ref) : +(ref)))
126    if (mirror_left) {
127        dst[0] = LIFT(src[0], ((mul * 2 * ref[0] + add) >> shift), inverse);
128        dst   += dst_step;
129        src   += src_step;
130    }
131
132    for (i = 0; i < w; i++)
133        dst[i * dst_step] = LIFT(src[i * src_step],
134                                 ((mul * (ref[i * ref_step] +
135                                          ref[(i + 1) * ref_step]) +
136                                   add) >> shift),
137                                 inverse);
138
139    if (mirror_right)
140        dst[w * dst_step] = LIFT(src[w * src_step],
141                                 ((mul * 2 * ref[w * ref_step] + add) >> shift),
142                                 inverse);
143}
144
145static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
146                                   int dst_step, int src_step, int ref_step,
147                                   int width, int mul, int add, int shift,
148                                   int highpass, int inverse)
149{
150    const int mirror_left  = !highpass;
151    const int mirror_right = (width & 1) ^ highpass;
152    const int w            = (width >> 1) - 1 + (highpass & width);
153    int i;
154
155    av_assert1(shift == 4);
156#define LIFTS(src, ref, inv)                                            \
157    ((inv) ? (src) + (((ref) + 4 * (src)) >> shift)                     \
158           : -((-16 * (src) + (ref) + add /                             \
159                4 + 1 + (5 << 25)) / (5 * 4) - (1 << 23)))
160    if (mirror_left) {
161        dst[0] = LIFTS(src[0], mul * 2 * ref[0] + add, inverse);
162        dst   += dst_step;
163        src   += src_step;
164    }
165
166    for (i = 0; i < w; i++)
167        dst[i * dst_step] = LIFTS(src[i * src_step],
168                                  mul * (ref[i * ref_step] +
169                                         ref[(i + 1) * ref_step]) + add,
170                                  inverse);
171
172    if (mirror_right)
173        dst[w * dst_step] = LIFTS(src[w * src_step],
174                                  mul * 2 * ref[w * ref_step] + add,
175                                  inverse);
176}
177
178static void horizontal_decompose53i(DWTELEM *b, DWTELEM *temp, int width)
179{
180    const int width2 = width >> 1;
181    int x;
182    const int w2 = (width + 1) >> 1;
183
184    for (x = 0; x < width2; x++) {
185        temp[x]      = b[2 * x];
186        temp[x + w2] = b[2 * x + 1];
187    }
188    if (width & 1)
189        temp[x] = b[2 * x];
190    lift(b + w2, temp + w2, temp,   1, 1, 1, width, -1, 0, 1, 1, 0);
191    lift(b,      temp,      b + w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
192}
193
194static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
195                                    int width)
196{
197    int i;
198
199    for (i = 0; i < width; i++)
200        b1[i] -= (b0[i] + b2[i]) >> 1;
201}
202
203static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
204                                    int width)
205{
206    int i;
207
208    for (i = 0; i < width; i++)
209        b1[i] += (b0[i] + b2[i] + 2) >> 2;
210}
211
212static void spatial_decompose53i(DWTELEM *buffer, DWTELEM *temp,
213                                 int width, int height, int stride)
214{
215    int y;
216    DWTELEM *b0 = buffer + avpriv_mirror(-2 - 1, height - 1) * stride;
217    DWTELEM *b1 = buffer + avpriv_mirror(-2,     height - 1) * stride;
218
219    for (y = -2; y < height; y += 2) {
220        DWTELEM *b2 = buffer + avpriv_mirror(y + 1, height - 1) * stride;
221        DWTELEM *b3 = buffer + avpriv_mirror(y + 2, height - 1) * stride;
222
223        if (y + 1 < (unsigned)height)
224            horizontal_decompose53i(b2, temp, width);
225        if (y + 2 < (unsigned)height)
226            horizontal_decompose53i(b3, temp, width);
227
228        if (y + 1 < (unsigned)height)
229            vertical_decompose53iH0(b1, b2, b3, width);
230        if (y + 0 < (unsigned)height)
231            vertical_decompose53iL0(b0, b1, b2, width);
232
233        b0 = b2;
234        b1 = b3;
235    }
236}
237
238static void horizontal_decompose97i(DWTELEM *b, DWTELEM *temp, int width)
239{
240    const int w2 = (width + 1) >> 1;
241
242    lift(temp + w2, b + 1, b,         1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
243    liftS(temp,     b,     temp + w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
244    lift(b + w2, temp + w2, temp,     1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
245    lift(b,      temp,      b + w2,   1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
246}
247
248static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
249                                    int width)
250{
251    int i;
252
253    for (i = 0; i < width; i++)
254        b1[i] -= (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS;
255}
256
257static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
258                                    int width)
259{
260    int i;
261
262    for (i = 0; i < width; i++)
263        b1[i] += (W_CM * (b0[i] + b2[i]) + W_CO) >> W_CS;
264}
265
266static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
267                                    int width)
268{
269    int i;
270
271    for (i = 0; i < width; i++)
272        b1[i] = (16 * 4 * b1[i] - 4 * (b0[i] + b2[i]) + W_BO * 5 + (5 << 27)) /
273                (5 * 16) - (1 << 23);
274}
275
276static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
277                                    int width)
278{
279    int i;
280
281    for (i = 0; i < width; i++)
282        b1[i] += (W_DM * (b0[i] + b2[i]) + W_DO) >> W_DS;
283}
284
285static void spatial_decompose97i(DWTELEM *buffer, DWTELEM *temp,
286                                 int width, int height, int stride)
287{
288    int y;
289    DWTELEM *b0 = buffer + avpriv_mirror(-4 - 1, height - 1) * stride;
290    DWTELEM *b1 = buffer + avpriv_mirror(-4,     height - 1) * stride;
291    DWTELEM *b2 = buffer + avpriv_mirror(-4 + 1, height - 1) * stride;
292    DWTELEM *b3 = buffer + avpriv_mirror(-4 + 2, height - 1) * stride;
293
294    for (y = -4; y < height; y += 2) {
295        DWTELEM *b4 = buffer + avpriv_mirror(y + 3, height - 1) * stride;
296        DWTELEM *b5 = buffer + avpriv_mirror(y + 4, height - 1) * stride;
297
298        if (y + 3 < (unsigned)height)
299            horizontal_decompose97i(b4, temp, width);
300        if (y + 4 < (unsigned)height)
301            horizontal_decompose97i(b5, temp, width);
302
303        if (y + 3 < (unsigned)height)
304            vertical_decompose97iH0(b3, b4, b5, width);
305        if (y + 2 < (unsigned)height)
306            vertical_decompose97iL0(b2, b3, b4, width);
307        if (y + 1 < (unsigned)height)
308            vertical_decompose97iH1(b1, b2, b3, width);
309        if (y + 0 < (unsigned)height)
310            vertical_decompose97iL1(b0, b1, b2, width);
311
312        b0 = b2;
313        b1 = b3;
314        b2 = b4;
315        b3 = b5;
316    }
317}
318
319void ff_spatial_dwt(DWTELEM *buffer, DWTELEM *temp, int width, int height,
320                    int stride, int type, int decomposition_count)
321{
322    int level;
323
324    for (level = 0; level < decomposition_count; level++) {
325        switch (type) {
326        case DWT_97:
327            spatial_decompose97i(buffer, temp,
328                                 width >> level, height >> level,
329                                 stride << level);
330            break;
331        case DWT_53:
332            spatial_decompose53i(buffer, temp,
333                                 width >> level, height >> level,
334                                 stride << level);
335            break;
336        }
337    }
338}
339
340static void horizontal_compose53i(IDWTELEM *b, IDWTELEM *temp, int width)
341{
342    const int width2 = width >> 1;
343    const int w2     = (width + 1) >> 1;
344    int x;
345
346    for (x = 0; x < width2; x++) {
347        temp[2 * x]     = b[x];
348        temp[2 * x + 1] = b[x + w2];
349    }
350    if (width & 1)
351        temp[2 * x] = b[x];
352
353    b[0] = temp[0] - ((temp[1] + 1) >> 1);
354    for (x = 2; x < width - 1; x += 2) {
355        b[x]     = temp[x]     - ((temp[x - 1] + temp[x + 1] + 2) >> 2);
356        b[x - 1] = temp[x - 1] + ((b[x - 2]    + b[x]        + 1) >> 1);
357    }
358    if (width & 1) {
359        b[x]     = temp[x]     - ((temp[x - 1]     + 1) >> 1);
360        b[x - 1] = temp[x - 1] + ((b[x - 2] + b[x] + 1) >> 1);
361    } else
362        b[x - 1] = temp[x - 1] + b[x - 2];
363}
364
365static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
366                                  int width)
367{
368    int i;
369
370    for (i = 0; i < width; i++)
371        b1[i] += (b0[i] + b2[i]) >> 1;
372}
373
374static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
375                                  int width)
376{
377    int i;
378
379    for (i = 0; i < width; i++)
380        b1[i] -= (b0[i] + b2[i] + 2) >> 2;
381}
382
383static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer *sb,
384                                             int height, int stride_line)
385{
386    cs->b0 = slice_buffer_get_line(sb,
387                                   avpriv_mirror(-1 - 1, height - 1) * stride_line);
388    cs->b1 = slice_buffer_get_line(sb, avpriv_mirror(-1, height - 1) * stride_line);
389    cs->y  = -1;
390}
391
392static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer,
393                                    int height, int stride)
394{
395    cs->b0 = buffer + avpriv_mirror(-1 - 1, height - 1) * stride;
396    cs->b1 = buffer + avpriv_mirror(-1,     height - 1) * stride;
397    cs->y  = -1;
398}
399
400static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer *sb,
401                                           IDWTELEM *temp,
402                                           int width, int height,
403                                           int stride_line)
404{
405    int y = cs->y;
406
407    IDWTELEM *b0 = cs->b0;
408    IDWTELEM *b1 = cs->b1;
409    IDWTELEM *b2 = slice_buffer_get_line(sb,
410                                         avpriv_mirror(y + 1, height - 1) *
411                                         stride_line);
412    IDWTELEM *b3 = slice_buffer_get_line(sb,
413                                         avpriv_mirror(y + 2, height - 1) *
414                                         stride_line);
415
416    if (y + 1 < (unsigned)height && y < (unsigned)height) {
417        int x;
418
419        for (x = 0; x < width; x++) {
420            b2[x] -= (b1[x] + b3[x] + 2) >> 2;
421            b1[x] += (b0[x] + b2[x])     >> 1;
422        }
423    } else {
424        if (y + 1 < (unsigned)height)
425            vertical_compose53iL0(b1, b2, b3, width);
426        if (y + 0 < (unsigned)height)
427            vertical_compose53iH0(b0, b1, b2, width);
428    }
429
430    if (y - 1 < (unsigned)height)
431        horizontal_compose53i(b0, temp, width);
432    if (y + 0 < (unsigned)height)
433        horizontal_compose53i(b1, temp, width);
434
435    cs->b0  = b2;
436    cs->b1  = b3;
437    cs->y  += 2;
438}
439
440static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer,
441                                  IDWTELEM *temp, int width, int height,
442                                  int stride)
443{
444    int y        = cs->y;
445    IDWTELEM *b0 = cs->b0;
446    IDWTELEM *b1 = cs->b1;
447    IDWTELEM *b2 = buffer + avpriv_mirror(y + 1, height - 1) * stride;
448    IDWTELEM *b3 = buffer + avpriv_mirror(y + 2, height - 1) * stride;
449
450    if (y + 1 < (unsigned)height)
451        vertical_compose53iL0(b1, b2, b3, width);
452    if (y + 0 < (unsigned)height)
453        vertical_compose53iH0(b0, b1, b2, width);
454
455    if (y - 1 < (unsigned)height)
456        horizontal_compose53i(b0, temp, width);
457    if (y + 0 < (unsigned)height)
458        horizontal_compose53i(b1, temp, width);
459
460    cs->b0  = b2;
461    cs->b1  = b3;
462    cs->y  += 2;
463}
464
465static void snow_horizontal_compose97i(IDWTELEM *b, IDWTELEM *temp, int width)
466{
467    const int w2 = (width + 1) >> 1;
468    int x;
469
470    temp[0] = b[0] - ((3 * b[w2] + 2) >> 2);
471    for (x = 1; x < (width >> 1); x++) {
472        temp[2 * x]     = b[x] - ((3 * (b[x + w2 - 1] + b[x + w2]) + 4) >> 3);
473        temp[2 * x - 1] = b[x + w2 - 1] - temp[2 * x - 2] - temp[2 * x];
474    }
475    if (width & 1) {
476        temp[2 * x]     = b[x] - ((3 * b[x + w2 - 1] + 2) >> 2);
477        temp[2 * x - 1] = b[x + w2 - 1] - temp[2 * x - 2] - temp[2 * x];
478    } else
479        temp[2 * x - 1] = b[x + w2 - 1] - 2 * temp[2 * x - 2];
480
481    b[0] = temp[0] + ((2 * temp[0] + temp[1] + 4) >> 3);
482    for (x = 2; x < width - 1; x += 2) {
483        b[x]     = temp[x] + ((4 * temp[x] + temp[x - 1] + temp[x + 1] + 8) >> 4);
484        b[x - 1] = temp[x - 1] + ((3 * (b[x - 2] + b[x])) >> 1);
485    }
486    if (width & 1) {
487        b[x]     = temp[x] + ((2 * temp[x] + temp[x - 1] + 4) >> 3);
488        b[x - 1] = temp[x - 1] + ((3 * (b[x - 2] + b[x])) >> 1);
489    } else
490        b[x - 1] = temp[x - 1] + 3 * b[x - 2];
491}
492
493static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
494                                  int width)
495{
496    int i;
497
498    for (i = 0; i < width; i++)
499        b1[i] += (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS;
500}
501
502static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
503                                  int width)
504{
505    int i;
506
507    for (i = 0; i < width; i++)
508        b1[i] -= (W_CM * (b0[i] + b2[i]) + W_CO) >> W_CS;
509}
510
511static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
512                                  int width)
513{
514    int i;
515
516    for (i = 0; i < width; i++)
517        b1[i] += (W_BM * (b0[i] + b2[i]) + 4 * b1[i] + W_BO) >> W_BS;
518}
519
520static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
521                                  int width)
522{
523    int i;
524
525    for (i = 0; i < width; i++)
526        b1[i] -= (W_DM * (b0[i] + b2[i]) + W_DO) >> W_DS;
527}
528
529static void snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
530                                     IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5,
531                                     int width)
532{
533    int i;
534
535    for (i = 0; i < width; i++) {
536        b4[i] -= (W_DM * (b3[i] + b5[i]) + W_DO) >> W_DS;
537        b3[i] -= (W_CM * (b2[i] + b4[i]) + W_CO) >> W_CS;
538        b2[i] += (W_BM * (b1[i] + b3[i]) + 4 * b2[i] + W_BO) >> W_BS;
539        b1[i] += (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS;
540    }
541}
542
543static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer *sb,
544                                             int height, int stride_line)
545{
546    cs->b0 = slice_buffer_get_line(sb, avpriv_mirror(-3 - 1, height - 1) * stride_line);
547    cs->b1 = slice_buffer_get_line(sb, avpriv_mirror(-3,     height - 1) * stride_line);
548    cs->b2 = slice_buffer_get_line(sb, avpriv_mirror(-3 + 1, height - 1) * stride_line);
549    cs->b3 = slice_buffer_get_line(sb, avpriv_mirror(-3 + 2, height - 1) * stride_line);
550    cs->y  = -3;
551}
552
553static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height,
554                                    int stride)
555{
556    cs->b0 = buffer + avpriv_mirror(-3 - 1, height - 1) * stride;
557    cs->b1 = buffer + avpriv_mirror(-3,     height - 1) * stride;
558    cs->b2 = buffer + avpriv_mirror(-3 + 1, height - 1) * stride;
559    cs->b3 = buffer + avpriv_mirror(-3 + 2, height - 1) * stride;
560    cs->y  = -3;
561}
562
563static void spatial_compose97i_dy_buffered(SnowDWTContext *dsp, DWTCompose *cs,
564                                           slice_buffer * sb, IDWTELEM *temp,
565                                           int width, int height,
566                                           int stride_line)
567{
568    int y = cs->y;
569
570    IDWTELEM *b0 = cs->b0;
571    IDWTELEM *b1 = cs->b1;
572    IDWTELEM *b2 = cs->b2;
573    IDWTELEM *b3 = cs->b3;
574    IDWTELEM *b4 = slice_buffer_get_line(sb,
575                                         avpriv_mirror(y + 3, height - 1) *
576                                         stride_line);
577    IDWTELEM *b5 = slice_buffer_get_line(sb,
578                                         avpriv_mirror(y + 4, height - 1) *
579                                         stride_line);
580
581    if (y > 0 && y + 4 < height) {
582        dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
583    } else {
584        if (y + 3 < (unsigned)height)
585            vertical_compose97iL1(b3, b4, b5, width);
586        if (y + 2 < (unsigned)height)
587            vertical_compose97iH1(b2, b3, b4, width);
588        if (y + 1 < (unsigned)height)
589            vertical_compose97iL0(b1, b2, b3, width);
590        if (y + 0 < (unsigned)height)
591            vertical_compose97iH0(b0, b1, b2, width);
592    }
593
594    if (y - 1 < (unsigned)height)
595        dsp->horizontal_compose97i(b0, temp, width);
596    if (y + 0 < (unsigned)height)
597        dsp->horizontal_compose97i(b1, temp, width);
598
599    cs->b0  = b2;
600    cs->b1  = b3;
601    cs->b2  = b4;
602    cs->b3  = b5;
603    cs->y  += 2;
604}
605
606static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer,
607                                  IDWTELEM *temp, int width, int height,
608                                  int stride)
609{
610    int y        = cs->y;
611    IDWTELEM *b0 = cs->b0;
612    IDWTELEM *b1 = cs->b1;
613    IDWTELEM *b2 = cs->b2;
614    IDWTELEM *b3 = cs->b3;
615    IDWTELEM *b4 = buffer + avpriv_mirror(y + 3, height - 1) * stride;
616    IDWTELEM *b5 = buffer + avpriv_mirror(y + 4, height - 1) * stride;
617
618    if (y + 3 < (unsigned)height)
619        vertical_compose97iL1(b3, b4, b5, width);
620    if (y + 2 < (unsigned)height)
621        vertical_compose97iH1(b2, b3, b4, width);
622    if (y + 1 < (unsigned)height)
623        vertical_compose97iL0(b1, b2, b3, width);
624    if (y + 0 < (unsigned)height)
625        vertical_compose97iH0(b0, b1, b2, width);
626
627    if (y - 1 < (unsigned)height)
628        snow_horizontal_compose97i(b0, temp, width);
629    if (y + 0 < (unsigned)height)
630        snow_horizontal_compose97i(b1, temp, width);
631
632    cs->b0  = b2;
633    cs->b1  = b3;
634    cs->b2  = b4;
635    cs->b3  = b5;
636    cs->y  += 2;
637}
638
639void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer *sb, int width,
640                                   int height, int stride_line, int type,
641                                   int decomposition_count)
642{
643    int level;
644    for (level = decomposition_count - 1; level >= 0; level--) {
645        switch (type) {
646        case DWT_97:
647            spatial_compose97i_buffered_init(cs + level, sb, height >> level,
648                                             stride_line << level);
649            break;
650        case DWT_53:
651            spatial_compose53i_buffered_init(cs + level, sb, height >> level,
652                                             stride_line << level);
653            break;
654        }
655    }
656}
657
658void ff_spatial_idwt_buffered_slice(SnowDWTContext *dsp, DWTCompose *cs,
659                                    slice_buffer *slice_buf, IDWTELEM *temp,
660                                    int width, int height, int stride_line,
661                                    int type, int decomposition_count, int y)
662{
663    const int support = type == 1 ? 3 : 5;
664    int level;
665    if (type == 2)
666        return;
667
668    for (level = decomposition_count - 1; level >= 0; level--)
669        while (cs[level].y <= FFMIN((y >> level) + support, height >> level)) {
670            switch (type) {
671            case DWT_97:
672                spatial_compose97i_dy_buffered(dsp, cs + level, slice_buf, temp,
673                                               width >> level,
674                                               height >> level,
675                                               stride_line << level);
676                break;
677            case DWT_53:
678                spatial_compose53i_dy_buffered(cs + level, slice_buf, temp,
679                                               width >> level,
680                                               height >> level,
681                                               stride_line << level);
682                break;
683            }
684        }
685}
686
687static void spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width,
688                                 int height, int stride, int type,
689                                 int decomposition_count)
690{
691    int level;
692    for (level = decomposition_count - 1; level >= 0; level--) {
693        switch (type) {
694        case DWT_97:
695            spatial_compose97i_init(cs + level, buffer, height >> level,
696                                    stride << level);
697            break;
698        case DWT_53:
699            spatial_compose53i_init(cs + level, buffer, height >> level,
700                                    stride << level);
701            break;
702        }
703    }
704}
705
706static void spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer,
707                                  IDWTELEM *temp, int width, int height,
708                                  int stride, int type,
709                                  int decomposition_count, int y)
710{
711    const int support = type == 1 ? 3 : 5;
712    int level;
713    if (type == 2)
714        return;
715
716    for (level = decomposition_count - 1; level >= 0; level--)
717        while (cs[level].y <= FFMIN((y >> level) + support, height >> level)) {
718            switch (type) {
719            case DWT_97:
720                spatial_compose97i_dy(cs + level, buffer, temp, width >> level,
721                                      height >> level, stride << level);
722                break;
723            case DWT_53:
724                spatial_compose53i_dy(cs + level, buffer, temp, width >> level,
725                                      height >> level, stride << level);
726                break;
727            }
728        }
729}
730
731void ff_spatial_idwt(IDWTELEM *buffer, IDWTELEM *temp, int width, int height,
732                     int stride, int type, int decomposition_count)
733{
734    DWTCompose cs[MAX_DECOMPOSITIONS];
735    int y;
736    spatial_idwt_init(cs, buffer, width, height, stride, type,
737                         decomposition_count);
738    for (y = 0; y < height; y += 4)
739        spatial_idwt_slice(cs, buffer, temp, width, height, stride, type,
740                              decomposition_count, y);
741}
742
743static inline int w_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size,
744                      int w, int h, int type)
745{
746    int s, i, j;
747    const int dec_count = w == 8 ? 3 : 4;
748    int tmp[32 * 32], tmp2[32];
749    int level, ori;
750    static const int scale[2][2][4][4] = {
751        {
752            { // 9/7 8x8 dec=3
753                { 268, 239, 239, 213 },
754                { 0,   224, 224, 152 },
755                { 0,   135, 135, 110 },
756            },
757            { // 9/7 16x16 or 32x32 dec=4
758                { 344, 310, 310, 280 },
759                { 0,   320, 320, 228 },
760                { 0,   175, 175, 136 },
761                { 0,   129, 129, 102 },
762            }
763        },
764        {
765            { // 5/3 8x8 dec=3
766                { 275, 245, 245, 218 },
767                { 0,   230, 230, 156 },
768                { 0,   138, 138, 113 },
769            },
770            { // 5/3 16x16 or 32x32 dec=4
771                { 352, 317, 317, 286 },
772                { 0,   328, 328, 233 },
773                { 0,   180, 180, 140 },
774                { 0,   132, 132, 105 },
775            }
776        }
777    };
778
779    for (i = 0; i < h; i++) {
780        for (j = 0; j < w; j += 4) {
781            tmp[32 * i + j + 0] = (pix1[j + 0] - pix2[j + 0]) << 4;
782            tmp[32 * i + j + 1] = (pix1[j + 1] - pix2[j + 1]) << 4;
783            tmp[32 * i + j + 2] = (pix1[j + 2] - pix2[j + 2]) << 4;
784            tmp[32 * i + j + 3] = (pix1[j + 3] - pix2[j + 3]) << 4;
785        }
786        pix1 += line_size;
787        pix2 += line_size;
788    }
789
790    ff_spatial_dwt(tmp, tmp2, w, h, 32, type, dec_count);
791
792    s = 0;
793    av_assert1(w == h);
794    for (level = 0; level < dec_count; level++)
795        for (ori = level ? 1 : 0; ori < 4; ori++) {
796            int size   = w >> (dec_count - level);
797            int sx     = (ori & 1) ? size : 0;
798            int stride = 32 << (dec_count - level);
799            int sy     = (ori & 2) ? stride >> 1 : 0;
800
801            for (i = 0; i < size; i++)
802                for (j = 0; j < size; j++) {
803                    int v = tmp[sx + sy + i * stride + j] *
804                            scale[type][dec_count - 3][level][ori];
805                    s += FFABS(v);
806                }
807        }
808    av_assert1(s >= 0);
809    return s >> 9;
810}
811
812static int w53_8_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
813{
814    return w_c(v, pix1, pix2, line_size, 8, h, 1);
815}
816
817static int w97_8_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
818{
819    return w_c(v, pix1, pix2, line_size, 8, h, 0);
820}
821
822static int w53_16_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
823{
824    return w_c(v, pix1, pix2, line_size, 16, h, 1);
825}
826
827static int w97_16_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
828{
829    return w_c(v, pix1, pix2, line_size, 16, h, 0);
830}
831
832int ff_w53_32_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
833{
834    return w_c(v, pix1, pix2, line_size, 32, h, 1);
835}
836
837int ff_w97_32_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
838{
839    return w_c(v, pix1, pix2, line_size, 32, h, 0);
840}
841
842av_cold void ff_dsputil_init_dwt(MECmpContext *c)
843{
844    c->w53[0] = w53_16_c;
845    c->w53[1] = w53_8_c;
846    c->w97[0] = w97_16_c;
847    c->w97[1] = w97_8_c;
848}
849
850av_cold void ff_dwt_init(SnowDWTContext *c)
851{
852    c->vertical_compose97i   = snow_vertical_compose97i;
853    c->horizontal_compose97i = snow_horizontal_compose97i;
854    c->inner_add_yblock      = ff_snow_inner_add_yblock;
855
856#if ARCH_X86 && HAVE_MMX
857    ff_dwt_init_x86(c);
858#endif
859}
860
861
862