1/*
2 * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
3 * Copyright (C) 2016 Open Broadcast Systems Ltd.
4 * Author        2016 Rostislav Pehlivanov <atomnuker@gmail.com>
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23#include "libavutil/attributes.h"
24#include "libavutil/mem.h"
25#include "vc2enc_dwt.h"
26
27/* Since the transforms spit out interleaved coefficients, this function
28 * rearranges the coefficients into the more traditional subdivision,
29 * making it easier to encode and perform another level. */
30static av_always_inline void deinterleave(dwtcoef *linell, ptrdiff_t stride,
31                                          int width, int height, dwtcoef *synthl)
32{
33    int x, y;
34    ptrdiff_t synthw = width << 1;
35    dwtcoef *linehl = linell + width;
36    dwtcoef *linelh = linell + height*stride;
37    dwtcoef *linehh = linelh + width;
38
39    /* Deinterleave the coefficients. */
40    for (y = 0; y < height; y++) {
41        for (x = 0; x < width; x++) {
42            linell[x] = synthl[(x << 1)];
43            linehl[x] = synthl[(x << 1) + 1];
44            linelh[x] = synthl[(x << 1) + synthw];
45            linehh[x] = synthl[(x << 1) + synthw + 1];
46        }
47        synthl += synthw << 1;
48        linell += stride;
49        linelh += stride;
50        linehl += stride;
51        linehh += stride;
52    }
53}
54
55static void vc2_subband_dwt_97(VC2TransformContext *t, dwtcoef *data,
56                               ptrdiff_t stride, int width, int height)
57{
58    int x, y;
59    dwtcoef *datal = data, *synth = t->buffer, *synthl = synth;
60    const ptrdiff_t synth_width  = width  << 1;
61    const ptrdiff_t synth_height = height << 1;
62
63    /*
64     * Shift in one bit that is used for additional precision and copy
65     * the data to the buffer.
66     */
67    for (y = 0; y < synth_height; y++) {
68        for (x = 0; x < synth_width; x++)
69            synthl[x] = datal[x] * 2;
70        synthl += synth_width;
71        datal += stride;
72    }
73
74    /* Horizontal synthesis. */
75    synthl = synth;
76    for (y = 0; y < synth_height; y++) {
77        /* Lifting stage 2. */
78        synthl[1] -= (8*synthl[0] + 9*synthl[2] - synthl[4] + 8) >> 4;
79        for (x = 1; x < width - 2; x++)
80            synthl[2*x + 1] -= (9*synthl[2*x] + 9*synthl[2*x + 2] - synthl[2*x + 4] -
81                                synthl[2 * x - 2] + 8) >> 4;
82        synthl[synth_width - 1] -= (17*synthl[synth_width - 2] -
83                                    synthl[synth_width - 4] + 8) >> 4;
84        synthl[synth_width - 3] -= (8*synthl[synth_width - 2] +
85                                    9*synthl[synth_width - 4] -
86                                    synthl[synth_width - 6] + 8) >> 4;
87        /* Lifting stage 1. */
88        synthl[0] += (synthl[1] + synthl[1] + 2) >> 2;
89        for (x = 1; x < width - 1; x++)
90            synthl[2*x] += (synthl[2*x - 1] + synthl[2*x + 1] + 2) >> 2;
91
92        synthl[synth_width - 2] += (synthl[synth_width - 3] +
93                                    synthl[synth_width - 1] + 2) >> 2;
94        synthl += synth_width;
95    }
96
97    /* Vertical synthesis: Lifting stage 2. */
98    synthl = synth + synth_width;
99    for (x = 0; x < synth_width; x++)
100        synthl[x] -= (8*synthl[x - synth_width] + 9*synthl[x + synth_width] -
101                      synthl[x + 3 * synth_width] + 8) >> 4;
102
103    synthl = synth + (synth_width << 1);
104    for (y = 1; y < height - 2; y++) {
105        for (x = 0; x < synth_width; x++)
106            synthl[x + synth_width] -= (9*synthl[x] +
107                                        9*synthl[x + 2 * synth_width] -
108                                        synthl[x - 2 * synth_width] -
109                                        synthl[x + 4 * synth_width] + 8) >> 4;
110        synthl += synth_width << 1;
111    }
112
113    synthl = synth + (synth_height - 1) * synth_width;
114    for (x = 0; x < synth_width; x++) {
115        synthl[x] -= (17*synthl[x - synth_width] -
116                      synthl[x - 3*synth_width] + 8) >> 4;
117                      synthl[x - 2*synth_width] -= (9*synthl[x - 3*synth_width] +
118                      8*synthl[x - 1*synth_width] - synthl[x - 5*synth_width] + 8) >> 4;
119    }
120
121    /* Vertical synthesis: Lifting stage 1. */
122    synthl = synth;
123    for (x = 0; x < synth_width; x++)
124        synthl[x] += (synthl[x + synth_width] + synthl[x + synth_width] + 2) >> 2;
125
126    synthl = synth + (synth_width << 1);
127    for (y = 1; y < height - 1; y++) {
128        for (x = 0; x < synth_width; x++)
129            synthl[x] += (synthl[x - synth_width] + synthl[x + synth_width] + 2) >> 2;
130        synthl += synth_width << 1;
131    }
132
133    synthl = synth + (synth_height - 2) * synth_width;
134    for (x = 0; x < synth_width; x++)
135        synthl[x] += (synthl[x - synth_width] + synthl[x + synth_width] + 2) >> 2;
136
137    deinterleave(data, stride, width, height, synth);
138}
139
140static void vc2_subband_dwt_53(VC2TransformContext *t, dwtcoef *data,
141                               ptrdiff_t stride, int width, int height)
142{
143    int x, y;
144    dwtcoef *synth = t->buffer, *synthl = synth, *datal = data;
145    const ptrdiff_t synth_width  = width  << 1;
146    const ptrdiff_t synth_height = height << 1;
147
148    /*
149     * Shift in one bit that is used for additional precision and copy
150     * the data to the buffer.
151     */
152    for (y = 0; y < synth_height; y++) {
153        for (x = 0; x < synth_width; x++)
154            synthl[x] = datal[x] << 1;
155        synthl += synth_width;
156        datal  += stride;
157    }
158
159    /* Horizontal synthesis. */
160    synthl = synth;
161    for (y = 0; y < synth_height; y++) {
162        /* Lifting stage 2. */
163        for (x = 0; x < width - 1; x++)
164            synthl[2 * x + 1] -= (synthl[2 * x] + synthl[2 * x + 2] + 1) >> 1;
165
166        synthl[synth_width - 1] -= (2*synthl[synth_width - 2] + 1) >> 1;
167
168        /* Lifting stage 1. */
169        synthl[0] += (2*synthl[1] + 2) >> 2;
170        for (x = 1; x < width - 1; x++)
171            synthl[2 * x] += (synthl[2 * x - 1] + synthl[2 * x + 1] + 2) >> 2;
172
173        synthl[synth_width - 2] += (synthl[synth_width - 3] + synthl[synth_width - 1] + 2) >> 2;
174
175        synthl += synth_width;
176    }
177
178    /* Vertical synthesis: Lifting stage 2. */
179    synthl = synth + synth_width;
180    for (x = 0; x < synth_width; x++)
181        synthl[x] -= (synthl[x - synth_width] + synthl[x + synth_width] + 1) >> 1;
182
183    synthl = synth + (synth_width << 1);
184    for (y = 1; y < height - 1; y++) {
185        for (x = 0; x < synth_width; x++)
186            synthl[x + synth_width] -= (synthl[x] + synthl[x + synth_width * 2] + 1) >> 1;
187        synthl += (synth_width << 1);
188    }
189
190    synthl = synth + (synth_height - 1) * synth_width;
191    for (x = 0; x < synth_width; x++)
192        synthl[x] -= (2*synthl[x - synth_width] + 1) >> 1;
193
194    /* Vertical synthesis: Lifting stage 1. */
195    synthl = synth;
196    for (x = 0; x < synth_width; x++)
197        synthl[x] += (2*synthl[synth_width + x] + 2) >> 2;
198
199    synthl = synth + (synth_width << 1);
200    for (y = 1; y < height - 1; y++) {
201        for (x = 0; x < synth_width; x++)
202            synthl[x] += (synthl[x + synth_width] + synthl[x - synth_width] + 2) >> 2;
203        synthl += (synth_width << 1);
204    }
205
206    synthl = synth + (synth_height - 2)*synth_width;
207    for (x = 0; x < synth_width; x++)
208        synthl[x] += (synthl[x - synth_width] + synthl[x + synth_width] + 2) >> 2;
209
210
211    deinterleave(data, stride, width, height, synth);
212}
213
214static av_always_inline void dwt_haar(VC2TransformContext *t, dwtcoef *data,
215                                      ptrdiff_t stride, int width, int height,
216                                      const int s)
217{
218    int x, y;
219    dwtcoef *synth = t->buffer, *synthl = synth, *datal = data;
220    const ptrdiff_t synth_width  = width  << 1;
221    const ptrdiff_t synth_height = height << 1;
222
223    /* Horizontal synthesis. */
224    for (y = 0; y < synth_height; y++) {
225        for (x = 0; x < synth_width; x += 2) {
226            synthl[y*synth_width + x + 1] = (datal[y*stride + x + 1] << s) -
227                                            (datal[y*stride + x] << s);
228            synthl[y*synth_width + x] = (datal[y*stride + x + 0] << s) +
229                                        ((synthl[y*synth_width + x + 1] + 1) >> 1);
230        }
231    }
232
233    /* Vertical synthesis. */
234    for (x = 0; x < synth_width; x++) {
235        for (y = 0; y < synth_height; y += 2) {
236            synthl[(y + 1)*synth_width + x] = synthl[(y + 1)*synth_width + x] -
237                                              synthl[y*synth_width + x];
238            synthl[y*synth_width + x] = synthl[y*synth_width + x] +
239                                        ((synthl[(y + 1)*synth_width + x] + 1) >> 1);
240        }
241    }
242
243    deinterleave(data, stride, width, height, synth);
244}
245
/*
 * Haar transform entry point without extra precision: forwards to
 * dwt_haar() with s = 0, so the input is used unscaled.
 */
static void vc2_subband_dwt_haar(VC2TransformContext *t, dwtcoef *data,
                                 ptrdiff_t stride, int width, int height)
{
    dwt_haar(t, data, stride, width, height, 0);
}
251
/*
 * Haar transform entry point with one extra bit of precision: forwards to
 * dwt_haar() with s = 1, so the input is doubled before filtering.
 */
static void vc2_subband_dwt_haar_shift(VC2TransformContext *t, dwtcoef *data,
                                       ptrdiff_t stride, int width, int height)
{
    dwt_haar(t, data, stride, width, height, 1);
}
257
258av_cold int ff_vc2enc_init_transforms(VC2TransformContext *s, int p_stride,
259                                      int p_height, int slice_w, int slice_h)
260{
261    s->vc2_subband_dwt[VC2_TRANSFORM_9_7]    = vc2_subband_dwt_97;
262    s->vc2_subband_dwt[VC2_TRANSFORM_5_3]    = vc2_subband_dwt_53;
263    s->vc2_subband_dwt[VC2_TRANSFORM_HAAR]   = vc2_subband_dwt_haar;
264    s->vc2_subband_dwt[VC2_TRANSFORM_HAAR_S] = vc2_subband_dwt_haar_shift;
265
266    /* Pad by the slice size, only matters for non-Haar wavelets */
267    s->buffer = av_calloc((p_stride + slice_w)*(p_height + slice_h), sizeof(dwtcoef));
268    if (!s->buffer)
269        return 1;
270
271    s->padding = (slice_h >> 1)*p_stride + (slice_w >> 1);
272    s->buffer += s->padding;
273
274    return 0;
275}
276
277av_cold void ff_vc2enc_free_transforms(VC2TransformContext *s)
278{
279    if (s->buffer) {
280        av_free(s->buffer - s->padding);
281        s->buffer = NULL;
282    }
283}
284