xref: /third_party/ffmpeg/libavcodec/snow.c (revision cabdff1a)
1/*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21#include "libavutil/intmath.h"
22#include "libavutil/log.h"
23#include "libavutil/opt.h"
24#include "libavutil/thread.h"
25#include "avcodec.h"
26#include "encode.h"
27#include "me_cmp.h"
28#include "snow_dwt.h"
29#include "internal.h"
30#include "snow.h"
31#include "snowdata.h"
32
33#include "rangecoder.h"
34#include "mathops.h"
35
36
37void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
38                              int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
39    int y, x;
40    IDWTELEM * dst;
41    for(y=0; y<b_h; y++){
42        //FIXME ugly misuse of obmc_stride
43        const uint8_t *obmc1= obmc + y*obmc_stride;
44        const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
45        const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
46        const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
47        dst = slice_buffer_get_line(sb, src_y + y);
48        for(x=0; x<b_w; x++){
49            int v=   obmc1[x] * block[3][x + y*src_stride]
50                    +obmc2[x] * block[2][x + y*src_stride]
51                    +obmc3[x] * block[1][x + y*src_stride]
52                    +obmc4[x] * block[0][x + y*src_stride];
53
54            v <<= 8 - LOG2_OBMC_MAX;
55            if(FRAC_BITS != 8){
56                v >>= 8 - FRAC_BITS;
57            }
58            if(add){
59                v += dst[x + src_x];
60                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
61                if(v&(~255)) v= ~(v>>31);
62                dst8[x + y*src_stride] = v;
63            }else{
64                dst[x + src_x] -= v;
65            }
66        }
67    }
68}
69
70int ff_snow_get_buffer(SnowContext *s, AVFrame *frame)
71{
72    int ret, i;
73    int edges_needed = av_codec_is_encoder(s->avctx->codec);
74
75    frame->width  = s->avctx->width ;
76    frame->height = s->avctx->height;
77    if (edges_needed) {
78        frame->width  += 2 * EDGE_WIDTH;
79        frame->height += 2 * EDGE_WIDTH;
80
81        ret = ff_encode_alloc_frame(s->avctx, frame);
82    } else
83        ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF);
84    if (ret < 0)
85        return ret;
86    if (edges_needed) {
87        for (i = 0; frame->data[i]; i++) {
88            int offset = (EDGE_WIDTH >> (i ? s->chroma_v_shift : 0)) *
89                            frame->linesize[i] +
90                            (EDGE_WIDTH >> (i ? s->chroma_h_shift : 0));
91            frame->data[i] += offset;
92        }
93        frame->width  = s->avctx->width;
94        frame->height = s->avctx->height;
95    }
96
97    return 0;
98}
99
100void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
101    int plane_index, level, orientation;
102
103    for(plane_index=0; plane_index<3; plane_index++){
104        for(level=0; level<MAX_DECOMPOSITIONS; level++){
105            for(orientation=level ? 1:0; orientation<4; orientation++){
106                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
107            }
108        }
109    }
110    memset(s->header_state, MID_STATE, sizeof(s->header_state));
111    memset(s->block_state, MID_STATE, sizeof(s->block_state));
112}
113
114int ff_snow_alloc_blocks(SnowContext *s){
115    int w= AV_CEIL_RSHIFT(s->avctx->width,  LOG2_MB_SIZE);
116    int h= AV_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE);
117
118    s->b_width = w;
119    s->b_height= h;
120
121    av_free(s->block);
122    s->block = av_calloc(w * h,  sizeof(*s->block) << (s->block_max_depth*2));
123    if (!s->block)
124        return AVERROR(ENOMEM);
125
126    return 0;
127}
128
129static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
130    static const uint8_t weight[64]={
131    8,7,6,5,4,3,2,1,
132    7,7,0,0,0,0,0,1,
133    6,0,6,0,0,0,2,0,
134    5,0,0,5,0,3,0,0,
135    4,0,0,0,4,0,0,0,
136    3,0,0,5,0,3,0,0,
137    2,0,6,0,0,0,2,0,
138    1,7,0,0,0,0,0,1,
139    };
140
141    static const uint8_t brane[256]={
142    0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
143    0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
144    0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
145    0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
146    0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
147    0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
148    0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
149    0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
150    0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
151    0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
152    0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
153    0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
154    0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
155    0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
156    0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
157    0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
158    };
159
160    static const uint8_t needs[16]={
161    0,1,0,0,
162    2,4,2,0,
163    0,1,0,0,
164    15
165    };
166
167    int x, y, b, r, l;
168    int16_t tmpIt   [64*(32+HTAPS_MAX)];
169    uint8_t tmp2t[3][64*(32+HTAPS_MAX)];
170    int16_t *tmpI= tmpIt;
171    uint8_t *tmp2= tmp2t[0];
172    const uint8_t *hpel[11];
173    av_assert2(dx<16 && dy<16);
174    r= brane[dx + 16*dy]&15;
175    l= brane[dx + 16*dy]>>4;
176
177    b= needs[l] | needs[r];
178    if(p && !p->diag_mc)
179        b= 15;
180
181    if(b&5){
182        for(y=0; y < b_h+HTAPS_MAX-1; y++){
183            for(x=0; x < b_w; x++){
184                int a_1=src[x + HTAPS_MAX/2-4];
185                int a0= src[x + HTAPS_MAX/2-3];
186                int a1= src[x + HTAPS_MAX/2-2];
187                int a2= src[x + HTAPS_MAX/2-1];
188                int a3= src[x + HTAPS_MAX/2+0];
189                int a4= src[x + HTAPS_MAX/2+1];
190                int a5= src[x + HTAPS_MAX/2+2];
191                int a6= src[x + HTAPS_MAX/2+3];
192                int am=0;
193                if(!p || p->fast_mc){
194                    am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
195                    tmpI[x]= am;
196                    am= (am+16)>>5;
197                }else{
198                    am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
199                    tmpI[x]= am;
200                    am= (am+32)>>6;
201                }
202
203                if(am&(~255)) am= ~(am>>31);
204                tmp2[x]= am;
205            }
206            tmpI+= 64;
207            tmp2+= 64;
208            src += stride;
209        }
210        src -= stride*y;
211    }
212    src += HTAPS_MAX/2 - 1;
213    tmp2= tmp2t[1];
214
215    if(b&2){
216        for(y=0; y < b_h; y++){
217            for(x=0; x < b_w+1; x++){
218                int a_1=src[x + (HTAPS_MAX/2-4)*stride];
219                int a0= src[x + (HTAPS_MAX/2-3)*stride];
220                int a1= src[x + (HTAPS_MAX/2-2)*stride];
221                int a2= src[x + (HTAPS_MAX/2-1)*stride];
222                int a3= src[x + (HTAPS_MAX/2+0)*stride];
223                int a4= src[x + (HTAPS_MAX/2+1)*stride];
224                int a5= src[x + (HTAPS_MAX/2+2)*stride];
225                int a6= src[x + (HTAPS_MAX/2+3)*stride];
226                int am=0;
227                if(!p || p->fast_mc)
228                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
229                else
230                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
231
232                if(am&(~255)) am= ~(am>>31);
233                tmp2[x]= am;
234            }
235            src += stride;
236            tmp2+= 64;
237        }
238        src -= stride*y;
239    }
240    src += stride*(HTAPS_MAX/2 - 1);
241    tmp2= tmp2t[2];
242    tmpI= tmpIt;
243    if(b&4){
244        for(y=0; y < b_h; y++){
245            for(x=0; x < b_w; x++){
246                int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
247                int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
248                int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
249                int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
250                int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
251                int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
252                int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
253                int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
254                int am=0;
255                if(!p || p->fast_mc)
256                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
257                else
258                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
259                if(am&(~255)) am= ~(am>>31);
260                tmp2[x]= am;
261            }
262            tmpI+= 64;
263            tmp2+= 64;
264        }
265    }
266
267    hpel[ 0]= src;
268    hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
269    hpel[ 2]= src + 1;
270
271    hpel[ 4]= tmp2t[1];
272    hpel[ 5]= tmp2t[2];
273    hpel[ 6]= tmp2t[1] + 1;
274
275    hpel[ 8]= src + stride;
276    hpel[ 9]= hpel[1] + 64;
277    hpel[10]= hpel[8] + 1;
278
279#define MC_STRIDE(x) (needs[x] ? 64 : stride)
280
281    if(b==15){
282        int dxy = dx / 8 + dy / 8 * 4;
283        const uint8_t *src1 = hpel[dxy    ];
284        const uint8_t *src2 = hpel[dxy + 1];
285        const uint8_t *src3 = hpel[dxy + 4];
286        const uint8_t *src4 = hpel[dxy + 5];
287        int stride1 = MC_STRIDE(dxy);
288        int stride2 = MC_STRIDE(dxy + 1);
289        int stride3 = MC_STRIDE(dxy + 4);
290        int stride4 = MC_STRIDE(dxy + 5);
291        dx&=7;
292        dy&=7;
293        for(y=0; y < b_h; y++){
294            for(x=0; x < b_w; x++){
295                dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
296                         (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
297            }
298            src1+=stride1;
299            src2+=stride2;
300            src3+=stride3;
301            src4+=stride4;
302            dst +=stride;
303        }
304    }else{
305        const uint8_t *src1= hpel[l];
306        const uint8_t *src2= hpel[r];
307        int stride1 = MC_STRIDE(l);
308        int stride2 = MC_STRIDE(r);
309        int a= weight[((dx&7) + (8*(dy&7)))];
310        int b= 8-a;
311        for(y=0; y < b_h; y++){
312            for(x=0; x < b_w; x++){
313                dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
314            }
315            src1+=stride1;
316            src2+=stride2;
317            dst +=stride;
318        }
319    }
320}
321
322void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride, int sx, int sy, int b_w, int b_h, const BlockNode *block, int plane_index, int w, int h){
323    if(block->type & BLOCK_INTRA){
324        int x, y;
325        const unsigned color  = block->color[plane_index];
326        const unsigned color4 = color*0x01010101;
327        if(b_w==32){
328            for(y=0; y < b_h; y++){
329                *(uint32_t*)&dst[0 + y*stride]= color4;
330                *(uint32_t*)&dst[4 + y*stride]= color4;
331                *(uint32_t*)&dst[8 + y*stride]= color4;
332                *(uint32_t*)&dst[12+ y*stride]= color4;
333                *(uint32_t*)&dst[16+ y*stride]= color4;
334                *(uint32_t*)&dst[20+ y*stride]= color4;
335                *(uint32_t*)&dst[24+ y*stride]= color4;
336                *(uint32_t*)&dst[28+ y*stride]= color4;
337            }
338        }else if(b_w==16){
339            for(y=0; y < b_h; y++){
340                *(uint32_t*)&dst[0 + y*stride]= color4;
341                *(uint32_t*)&dst[4 + y*stride]= color4;
342                *(uint32_t*)&dst[8 + y*stride]= color4;
343                *(uint32_t*)&dst[12+ y*stride]= color4;
344            }
345        }else if(b_w==8){
346            for(y=0; y < b_h; y++){
347                *(uint32_t*)&dst[0 + y*stride]= color4;
348                *(uint32_t*)&dst[4 + y*stride]= color4;
349            }
350        }else if(b_w==4){
351            for(y=0; y < b_h; y++){
352                *(uint32_t*)&dst[0 + y*stride]= color4;
353            }
354        }else{
355            for(y=0; y < b_h; y++){
356                for(x=0; x < b_w; x++){
357                    dst[x + y*stride]= color;
358                }
359            }
360        }
361    }else{
362        uint8_t *src= s->last_picture[block->ref]->data[plane_index];
363        const int scale= plane_index ?  (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
364        int mx= block->mx*scale;
365        int my= block->my*scale;
366        const int dx= mx&15;
367        const int dy= my&15;
368        const int tab_index= 3 - (b_w>>2) + (b_w>>4);
369        sx += (mx>>4) - (HTAPS_MAX/2-1);
370        sy += (my>>4) - (HTAPS_MAX/2-1);
371        src += sx + sy*stride;
372        if(   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
373           || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)){
374            s->vdsp.emulated_edge_mc(tmp + MB_SIZE, src,
375                                     stride, stride,
376                                     b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1,
377                                     sx, sy, w, h);
378            src= tmp + MB_SIZE;
379        }
380
381        av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale
382
383        av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
384        if(    (dx&3) || (dy&3)
385            || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h)
386            || (b_w&(b_w-1))
387            || b_w == 1
388            || b_h == 1
389            || !s->plane[plane_index].fast_mc )
390            mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
391        else if(b_w==32){
392            int y;
393            for(y=0; y<b_h; y+=16){
394                s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
395                s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
396            }
397        }else if(b_w==b_h)
398            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
399        else if(b_w==2*b_h){
400            s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
401            s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
402        }else{
403            av_assert2(2*b_w==b_h);
404            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
405            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
406        }
407    }
408}
409
410#define mca(dx,dy,b_w)\
411static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h){\
412    av_assert2(h==b_w);\
413    mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
414}
415
416mca( 0, 0,16)
417mca( 8, 0,16)
418mca( 0, 8,16)
419mca( 8, 8,16)
420mca( 0, 0,8)
421mca( 8, 0,8)
422mca( 0, 8,8)
423mca( 8, 8,8)
424
425static av_cold void snow_static_init(void)
426{
427    for (int i = 0; i < MAX_REF_FRAMES; i++)
428        for (int j = 0; j < MAX_REF_FRAMES; j++)
429            ff_scale_mv_ref[i][j] = 256 * (i + 1) / (j + 1);
430}
431
432av_cold int ff_snow_common_init(AVCodecContext *avctx){
433    static AVOnce init_static_once = AV_ONCE_INIT;
434    SnowContext *s = avctx->priv_data;
435    int width, height;
436    int i;
437
438    s->avctx= avctx;
439    s->max_ref_frames=1; //just make sure it's not an invalid value in case of no initial keyframe
440    s->spatial_decomposition_count = 1;
441
442    ff_me_cmp_init(&s->mecc, avctx);
443    ff_hpeldsp_init(&s->hdsp, avctx->flags);
444    ff_videodsp_init(&s->vdsp, 8);
445    ff_dwt_init(&s->dwt);
446    ff_h264qpel_init(&s->h264qpel, 8);
447
448#define mcf(dx,dy)\
449    s->qdsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
450    s->qdsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
451        s->h264qpel.put_h264_qpel_pixels_tab[0][dy+dx/4];\
452    s->qdsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
453    s->qdsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
454        s->h264qpel.put_h264_qpel_pixels_tab[1][dy+dx/4];
455
456    mcf( 0, 0)
457    mcf( 4, 0)
458    mcf( 8, 0)
459    mcf(12, 0)
460    mcf( 0, 4)
461    mcf( 4, 4)
462    mcf( 8, 4)
463    mcf(12, 4)
464    mcf( 0, 8)
465    mcf( 4, 8)
466    mcf( 8, 8)
467    mcf(12, 8)
468    mcf( 0,12)
469    mcf( 4,12)
470    mcf( 8,12)
471    mcf(12,12)
472
473#define mcfh(dx,dy)\
474    s->hdsp.put_pixels_tab       [0][dy/4+dx/8]=\
475    s->hdsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
476        mc_block_hpel ## dx ## dy ## 16;\
477    s->hdsp.put_pixels_tab       [1][dy/4+dx/8]=\
478    s->hdsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
479        mc_block_hpel ## dx ## dy ## 8;
480
481    mcfh(0, 0)
482    mcfh(8, 0)
483    mcfh(0, 8)
484    mcfh(8, 8)
485
486//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
487
488    width= s->avctx->width;
489    height= s->avctx->height;
490
491    if (!FF_ALLOCZ_TYPED_ARRAY(s->spatial_idwt_buffer, width * height) ||
492        !FF_ALLOCZ_TYPED_ARRAY(s->spatial_dwt_buffer,  width * height) ||  //FIXME this does not belong here
493        !FF_ALLOCZ_TYPED_ARRAY(s->temp_dwt_buffer,     width)          ||
494        !FF_ALLOCZ_TYPED_ARRAY(s->temp_idwt_buffer,    width)          ||
495        !FF_ALLOCZ_TYPED_ARRAY(s->run_buffer, ((width + 1) >> 1) * ((height + 1) >> 1)))
496        return AVERROR(ENOMEM);
497
498    for(i=0; i<MAX_REF_FRAMES; i++) {
499        s->last_picture[i] = av_frame_alloc();
500        if (!s->last_picture[i])
501            return AVERROR(ENOMEM);
502    }
503
504    s->mconly_picture = av_frame_alloc();
505    s->current_picture = av_frame_alloc();
506    if (!s->mconly_picture || !s->current_picture)
507        return AVERROR(ENOMEM);
508
509    ff_thread_once(&init_static_once, snow_static_init);
510
511    return 0;
512}
513
514int ff_snow_common_init_after_header(AVCodecContext *avctx) {
515    SnowContext *s = avctx->priv_data;
516    int plane_index, level, orientation;
517    int ret, emu_buf_size;
518
519    if(!s->scratchbuf) {
520        if (av_codec_is_decoder(avctx->codec)) {
521            if ((ret = ff_get_buffer(s->avctx, s->mconly_picture,
522                                     AV_GET_BUFFER_FLAG_REF)) < 0)
523                return ret;
524        }
525
526        emu_buf_size = FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256) * (2 * MB_SIZE + HTAPS_MAX - 1);
527        if (!FF_ALLOCZ_TYPED_ARRAY(s->scratchbuf,      FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256) * 7 * MB_SIZE) ||
528            !FF_ALLOCZ_TYPED_ARRAY(s->emu_edge_buffer, emu_buf_size))
529            return AVERROR(ENOMEM);
530    }
531
532    if (av_codec_is_decoder(avctx->codec) &&
533        s->mconly_picture->format != avctx->pix_fmt) {
534        av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
535        return AVERROR_INVALIDDATA;
536    }
537
538    for(plane_index=0; plane_index < s->nb_planes; plane_index++){
539        int w= s->avctx->width;
540        int h= s->avctx->height;
541
542        if(plane_index){
543            w = AV_CEIL_RSHIFT(w, s->chroma_h_shift);
544            h = AV_CEIL_RSHIFT(h, s->chroma_v_shift);
545        }
546        s->plane[plane_index].width = w;
547        s->plane[plane_index].height= h;
548
549        for(level=s->spatial_decomposition_count-1; level>=0; level--){
550            for(orientation=level ? 1 : 0; orientation<4; orientation++){
551                SubBand *b= &s->plane[plane_index].band[level][orientation];
552
553                b->buf= s->spatial_dwt_buffer;
554                b->level= level;
555                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
556                b->width = (w + !(orientation&1))>>1;
557                b->height= (h + !(orientation>1))>>1;
558
559                b->stride_line = 1 << (s->spatial_decomposition_count - level);
560                b->buf_x_offset = 0;
561                b->buf_y_offset = 0;
562
563                if(orientation&1){
564                    b->buf += (w+1)>>1;
565                    b->buf_x_offset = (w+1)>>1;
566                }
567                if(orientation>1){
568                    b->buf += b->stride>>1;
569                    b->buf_y_offset = b->stride_line >> 1;
570                }
571                b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
572
573                if(level)
574                    b->parent= &s->plane[plane_index].band[level-1][orientation];
575                //FIXME avoid this realloc
576                av_freep(&b->x_coeff);
577                b->x_coeff = av_calloc((b->width + 1) * b->height + 1,
578                                       sizeof(*b->x_coeff));
579                if (!b->x_coeff)
580                    return AVERROR(ENOMEM);
581            }
582            w= (w+1)>>1;
583            h= (h+1)>>1;
584        }
585    }
586
587    return 0;
588}
589
590#define USE_HALFPEL_PLANE 0
591
592static int halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
593    int p,x,y;
594
595    for(p=0; p < s->nb_planes; p++){
596        int is_chroma= !!p;
597        int w= is_chroma ? AV_CEIL_RSHIFT(s->avctx->width,  s->chroma_h_shift) : s->avctx->width;
598        int h= is_chroma ? AV_CEIL_RSHIFT(s->avctx->height, s->chroma_v_shift) : s->avctx->height;
599        int ls= frame->linesize[p];
600        uint8_t *src= frame->data[p];
601
602        halfpel[1][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
603        halfpel[2][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
604        halfpel[3][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
605        if (!halfpel[1][p] || !halfpel[2][p] || !halfpel[3][p]) {
606            av_freep(&halfpel[1][p]);
607            av_freep(&halfpel[2][p]);
608            av_freep(&halfpel[3][p]);
609            return AVERROR(ENOMEM);
610        }
611        halfpel[1][p] += EDGE_WIDTH * (1 + ls);
612        halfpel[2][p] += EDGE_WIDTH * (1 + ls);
613        halfpel[3][p] += EDGE_WIDTH * (1 + ls);
614
615        halfpel[0][p]= src;
616        for(y=0; y<h; y++){
617            for(x=0; x<w; x++){
618                int i= y*ls + x;
619
620                halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
621            }
622        }
623        for(y=0; y<h; y++){
624            for(x=0; x<w; x++){
625                int i= y*ls + x;
626
627                halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
628            }
629        }
630        src= halfpel[1][p];
631        for(y=0; y<h; y++){
632            for(x=0; x<w; x++){
633                int i= y*ls + x;
634
635                halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
636            }
637        }
638
639//FIXME border!
640    }
641    return 0;
642}
643
644void ff_snow_release_buffer(AVCodecContext *avctx)
645{
646    SnowContext *s = avctx->priv_data;
647    int i;
648
649    if(s->last_picture[s->max_ref_frames-1]->data[0]){
650        av_frame_unref(s->last_picture[s->max_ref_frames-1]);
651        for(i=0; i<9; i++)
652            if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) {
653                av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture->linesize[i%3]));
654                s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] = NULL;
655            }
656    }
657}
658
659int ff_snow_frame_start(SnowContext *s){
660   AVFrame *tmp;
661   int i, ret;
662
663    ff_snow_release_buffer(s->avctx);
664
665    tmp= s->last_picture[s->max_ref_frames-1];
666    for(i=s->max_ref_frames-1; i>0; i--)
667        s->last_picture[i] = s->last_picture[i-1];
668    memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
669    if(USE_HALFPEL_PLANE && s->current_picture->data[0]) {
670        if((ret = halfpel_interpol(s, s->halfpel_plane[0], s->current_picture)) < 0)
671            return ret;
672    }
673    s->last_picture[0] = s->current_picture;
674    s->current_picture = tmp;
675
676    if(s->keyframe){
677        s->ref_frames= 0;
678    }else{
679        int i;
680        for(i=0; i<s->max_ref_frames && s->last_picture[i]->data[0]; i++)
681            if(i && s->last_picture[i-1]->key_frame)
682                break;
683        s->ref_frames= i;
684        if(s->ref_frames==0){
685            av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
686            return AVERROR_INVALIDDATA;
687        }
688    }
689    if ((ret = ff_snow_get_buffer(s, s->current_picture)) < 0)
690        return ret;
691
692    s->current_picture->key_frame= s->keyframe;
693
694    return 0;
695}
696
697av_cold void ff_snow_common_end(SnowContext *s)
698{
699    int plane_index, level, orientation, i;
700
701    av_freep(&s->spatial_dwt_buffer);
702    av_freep(&s->temp_dwt_buffer);
703    av_freep(&s->spatial_idwt_buffer);
704    av_freep(&s->temp_idwt_buffer);
705    av_freep(&s->run_buffer);
706
707    s->m.me.temp= NULL;
708    av_freep(&s->m.me.scratchpad);
709    av_freep(&s->m.me.map);
710    av_freep(&s->m.me.score_map);
711    av_freep(&s->m.sc.obmc_scratchpad);
712
713    av_freep(&s->block);
714    av_freep(&s->scratchbuf);
715    av_freep(&s->emu_edge_buffer);
716
717    for(i=0; i<MAX_REF_FRAMES; i++){
718        av_freep(&s->ref_mvs[i]);
719        av_freep(&s->ref_scores[i]);
720        if(s->last_picture[i] && s->last_picture[i]->data[0]) {
721            av_assert0(s->last_picture[i]->data[0] != s->current_picture->data[0]);
722        }
723        av_frame_free(&s->last_picture[i]);
724    }
725
726    for(plane_index=0; plane_index < MAX_PLANES; plane_index++){
727        for(level=MAX_DECOMPOSITIONS-1; level>=0; level--){
728            for(orientation=level ? 1 : 0; orientation<4; orientation++){
729                SubBand *b= &s->plane[plane_index].band[level][orientation];
730
731                av_freep(&b->x_coeff);
732            }
733        }
734    }
735    av_frame_free(&s->mconly_picture);
736    av_frame_free(&s->current_picture);
737}
738