1/* 2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at> 3 * 4 * This file is part of FFmpeg. 5 * 6 * FFmpeg is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * FFmpeg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with FFmpeg; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21#include "libavutil/intmath.h" 22#include "libavutil/libm.h" 23#include "libavutil/log.h" 24#include "libavutil/opt.h" 25#include "libavutil/pixdesc.h" 26#include "avcodec.h" 27#include "codec_internal.h" 28#include "encode.h" 29#include "packet_internal.h" 30#include "snow_dwt.h" 31#include "snow.h" 32 33#include "rangecoder.h" 34#include "mathops.h" 35 36#include "mpegvideo.h" 37#include "h263enc.h" 38 39static av_cold int encode_init(AVCodecContext *avctx) 40{ 41 SnowContext *s = avctx->priv_data; 42 int plane_index, ret; 43 int i; 44 45 if(s->pred == DWT_97 46 && (avctx->flags & AV_CODEC_FLAG_QSCALE) 47 && avctx->global_quality == 0){ 48 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n"); 49 return AVERROR(EINVAL); 50 } 51 52 s->spatial_decomposition_type= s->pred; //FIXME add decorrelator type r transform_type 53 54 s->mv_scale = (avctx->flags & AV_CODEC_FLAG_QPEL) ? 2 : 4; 55 s->block_max_depth= (avctx->flags & AV_CODEC_FLAG_4MV ) ? 
1 : 0; 56 57 for(plane_index=0; plane_index<3; plane_index++){ 58 s->plane[plane_index].diag_mc= 1; 59 s->plane[plane_index].htaps= 6; 60 s->plane[plane_index].hcoeff[0]= 40; 61 s->plane[plane_index].hcoeff[1]= -10; 62 s->plane[plane_index].hcoeff[2]= 2; 63 s->plane[plane_index].fast_mc= 1; 64 } 65 66 if ((ret = ff_snow_common_init(avctx)) < 0) { 67 return ret; 68 } 69 ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); 70 71 ff_snow_alloc_blocks(s); 72 73 s->version=0; 74 75 s->m.avctx = avctx; 76 s->m.bit_rate= avctx->bit_rate; 77 s->m.lmin = avctx->mb_lmin; 78 s->m.lmax = avctx->mb_lmax; 79 s->m.mb_num = (avctx->width * avctx->height + 255) / 256; // For ratecontrol 80 81 s->m.me.temp = 82 s->m.me.scratchpad = av_calloc(avctx->width + 64, 2*16*2*sizeof(uint8_t)); 83 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); 84 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); 85 s->m.sc.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t)); 86 if (!s->m.me.scratchpad || !s->m.me.map || !s->m.me.score_map || !s->m.sc.obmc_scratchpad) 87 return AVERROR(ENOMEM); 88 89 ff_h263_encode_init(&s->m); //mv_penalty 90 91 s->max_ref_frames = av_clip(avctx->refs, 1, MAX_REF_FRAMES); 92 93 if(avctx->flags&AV_CODEC_FLAG_PASS1){ 94 if(!avctx->stats_out) 95 avctx->stats_out = av_mallocz(256); 96 97 if (!avctx->stats_out) 98 return AVERROR(ENOMEM); 99 } 100 if((avctx->flags&AV_CODEC_FLAG_PASS2) || !(avctx->flags&AV_CODEC_FLAG_QSCALE)){ 101 ret = ff_rate_control_init(&s->m); 102 if(ret < 0) 103 return ret; 104 } 105 s->pass1_rc= !(avctx->flags & (AV_CODEC_FLAG_QSCALE|AV_CODEC_FLAG_PASS2)); 106 107 switch(avctx->pix_fmt){ 108 case AV_PIX_FMT_YUV444P: 109// case AV_PIX_FMT_YUV422P: 110 case AV_PIX_FMT_YUV420P: 111// case AV_PIX_FMT_YUV411P: 112 case AV_PIX_FMT_YUV410P: 113 s->nb_planes = 3; 114 s->colorspace_type= 0; 115 break; 116 case AV_PIX_FMT_GRAY8: 117 s->nb_planes = 1; 118 s->colorspace_type = 1; 119 break; 120/* case AV_PIX_FMT_RGB32: 121 s->colorspace= 1; 
122 break;*/ 123 } 124 125 ret = av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, 126 &s->chroma_v_shift); 127 if (ret) 128 return ret; 129 130 ret = ff_set_cmp(&s->mecc, s->mecc.me_cmp, s->avctx->me_cmp); 131 ret |= ff_set_cmp(&s->mecc, s->mecc.me_sub_cmp, s->avctx->me_sub_cmp); 132 if (ret < 0) 133 return AVERROR(EINVAL); 134 135 s->input_picture = av_frame_alloc(); 136 if (!s->input_picture) 137 return AVERROR(ENOMEM); 138 139 if ((ret = ff_snow_get_buffer(s, s->input_picture)) < 0) 140 return ret; 141 142 if(s->motion_est == FF_ME_ITER){ 143 int size= s->b_width * s->b_height << 2*s->block_max_depth; 144 for(i=0; i<s->max_ref_frames; i++){ 145 s->ref_mvs[i] = av_calloc(size, sizeof(*s->ref_mvs[i])); 146 s->ref_scores[i] = av_calloc(size, sizeof(*s->ref_scores[i])); 147 if (!s->ref_mvs[i] || !s->ref_scores[i]) 148 return AVERROR(ENOMEM); 149 } 150 } 151 152 return 0; 153} 154 155//near copy & paste from dsputil, FIXME 156static int pix_sum(uint8_t * pix, int line_size, int w, int h) 157{ 158 int s, i, j; 159 160 s = 0; 161 for (i = 0; i < h; i++) { 162 for (j = 0; j < w; j++) { 163 s += pix[0]; 164 pix ++; 165 } 166 pix += line_size - w; 167 } 168 return s; 169} 170 171//near copy & paste from dsputil, FIXME 172static int pix_norm1(uint8_t * pix, int line_size, int w) 173{ 174 int s, i, j; 175 const uint32_t *sq = ff_square_tab + 256; 176 177 s = 0; 178 for (i = 0; i < w; i++) { 179 for (j = 0; j < w; j ++) { 180 s += sq[pix[0]]; 181 pix ++; 182 } 183 pix += line_size - w; 184 } 185 return s; 186} 187 188static inline int get_penalty_factor(int lambda, int lambda2, int type){ 189 switch(type&0xFF){ 190 default: 191 case FF_CMP_SAD: 192 return lambda>>FF_LAMBDA_SHIFT; 193 case FF_CMP_DCT: 194 return (3*lambda)>>(FF_LAMBDA_SHIFT+1); 195 case FF_CMP_W53: 196 return (4*lambda)>>(FF_LAMBDA_SHIFT); 197 case FF_CMP_W97: 198 return (2*lambda)>>(FF_LAMBDA_SHIFT); 199 case FF_CMP_SATD: 200 case FF_CMP_DCT264: 201 return (2*lambda)>>FF_LAMBDA_SHIFT; 202 
case FF_CMP_RD: 203 case FF_CMP_PSNR: 204 case FF_CMP_SSE: 205 case FF_CMP_NSSE: 206 return lambda2>>FF_LAMBDA_SHIFT; 207 case FF_CMP_BIT: 208 return 1; 209 } 210} 211 212//FIXME copy&paste 213#define P_LEFT P[1] 214#define P_TOP P[2] 215#define P_TOPRIGHT P[3] 216#define P_MEDIAN P[4] 217#define P_MV1 P[9] 218#define FLAG_QPEL 1 //must be 1 219 220static int encode_q_branch(SnowContext *s, int level, int x, int y){ 221 uint8_t p_buffer[1024]; 222 uint8_t i_buffer[1024]; 223 uint8_t p_state[sizeof(s->block_state)]; 224 uint8_t i_state[sizeof(s->block_state)]; 225 RangeCoder pc, ic; 226 uint8_t *pbbak= s->c.bytestream; 227 uint8_t *pbbak_start= s->c.bytestream_start; 228 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits; 229 const int w= s->b_width << s->block_max_depth; 230 const int h= s->b_height << s->block_max_depth; 231 const int rem_depth= s->block_max_depth - level; 232 const int index= (x + y*w) << rem_depth; 233 const int block_w= 1<<(LOG2_MB_SIZE - level); 234 int trx= (x+1)<<rem_depth; 235 int try= (y+1)<<rem_depth; 236 const BlockNode *left = x ? &s->block[index-1] : &null_block; 237 const BlockNode *top = y ? &s->block[index-w] : &null_block; 238 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block; 239 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block; 240 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; 241 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? 
&s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt 242 int pl = left->color[0]; 243 int pcb= left->color[1]; 244 int pcr= left->color[2]; 245 int pmx, pmy; 246 int mx=0, my=0; 247 int l,cr,cb; 248 const int stride= s->current_picture->linesize[0]; 249 const int uvstride= s->current_picture->linesize[1]; 250 uint8_t *current_data[3]= { s->input_picture->data[0] + (x + y* stride)*block_w, 251 s->input_picture->data[1] + ((x*block_w)>>s->chroma_h_shift) + ((y*uvstride*block_w)>>s->chroma_v_shift), 252 s->input_picture->data[2] + ((x*block_w)>>s->chroma_h_shift) + ((y*uvstride*block_w)>>s->chroma_v_shift)}; 253 int P[10][2]; 254 int16_t last_mv[3][2]; 255 int qpel= !!(s->avctx->flags & AV_CODEC_FLAG_QPEL); //unused 256 const int shift= 1+qpel; 257 MotionEstContext *c= &s->m.me; 258 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); 259 int mx_context= av_log2(2*FFABS(left->mx - top->mx)); 260 int my_context= av_log2(2*FFABS(left->my - top->my)); 261 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; 262 int ref, best_ref, ref_score, ref_mx, ref_my; 263 int range = MAX_MV >> (1 + qpel); 264 265 av_assert0(sizeof(s->block_state) >= 256); 266 if(s->keyframe){ 267 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); 268 return 0; 269 } 270 271// clip predictors / edge ? 
272 273 P_LEFT[0]= left->mx; 274 P_LEFT[1]= left->my; 275 P_TOP [0]= top->mx; 276 P_TOP [1]= top->my; 277 P_TOPRIGHT[0]= tr->mx; 278 P_TOPRIGHT[1]= tr->my; 279 280 last_mv[0][0]= s->block[index].mx; 281 last_mv[0][1]= s->block[index].my; 282 last_mv[1][0]= right->mx; 283 last_mv[1][1]= right->my; 284 last_mv[2][0]= bottom->mx; 285 last_mv[2][1]= bottom->my; 286 287 s->m.mb_stride=2; 288 s->m.mb_x= 289 s->m.mb_y= 0; 290 c->skip= 0; 291 292 av_assert1(c-> stride == stride); 293 av_assert1(c->uvstride == uvstride); 294 295 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp); 296 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp); 297 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp); 298 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_DMV; 299 300 c->xmin = - x*block_w - 16+3; 301 c->ymin = - y*block_w - 16+3; 302 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3; 303 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3; 304 305 c->xmin = FFMAX(c->xmin, -range); 306 c->xmax = FFMIN(c->xmax, range); 307 c->ymin = FFMAX(c->ymin, -range); 308 c->ymax = FFMIN(c->ymax, range); 309 310 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift); 311 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift); 312 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift); 313 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift); 314 if(P_TOPRIGHT[0] < (c->xmin * (1<<shift))) P_TOPRIGHT[0]= (c->xmin * (1<<shift)); 315 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip 316 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift); 317 318 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); 319 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); 320 321 if (!y) { 322 c->pred_x= P_LEFT[0]; 323 c->pred_y= P_LEFT[1]; 324 } else { 325 c->pred_x = 
P_MEDIAN[0]; 326 c->pred_y = P_MEDIAN[1]; 327 } 328 329 score= INT_MAX; 330 best_ref= 0; 331 for(ref=0; ref<s->ref_frames; ref++){ 332 init_ref(c, current_data, s->last_picture[ref]->data, NULL, block_w*x, block_w*y, 0); 333 334 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv, 335 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w); 336 337 av_assert2(ref_mx >= c->xmin); 338 av_assert2(ref_mx <= c->xmax); 339 av_assert2(ref_my >= c->ymin); 340 av_assert2(ref_my <= c->ymax); 341 342 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w); 343 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0); 344 ref_score+= 2*av_log2(2*ref)*c->penalty_factor; 345 if(s->ref_mvs[ref]){ 346 s->ref_mvs[ref][index][0]= ref_mx; 347 s->ref_mvs[ref][index][1]= ref_my; 348 s->ref_scores[ref][index]= ref_score; 349 } 350 if(score > ref_score){ 351 score= ref_score; 352 best_ref= ref; 353 mx= ref_mx; 354 my= ref_my; 355 } 356 } 357 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2 358 359 // subpel search 360 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start); 361 pc= s->c; 362 pc.bytestream_start= 363 pc.bytestream= p_buffer; //FIXME end/start? 
and at the other stoo 364 memcpy(p_state, s->block_state, sizeof(s->block_state)); 365 366 if(level!=s->block_max_depth) 367 put_rac(&pc, &p_state[4 + s_context], 1); 368 put_rac(&pc, &p_state[1 + left->type + top->type], 0); 369 if(s->ref_frames > 1) 370 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0); 371 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr); 372 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1); 373 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1); 374 p_len= pc.bytestream - pc.bytestream_start; 375 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT; 376 377 block_s= block_w*block_w; 378 sum = pix_sum(current_data[0], stride, block_w, block_w); 379 l= (sum + block_s/2)/block_s; 380 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s; 381 382 if (s->nb_planes > 2) { 383 block_s= block_w*block_w>>(s->chroma_h_shift + s->chroma_v_shift); 384 sum = pix_sum(current_data[1], uvstride, block_w>>s->chroma_h_shift, block_w>>s->chroma_v_shift); 385 cb= (sum + block_s/2)/block_s; 386 // iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s; 387 sum = pix_sum(current_data[2], uvstride, block_w>>s->chroma_h_shift, block_w>>s->chroma_v_shift); 388 cr= (sum + block_s/2)/block_s; 389 // iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s; 390 }else 391 cb = cr = 0; 392 393 ic= s->c; 394 ic.bytestream_start= 395 ic.bytestream= i_buffer; //FIXME end/start? 
and at the other stoo 396 memcpy(i_state, s->block_state, sizeof(s->block_state)); 397 if(level!=s->block_max_depth) 398 put_rac(&ic, &i_state[4 + s_context], 1); 399 put_rac(&ic, &i_state[1 + left->type + top->type], 1); 400 put_symbol(&ic, &i_state[32], l-pl , 1); 401 if (s->nb_planes > 2) { 402 put_symbol(&ic, &i_state[64], cb-pcb, 1); 403 put_symbol(&ic, &i_state[96], cr-pcr, 1); 404 } 405 i_len= ic.bytestream - ic.bytestream_start; 406 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT; 407 408 av_assert1(iscore < 255*255*256 + s->lambda2*10); 409 av_assert1(iscore >= 0); 410 av_assert1(l>=0 && l<=255); 411 av_assert1(pl>=0 && pl<=255); 412 413 if(level==0){ 414 int varc= iscore >> 8; 415 int vard= score >> 8; 416 if (vard <= 64 || vard < varc) 417 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc); 418 else 419 c->scene_change_score+= s->m.qscale; 420 } 421 422 if(level!=s->block_max_depth){ 423 put_rac(&s->c, &s->block_state[4 + s_context], 0); 424 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0); 425 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0); 426 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1); 427 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1); 428 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead 429 430 if(score2 < score && score2 < iscore) 431 return score2; 432 } 433 434 if(iscore < score){ 435 pred_mv(s, &pmx, &pmy, 0, left, top, tr); 436 memcpy(pbbak, i_buffer, i_len); 437 s->c= ic; 438 s->c.bytestream_start= pbbak_start; 439 s->c.bytestream= pbbak + i_len; 440 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA); 441 memcpy(s->block_state, i_state, sizeof(s->block_state)); 442 return iscore; 443 }else{ 444 memcpy(pbbak, p_buffer, p_len); 445 s->c= pc; 446 s->c.bytestream_start= pbbak_start; 447 s->c.bytestream= pbbak + p_len; 448 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0); 449 memcpy(s->block_state, p_state, sizeof(s->block_state)); 450 return score; 451 } 452} 
453 454static void encode_q_branch2(SnowContext *s, int level, int x, int y){ 455 const int w= s->b_width << s->block_max_depth; 456 const int rem_depth= s->block_max_depth - level; 457 const int index= (x + y*w) << rem_depth; 458 int trx= (x+1)<<rem_depth; 459 BlockNode *b= &s->block[index]; 460 const BlockNode *left = x ? &s->block[index-1] : &null_block; 461 const BlockNode *top = y ? &s->block[index-w] : &null_block; 462 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; 463 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt 464 int pl = left->color[0]; 465 int pcb= left->color[1]; 466 int pcr= left->color[2]; 467 int pmx, pmy; 468 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); 469 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref; 470 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref; 471 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; 472 473 if(s->keyframe){ 474 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); 475 return; 476 } 477 478 if(level!=s->block_max_depth){ 479 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){ 480 put_rac(&s->c, &s->block_state[4 + s_context], 1); 481 }else{ 482 put_rac(&s->c, &s->block_state[4 + s_context], 0); 483 encode_q_branch2(s, level+1, 2*x+0, 2*y+0); 484 encode_q_branch2(s, level+1, 2*x+1, 2*y+0); 485 encode_q_branch2(s, level+1, 2*x+0, 2*y+1); 486 encode_q_branch2(s, level+1, 2*x+1, 2*y+1); 487 return; 488 } 489 } 490 if(b->type & BLOCK_INTRA){ 491 pred_mv(s, &pmx, &pmy, 0, left, top, tr); 492 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1); 493 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1); 494 if (s->nb_planes > 2) { 495 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1); 496 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1); 497 } 498 set_blocks(s, level, x, y, b->color[0], b->color[1], 
b->color[2], pmx, pmy, 0, BLOCK_INTRA); 499 }else{ 500 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr); 501 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0); 502 if(s->ref_frames > 1) 503 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0); 504 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1); 505 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1); 506 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0); 507 } 508} 509 510static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ 511 int i, x2, y2; 512 Plane *p= &s->plane[plane_index]; 513 const int block_size = MB_SIZE >> s->block_max_depth; 514 const int block_w = plane_index ? block_size>>s->chroma_h_shift : block_size; 515 const int block_h = plane_index ? block_size>>s->chroma_v_shift : block_size; 516 const uint8_t *obmc = plane_index ? ff_obmc_tab[s->block_max_depth+s->chroma_h_shift] : ff_obmc_tab[s->block_max_depth]; 517 const int obmc_stride= plane_index ? 
(2*block_size)>>s->chroma_h_shift : 2*block_size; 518 const int ref_stride= s->current_picture->linesize[plane_index]; 519 uint8_t *src= s-> input_picture->data[plane_index]; 520 IDWTELEM *dst= (IDWTELEM*)s->m.sc.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned 521 const int b_stride = s->b_width << s->block_max_depth; 522 const int w= p->width; 523 const int h= p->height; 524 int index= mb_x + mb_y*b_stride; 525 BlockNode *b= &s->block[index]; 526 BlockNode backup= *b; 527 int ab=0; 528 int aa=0; 529 530 av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc stuff above 531 532 b->type|= BLOCK_INTRA; 533 b->color[plane_index]= 0; 534 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM)); 535 536 for(i=0; i<4; i++){ 537 int mb_x2= mb_x + (i &1) - 1; 538 int mb_y2= mb_y + (i>>1) - 1; 539 int x= block_w*mb_x2 + block_w/2; 540 int y= block_h*mb_y2 + block_h/2; 541 542 add_yblock(s, 0, NULL, dst + (i&1)*block_w + (i>>1)*obmc_stride*block_h, NULL, obmc, 543 x, y, block_w, block_h, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index); 544 545 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_h); y2++){ 546 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){ 547 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_h*mb_y - block_h/2))*obmc_stride; 548 int obmc_v= obmc[index]; 549 int d; 550 if(y<0) obmc_v += obmc[index + block_h*obmc_stride]; 551 if(x<0) obmc_v += obmc[index + block_w]; 552 if(y+block_h>h) obmc_v += obmc[index - block_h*obmc_stride]; 553 if(x+block_w>w) obmc_v += obmc[index - block_w]; 554 //FIXME precalculate this or simplify it somehow else 555 556 d = -dst[index] + (1<<(FRAC_BITS-1)); 557 dst[index] = d; 558 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v; 559 aa += obmc_v * obmc_v; //FIXME precalculate this 560 } 561 } 562 } 563 *b= backup; 564 565 return av_clip_uint8( ROUNDED_DIV(ab<<LOG2_OBMC_MAX, aa) ); //FIXME we should not need clipping 566} 567 568static inline int 
get_block_bits(SnowContext *s, int x, int y, int w){ 569 const int b_stride = s->b_width << s->block_max_depth; 570 const int b_height = s->b_height<< s->block_max_depth; 571 int index= x + y*b_stride; 572 const BlockNode *b = &s->block[index]; 573 const BlockNode *left = x ? &s->block[index-1] : &null_block; 574 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block; 575 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left; 576 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl; 577 int dmx, dmy; 578// int mx_context= av_log2(2*FFABS(left->mx - top->mx)); 579// int my_context= av_log2(2*FFABS(left->my - top->my)); 580 581 if(x<0 || x>=b_stride || y>=b_height) 582 return 0; 583/* 5841 0 0 58501X 1-2 1 586001XX 3-6 2-3 5870001XXX 7-14 4-7 58800001XXXX 15-30 8-15 589*/ 590//FIXME try accurate rate 591//FIXME intra and inter predictors if surrounding blocks are not the same type 592 if(b->type & BLOCK_INTRA){ 593 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0])) 594 + av_log2(2*FFABS(left->color[1] - b->color[1])) 595 + av_log2(2*FFABS(left->color[2] - b->color[2]))); 596 }else{ 597 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr); 598 dmx-= b->mx; 599 dmy-= b->my; 600 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda 601 + av_log2(2*FFABS(dmy)) 602 + av_log2(2*b->ref)); 603 } 604} 605 606static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, uint8_t (*obmc_edged)[MB_SIZE * 2]){ 607 Plane *p= &s->plane[plane_index]; 608 const int block_size = MB_SIZE >> s->block_max_depth; 609 const int block_w = plane_index ? block_size>>s->chroma_h_shift : block_size; 610 const int block_h = plane_index ? block_size>>s->chroma_v_shift : block_size; 611 const int obmc_stride= plane_index ? 
(2*block_size)>>s->chroma_h_shift : 2*block_size; 612 const int ref_stride= s->current_picture->linesize[plane_index]; 613 uint8_t *dst= s->current_picture->data[plane_index]; 614 uint8_t *src= s-> input_picture->data[plane_index]; 615 IDWTELEM *pred= (IDWTELEM*)s->m.sc.obmc_scratchpad + plane_index*block_size*block_size*4; 616 uint8_t *cur = s->scratchbuf; 617 uint8_t *tmp = s->emu_edge_buffer; 618 const int b_stride = s->b_width << s->block_max_depth; 619 const int b_height = s->b_height<< s->block_max_depth; 620 const int w= p->width; 621 const int h= p->height; 622 int distortion; 623 int rate= 0; 624 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); 625 int sx= block_w*mb_x - block_w/2; 626 int sy= block_h*mb_y - block_h/2; 627 int x0= FFMAX(0,-sx); 628 int y0= FFMAX(0,-sy); 629 int x1= FFMIN(block_w*2, w-sx); 630 int y1= FFMIN(block_h*2, h-sy); 631 int i,x,y; 632 633 av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc and square assumtions below chckinhg only block_w 634 635 ff_snow_pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_h*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h); 636 637 for(y=y0; y<y1; y++){ 638 const uint8_t *obmc1= obmc_edged[y]; 639 const IDWTELEM *pred1 = pred + y*obmc_stride; 640 uint8_t *cur1 = cur + y*ref_stride; 641 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride; 642 for(x=x0; x<x1; x++){ 643#if FRAC_BITS >= LOG2_OBMC_MAX 644 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX); 645#else 646 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS); 647#endif 648 v = (v + pred1[x]) >> FRAC_BITS; 649 if(v&(~255)) v= ~(v>>31); 650 dst1[x] = v; 651 } 652 } 653 654 /* copy the regions where obmc[] = (uint8_t)256 */ 655 if(LOG2_OBMC_MAX == 8 656 && (mb_x == 0 || mb_x == b_stride-1) 657 && (mb_y == 0 || mb_y == b_height-1)){ 658 if(mb_x == 0) 659 x1 = block_w; 660 else 661 x0 = block_w; 662 if(mb_y == 0) 663 y1 = block_h; 664 else 
665 y0 = block_h; 666 for(y=y0; y<y1; y++) 667 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0); 668 } 669 670 if(block_w==16){ 671 /* FIXME rearrange dsputil to fit 32x32 cmp functions */ 672 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */ 673 /* FIXME cmps overlap but do not cover the wavelet's whole support. 674 * So improving the score of one block is not strictly guaranteed 675 * to improve the score of the whole frame, thus iterative motion 676 * estimation does not always converge. */ 677 if(s->avctx->me_cmp == FF_CMP_W97) 678 distortion = ff_w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); 679 else if(s->avctx->me_cmp == FF_CMP_W53) 680 distortion = ff_w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); 681 else{ 682 distortion = 0; 683 for(i=0; i<4; i++){ 684 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride; 685 distortion += s->mecc.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16); 686 } 687 } 688 }else{ 689 av_assert2(block_w==8); 690 distortion = s->mecc.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2); 691 } 692 693 if(plane_index==0){ 694 for(i=0; i<4; i++){ 695/* ..RRr 696 * .RXx. 697 * rxx.. 698 */ 699 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1); 700 } 701 if(mb_x == b_stride-2) 702 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1); 703 } 704 return distortion + rate*penalty_factor; 705} 706 707static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ 708 int i, y2; 709 Plane *p= &s->plane[plane_index]; 710 const int block_size = MB_SIZE >> s->block_max_depth; 711 const int block_w = plane_index ? block_size>>s->chroma_h_shift : block_size; 712 const int block_h = plane_index ? block_size>>s->chroma_v_shift : block_size; 713 const uint8_t *obmc = plane_index ? 
ff_obmc_tab[s->block_max_depth+s->chroma_h_shift] : ff_obmc_tab[s->block_max_depth]; 714 const int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size; 715 const int ref_stride= s->current_picture->linesize[plane_index]; 716 uint8_t *dst= s->current_picture->data[plane_index]; 717 uint8_t *src= s-> input_picture->data[plane_index]; 718 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst 719 // const has only been removed from zero_dst to suppress a warning 720 static IDWTELEM zero_dst[4096]; //FIXME 721 const int b_stride = s->b_width << s->block_max_depth; 722 const int w= p->width; 723 const int h= p->height; 724 int distortion= 0; 725 int rate= 0; 726 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); 727 728 av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc and square assumtions below 729 730 for(i=0; i<9; i++){ 731 int mb_x2= mb_x + (i%3) - 1; 732 int mb_y2= mb_y + (i/3) - 1; 733 int x= block_w*mb_x2 + block_w/2; 734 int y= block_h*mb_y2 + block_h/2; 735 736 add_yblock(s, 0, NULL, zero_dst, dst, obmc, 737 x, y, block_w, block_h, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index); 738 739 //FIXME find a cleaner/simpler way to skip the outside stuff 740 for(y2= y; y2<0; y2++) 741 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); 742 for(y2= h; y2<y+block_h; y2++) 743 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); 744 if(x<0){ 745 for(y2= y; y2<y+block_h; y2++) 746 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x); 747 } 748 if(x+block_w > w){ 749 for(y2= y; y2<y+block_h; y2++) 750 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w); 751 } 752 753 av_assert1(block_w== 8 || block_w==16); 754 distortion += s->mecc.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_h); 755 } 756 757 if(plane_index==0){ 758 
BlockNode *b= &s->block[mb_x+mb_y*b_stride]; 759 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1); 760 761/* ..RRRr 762 * .RXXx. 763 * .RXXx. 764 * rxxx. 765 */ 766 if(merged) 767 rate = get_block_bits(s, mb_x, mb_y, 2); 768 for(i=merged?4:0; i<9; i++){ 769 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}}; 770 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1); 771 } 772 } 773 return distortion + rate*penalty_factor; 774} 775 776static int encode_subband_c0run(SnowContext *s, SubBand *b, const IDWTELEM *src, const IDWTELEM *parent, int stride, int orientation){ 777 const int w= b->width; 778 const int h= b->height; 779 int x, y; 780 781 if(1){ 782 int run=0; 783 int *runs = s->run_buffer; 784 int run_index=0; 785 int max_index; 786 787 for(y=0; y<h; y++){ 788 for(x=0; x<w; x++){ 789 int v, p=0; 790 int /*ll=0, */l=0, lt=0, t=0, rt=0; 791 v= src[x + y*stride]; 792 793 if(y){ 794 t= src[x + (y-1)*stride]; 795 if(x){ 796 lt= src[x - 1 + (y-1)*stride]; 797 } 798 if(x + 1 < w){ 799 rt= src[x + 1 + (y-1)*stride]; 800 } 801 } 802 if(x){ 803 l= src[x - 1 + y*stride]; 804 /*if(x > 1){ 805 if(orientation==1) ll= src[y + (x-2)*stride]; 806 else ll= src[x - 2 + y*stride]; 807 }*/ 808 } 809 if(parent){ 810 int px= x>>1; 811 int py= y>>1; 812 if(px<b->parent->width && py<b->parent->height) 813 p= parent[px + py*2*stride]; 814 } 815 if(!(/*ll|*/l|lt|t|rt|p)){ 816 if(v){ 817 runs[run_index++]= run; 818 run=0; 819 }else{ 820 run++; 821 } 822 } 823 } 824 } 825 max_index= run_index; 826 runs[run_index++]= run; 827 run_index=0; 828 run= runs[run_index++]; 829 830 put_symbol2(&s->c, b->state[30], max_index, 0); 831 if(run_index <= max_index) 832 put_symbol2(&s->c, b->state[1], run, 3); 833 834 for(y=0; y<h; y++){ 835 if(s->c.bytestream_end - s->c.bytestream < w*40){ 836 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); 837 return AVERROR(ENOMEM); 838 } 839 for(x=0; x<w; x++){ 840 int v, 
p=0; 841 int /*ll=0, */l=0, lt=0, t=0, rt=0; 842 v= src[x + y*stride]; 843 844 if(y){ 845 t= src[x + (y-1)*stride]; 846 if(x){ 847 lt= src[x - 1 + (y-1)*stride]; 848 } 849 if(x + 1 < w){ 850 rt= src[x + 1 + (y-1)*stride]; 851 } 852 } 853 if(x){ 854 l= src[x - 1 + y*stride]; 855 /*if(x > 1){ 856 if(orientation==1) ll= src[y + (x-2)*stride]; 857 else ll= src[x - 2 + y*stride]; 858 }*/ 859 } 860 if(parent){ 861 int px= x>>1; 862 int py= y>>1; 863 if(px<b->parent->width && py<b->parent->height) 864 p= parent[px + py*2*stride]; 865 } 866 if(/*ll|*/l|lt|t|rt|p){ 867 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); 868 869 put_rac(&s->c, &b->state[0][context], !!v); 870 }else{ 871 if(!run){ 872 run= runs[run_index++]; 873 874 if(run_index <= max_index) 875 put_symbol2(&s->c, b->state[1], run, 3); 876 av_assert2(v); 877 }else{ 878 run--; 879 av_assert2(!v); 880 } 881 } 882 if(v){ 883 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); 884 int l2= 2*FFABS(l) + (l<0); 885 int t2= 2*FFABS(t) + (t<0); 886 887 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4); 888 put_rac(&s->c, &b->state[0][16 + 1 + 3 + ff_quant3bA[l2&0xFF] + 3*ff_quant3bA[t2&0xFF]], v<0); 889 } 890 } 891 } 892 } 893 return 0; 894} 895 896static int encode_subband(SnowContext *s, SubBand *b, const IDWTELEM *src, const IDWTELEM *parent, int stride, int orientation){ 897// encode_subband_qtree(s, b, src, parent, stride, orientation); 898// encode_subband_z0run(s, b, src, parent, stride, orientation); 899 return encode_subband_c0run(s, b, src, parent, stride, orientation); 900// encode_subband_dzr(s, b, src, parent, stride, orientation); 901} 902 903static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, uint8_t (*obmc_edged)[MB_SIZE * 2], int *best_rd){ 904 const int b_stride= s->b_width << s->block_max_depth; 905 BlockNode *block= &s->block[mb_x + mb_y * 
b_stride]; 906 BlockNode backup= *block; 907 unsigned value; 908 int rd, index; 909 910 av_assert2(mb_x>=0 && mb_y>=0); 911 av_assert2(mb_x<b_stride); 912 913 if(intra){ 914 block->color[0] = p[0]; 915 block->color[1] = p[1]; 916 block->color[2] = p[2]; 917 block->type |= BLOCK_INTRA; 918 }else{ 919 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1); 920 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12); 921 if(s->me_cache[index] == value) 922 return 0; 923 s->me_cache[index]= value; 924 925 block->mx= p[0]; 926 block->my= p[1]; 927 block->type &= ~BLOCK_INTRA; 928 } 929 930 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged) + s->intra_penalty * !!intra; 931 932//FIXME chroma 933 if(rd < *best_rd){ 934 *best_rd= rd; 935 return 1; 936 }else{ 937 *block= backup; 938 return 0; 939 } 940} 941 942/* special case for int[2] args we discard afterwards, 943 * fixes compilation problem with gcc 2.95 */ 944static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, uint8_t (*obmc_edged)[MB_SIZE * 2], int *best_rd){ 945 int p[2] = {p0, p1}; 946 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd); 947} 948 949static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){ 950 const int b_stride= s->b_width << s->block_max_depth; 951 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; 952 BlockNode backup[4]; 953 unsigned value; 954 int rd, index; 955 956 /* We don't initialize backup[] during variable declaration, because 957 * that fails to compile on MSVC: "cannot convert from 'BlockNode' to 958 * 'int16_t'". 
*/ 959 backup[0] = block[0]; 960 backup[1] = block[1]; 961 backup[2] = block[b_stride]; 962 backup[3] = block[b_stride + 1]; 963 964 av_assert2(mb_x>=0 && mb_y>=0); 965 av_assert2(mb_x<b_stride); 966 av_assert2(((mb_x|mb_y)&1) == 0); 967 968 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1); 969 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12); 970 if(s->me_cache[index] == value) 971 return 0; 972 s->me_cache[index]= value; 973 974 block->mx= p0; 975 block->my= p1; 976 block->ref= ref; 977 block->type &= ~BLOCK_INTRA; 978 block[1]= block[b_stride]= block[b_stride+1]= *block; 979 980 rd= get_4block_rd(s, mb_x, mb_y, 0); 981 982//FIXME chroma 983 if(rd < *best_rd){ 984 *best_rd= rd; 985 return 1; 986 }else{ 987 block[0]= backup[0]; 988 block[1]= backup[1]; 989 block[b_stride]= backup[2]; 990 block[b_stride+1]= backup[3]; 991 return 0; 992 } 993} 994 995static void iterative_me(SnowContext *s){ 996 int pass, mb_x, mb_y; 997 const int b_width = s->b_width << s->block_max_depth; 998 const int b_height= s->b_height << s->block_max_depth; 999 const int b_stride= b_width; 1000 int color[3]; 1001 1002 { 1003 RangeCoder r = s->c; 1004 uint8_t state[sizeof(s->block_state)]; 1005 memcpy(state, s->block_state, sizeof(s->block_state)); 1006 for(mb_y= 0; mb_y<s->b_height; mb_y++) 1007 for(mb_x= 0; mb_x<s->b_width; mb_x++) 1008 encode_q_branch(s, 0, mb_x, mb_y); 1009 s->c = r; 1010 memcpy(s->block_state, state, sizeof(s->block_state)); 1011 } 1012 1013 for(pass=0; pass<25; pass++){ 1014 int change= 0; 1015 1016 for(mb_y= 0; mb_y<b_height; mb_y++){ 1017 for(mb_x= 0; mb_x<b_width; mb_x++){ 1018 int dia_change, i, j, ref; 1019 int best_rd= INT_MAX, ref_rd; 1020 BlockNode backup, ref_b; 1021 const int index= mb_x + mb_y * b_stride; 1022 BlockNode *block= &s->block[index]; 1023 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL; 1024 BlockNode *lb = mb_x ? &s->block[index -1] : NULL; 1025 BlockNode *rb = mb_x+1<b_width ? 
&s->block[index +1] : NULL; 1026 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL; 1027 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL; 1028 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL; 1029 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL; 1030 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL; 1031 const int b_w= (MB_SIZE >> s->block_max_depth); 1032 uint8_t obmc_edged[MB_SIZE * 2][MB_SIZE * 2]; 1033 1034 if(pass && (block->type & BLOCK_OPT)) 1035 continue; 1036 block->type |= BLOCK_OPT; 1037 1038 backup= *block; 1039 1040 if(!s->me_cache_generation) 1041 memset(s->me_cache, 0, sizeof(s->me_cache)); 1042 s->me_cache_generation += 1<<22; 1043 1044 //FIXME precalculate 1045 { 1046 int x, y; 1047 for (y = 0; y < b_w * 2; y++) 1048 memcpy(obmc_edged[y], ff_obmc_tab[s->block_max_depth] + y * b_w * 2, b_w * 2); 1049 if(mb_x==0) 1050 for(y=0; y<b_w*2; y++) 1051 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w); 1052 if(mb_x==b_stride-1) 1053 for(y=0; y<b_w*2; y++) 1054 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w); 1055 if(mb_y==0){ 1056 for(x=0; x<b_w*2; x++) 1057 obmc_edged[0][x] += obmc_edged[b_w-1][x]; 1058 for(y=1; y<b_w; y++) 1059 memcpy(obmc_edged[y], obmc_edged[0], b_w*2); 1060 } 1061 if(mb_y==b_height-1){ 1062 for(x=0; x<b_w*2; x++) 1063 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x]; 1064 for(y=b_w; y<b_w*2-1; y++) 1065 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2); 1066 } 1067 } 1068 1069 //skip stuff outside the picture 1070 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){ 1071 uint8_t *src= s-> input_picture->data[0]; 1072 uint8_t *dst= s->current_picture->data[0]; 1073 const int stride= s->current_picture->linesize[0]; 1074 const int block_w= MB_SIZE >> s->block_max_depth; 1075 const int block_h= MB_SIZE >> s->block_max_depth; 1076 const int sx= block_w*mb_x - 
block_w/2; 1077 const int sy= block_h*mb_y - block_h/2; 1078 const int w= s->plane[0].width; 1079 const int h= s->plane[0].height; 1080 int y; 1081 1082 for(y=sy; y<0; y++) 1083 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); 1084 for(y=h; y<sy+block_h*2; y++) 1085 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); 1086 if(sx<0){ 1087 for(y=sy; y<sy+block_h*2; y++) 1088 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx); 1089 } 1090 if(sx+block_w*2 > w){ 1091 for(y=sy; y<sy+block_h*2; y++) 1092 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w); 1093 } 1094 } 1095 1096 // intra(black) = neighbors' contribution to the current block 1097 for(i=0; i < s->nb_planes; i++) 1098 color[i]= get_dc(s, mb_x, mb_y, i); 1099 1100 // get previous score (cannot be cached due to OBMC) 1101 if(pass > 0 && (block->type&BLOCK_INTRA)){ 1102 int color0[3]= {block->color[0], block->color[1], block->color[2]}; 1103 check_block(s, mb_x, mb_y, color0, 1, obmc_edged, &best_rd); 1104 }else 1105 check_block_inter(s, mb_x, mb_y, block->mx, block->my, obmc_edged, &best_rd); 1106 1107 ref_b= *block; 1108 ref_rd= best_rd; 1109 for(ref=0; ref < s->ref_frames; ref++){ 1110 int16_t (*mvr)[2]= &s->ref_mvs[ref][index]; 1111 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold 1112 continue; 1113 block->ref= ref; 1114 best_rd= INT_MAX; 1115 1116 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], obmc_edged, &best_rd); 1117 check_block_inter(s, mb_x, mb_y, 0, 0, obmc_edged, &best_rd); 1118 if(tb) 1119 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], obmc_edged, &best_rd); 1120 if(lb) 1121 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], obmc_edged, &best_rd); 1122 if(rb) 1123 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], obmc_edged, &best_rd); 1124 if(bb) 1125 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], obmc_edged, &best_rd); 1126 1127 /* fullpel ME */ 1128 
//FIXME avoid subpel interpolation / round to nearest integer 1129 do{ 1130 int newx = block->mx; 1131 int newy = block->my; 1132 int dia_size = s->iterative_dia_size ? s->iterative_dia_size : FFMAX(s->avctx->dia_size, 1); 1133 dia_change=0; 1134 for(i=0; i < dia_size; i++){ 1135 for(j=0; j<i; j++){ 1136 dia_change |= check_block_inter(s, mb_x, mb_y, newx+4*(i-j), newy+(4*j), obmc_edged, &best_rd); 1137 dia_change |= check_block_inter(s, mb_x, mb_y, newx-4*(i-j), newy-(4*j), obmc_edged, &best_rd); 1138 dia_change |= check_block_inter(s, mb_x, mb_y, newx-(4*j), newy+4*(i-j), obmc_edged, &best_rd); 1139 dia_change |= check_block_inter(s, mb_x, mb_y, newx+(4*j), newy-4*(i-j), obmc_edged, &best_rd); 1140 } 1141 } 1142 }while(dia_change); 1143 /* subpel ME */ 1144 do{ 1145 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},}; 1146 dia_change=0; 1147 for(i=0; i<8; i++) 1148 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], obmc_edged, &best_rd); 1149 }while(dia_change); 1150 //FIXME or try the standard 2 pass qpel or similar 1151 1152 mvr[0][0]= block->mx; 1153 mvr[0][1]= block->my; 1154 if(ref_rd > best_rd){ 1155 ref_rd= best_rd; 1156 ref_b= *block; 1157 } 1158 } 1159 best_rd= ref_rd; 1160 *block= ref_b; 1161 check_block(s, mb_x, mb_y, color, 1, obmc_edged, &best_rd); 1162 //FIXME RD style color selection 1163 if(!same_block(block, &backup)){ 1164 if(tb ) tb ->type &= ~BLOCK_OPT; 1165 if(lb ) lb ->type &= ~BLOCK_OPT; 1166 if(rb ) rb ->type &= ~BLOCK_OPT; 1167 if(bb ) bb ->type &= ~BLOCK_OPT; 1168 if(tlb) tlb->type &= ~BLOCK_OPT; 1169 if(trb) trb->type &= ~BLOCK_OPT; 1170 if(blb) blb->type &= ~BLOCK_OPT; 1171 if(brb) brb->type &= ~BLOCK_OPT; 1172 change ++; 1173 } 1174 } 1175 } 1176 av_log(s->avctx, AV_LOG_DEBUG, "pass:%d changed:%d\n", pass, change); 1177 if(!change) 1178 break; 1179 } 1180 1181 if(s->block_max_depth == 1){ 1182 int change= 0; 1183 for(mb_y= 0; mb_y<b_height; 
mb_y+=2){ 1184 for(mb_x= 0; mb_x<b_width; mb_x+=2){ 1185 int i; 1186 int best_rd, init_rd; 1187 const int index= mb_x + mb_y * b_stride; 1188 BlockNode *b[4]; 1189 1190 b[0]= &s->block[index]; 1191 b[1]= b[0]+1; 1192 b[2]= b[0]+b_stride; 1193 b[3]= b[2]+1; 1194 if(same_block(b[0], b[1]) && 1195 same_block(b[0], b[2]) && 1196 same_block(b[0], b[3])) 1197 continue; 1198 1199 if(!s->me_cache_generation) 1200 memset(s->me_cache, 0, sizeof(s->me_cache)); 1201 s->me_cache_generation += 1<<22; 1202 1203 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0); 1204 1205 //FIXME more multiref search? 1206 check_4block_inter(s, mb_x, mb_y, 1207 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2, 1208 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd); 1209 1210 for(i=0; i<4; i++) 1211 if(!(b[i]->type&BLOCK_INTRA)) 1212 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd); 1213 1214 if(init_rd != best_rd) 1215 change++; 1216 } 1217 } 1218 av_log(s->avctx, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4); 1219 } 1220} 1221 1222static void encode_blocks(SnowContext *s, int search){ 1223 int x, y; 1224 int w= s->b_width; 1225 int h= s->b_height; 1226 1227 if(s->motion_est == FF_ME_ITER && !s->keyframe && search) 1228 iterative_me(s); 1229 1230 for(y=0; y<h; y++){ 1231 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit 1232 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); 1233 return; 1234 } 1235 for(x=0; x<w; x++){ 1236 if(s->motion_est == FF_ME_ITER || !search) 1237 encode_q_branch2(s, 0, x, y); 1238 else 1239 encode_q_branch (s, 0, x, y); 1240 } 1241 } 1242} 1243 1244static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){ 1245 const int w= b->width; 1246 const int h= b->height; 1247 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); 1248 const int qmul= ff_qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS); 1249 int x,y, thres1, thres2; 
1250 1251 if(s->qlog == LOSSLESS_QLOG){ 1252 for(y=0; y<h; y++) 1253 for(x=0; x<w; x++) 1254 dst[x + y*stride]= src[x + y*stride]; 1255 return; 1256 } 1257 1258 bias= bias ? 0 : (3*qmul)>>3; 1259 thres1= ((qmul - bias)>>QEXPSHIFT) - 1; 1260 thres2= 2*thres1; 1261 1262 if(!bias){ 1263 for(y=0; y<h; y++){ 1264 for(x=0; x<w; x++){ 1265 int i= src[x + y*stride]; 1266 1267 if((unsigned)(i+thres1) > thres2){ 1268 if(i>=0){ 1269 i<<= QEXPSHIFT; 1270 i/= qmul; //FIXME optimize 1271 dst[x + y*stride]= i; 1272 }else{ 1273 i= -i; 1274 i<<= QEXPSHIFT; 1275 i/= qmul; //FIXME optimize 1276 dst[x + y*stride]= -i; 1277 } 1278 }else 1279 dst[x + y*stride]= 0; 1280 } 1281 } 1282 }else{ 1283 for(y=0; y<h; y++){ 1284 for(x=0; x<w; x++){ 1285 int i= src[x + y*stride]; 1286 1287 if((unsigned)(i+thres1) > thres2){ 1288 if(i>=0){ 1289 i<<= QEXPSHIFT; 1290 i= (i + bias) / qmul; //FIXME optimize 1291 dst[x + y*stride]= i; 1292 }else{ 1293 i= -i; 1294 i<<= QEXPSHIFT; 1295 i= (i + bias) / qmul; //FIXME optimize 1296 dst[x + y*stride]= -i; 1297 } 1298 }else 1299 dst[x + y*stride]= 0; 1300 } 1301 } 1302 } 1303} 1304 1305static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){ 1306 const int w= b->width; 1307 const int h= b->height; 1308 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); 1309 const int qmul= ff_qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); 1310 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; 1311 int x,y; 1312 1313 if(s->qlog == LOSSLESS_QLOG) return; 1314 1315 for(y=0; y<h; y++){ 1316 for(x=0; x<w; x++){ 1317 int i= src[x + y*stride]; 1318 if(i<0){ 1319 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias 1320 }else if(i>0){ 1321 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT)); 1322 } 1323 } 1324 } 1325} 1326 1327static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ 1328 const int w= b->width; 1329 const int h= b->height; 1330 int x,y; 1331 1332 for(y=h-1; y>=0; y--){ 1333 
for(x=w-1; x>=0; x--){ 1334 int i= x + y*stride; 1335 1336 if(x){ 1337 if(use_median){ 1338 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); 1339 else src[i] -= src[i - 1]; 1340 }else{ 1341 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); 1342 else src[i] -= src[i - 1]; 1343 } 1344 }else{ 1345 if(y) src[i] -= src[i - stride]; 1346 } 1347 } 1348 } 1349} 1350 1351static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ 1352 const int w= b->width; 1353 const int h= b->height; 1354 int x,y; 1355 1356 for(y=0; y<h; y++){ 1357 for(x=0; x<w; x++){ 1358 int i= x + y*stride; 1359 1360 if(x){ 1361 if(use_median){ 1362 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); 1363 else src[i] += src[i - 1]; 1364 }else{ 1365 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); 1366 else src[i] += src[i - 1]; 1367 } 1368 }else{ 1369 if(y) src[i] += src[i - stride]; 1370 } 1371 } 1372 } 1373} 1374 1375static void encode_qlogs(SnowContext *s){ 1376 int plane_index, level, orientation; 1377 1378 for(plane_index=0; plane_index<FFMIN(s->nb_planes, 2); plane_index++){ 1379 for(level=0; level<s->spatial_decomposition_count; level++){ 1380 for(orientation=level ? 
/**
 * Write the per-band qlog values to the header.
 *
 * Covers at most the first two planes and, for every decomposition level,
 * orientations 0 (level 0 only), 1 and 3; orientation 2 is skipped and not
 * written.
 */
static void encode_qlogs(SnowContext *s){
    int plane_index, level, orientation;

    for(plane_index=0; plane_index<FFMIN(s->nb_planes, 2); plane_index++){
        for(level=0; level<s->spatial_decomposition_count; level++){
            for(orientation=level ? 1:0; orientation<4; orientation++){
                if(orientation==2) continue;
                put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
            }
        }
    }
}

/**
 * Write the frame header to the range coder.
 *
 * Layout, in the order written:
 *  - the keyframe flag, coded with a freshly initialised local state;
 *  - for keyframes: version, always_reset, temporal decomposition type and
 *    count, spatial decomposition count, colorspace type, the chroma shifts
 *    (only when there are more than two planes), spatial_scalability,
 *    max_ref_frames - 1 and the qlogs;
 *  - for other frames: a flag plus the motion compensation filter settings
 *    when they differ from the last ones, and a flag plus the spatial
 *    decomposition count and qlogs when that count changed;
 *  - for every frame: the differences of spatial decomposition type, qlog,
 *    mv_scale, qbias and block_max_depth to their last_* values.
 *
 * On keyframes, or whenever always_reset is set, the coder contexts and the
 * last_* values are reset before anything else is written.
 */
static void encode_header(SnowContext *s){
    int plane_index, i;
    uint8_t kstate[32];

    memset(kstate, MID_STATE, sizeof(kstate));

    put_rac(&s->c, kstate, s->keyframe);
    if(s->keyframe || s->always_reset){
        ff_snow_reset_contexts(s);
        s->last_spatial_decomposition_type=
        s->last_qlog=
        s->last_qbias=
        s->last_mv_scale=
        s->last_block_max_depth= 0;
        for(plane_index=0; plane_index<2; plane_index++){
            Plane *p= &s->plane[plane_index];
            p->last_htaps=0;
            p->last_diag_mc=0;
            memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
        }
    }
    if(s->keyframe){
        /* stream-level parameters, only present in keyframes */
        put_symbol(&s->c, s->header_state, s->version, 0);
        put_rac(&s->c, s->header_state, s->always_reset);
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
        if (s->nb_planes > 2) {
            put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
            put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
        }
        put_rac(&s->c, s->header_state, s->spatial_scalability);
//        put_rac(&s->c, s->header_state, s->rate_scalability);
        put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);

        encode_qlogs(s);
    }

    if(!s->keyframe){
        /* signal motion compensation filter changes */
        int update_mc=0;
        for(plane_index=0; plane_index<FFMIN(s->nb_planes, 2); plane_index++){
            Plane *p= &s->plane[plane_index];
            update_mc |= p->last_htaps   != p->htaps;
            update_mc |= p->last_diag_mc != p->diag_mc;
            update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
        }
        put_rac(&s->c, s->header_state, update_mc);
        if(update_mc){
            for(plane_index=0; plane_index<FFMIN(s->nb_planes, 2); plane_index++){
                Plane *p= &s->plane[plane_index];
                put_rac(&s->c, s->header_state, p->diag_mc);
                put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
                /* coefficient magnitudes from index htaps/2 down to 1;
                 * hcoeff[0] is not written */
                for(i= p->htaps/2; i; i--)
                    put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
            }
        }
        /* signal a changed decomposition count together with new qlogs */
        if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
            put_rac(&s->c, s->header_state, 1);
            put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
            encode_qlogs(s);
        }else
            put_rac(&s->c, s->header_state, 0);
    }

    /* per-frame parameters, coded as differences to the last values */
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
    put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
    put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
    put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
    put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);

}
s->header_state, update_mc); 1436 if(update_mc){ 1437 for(plane_index=0; plane_index<FFMIN(s->nb_planes, 2); plane_index++){ 1438 Plane *p= &s->plane[plane_index]; 1439 put_rac(&s->c, s->header_state, p->diag_mc); 1440 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0); 1441 for(i= p->htaps/2; i; i--) 1442 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0); 1443 } 1444 } 1445 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){ 1446 put_rac(&s->c, s->header_state, 1); 1447 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0); 1448 encode_qlogs(s); 1449 }else 1450 put_rac(&s->c, s->header_state, 0); 1451 } 1452 1453 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1); 1454 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1); 1455 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1); 1456 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1); 1457 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1); 1458 1459} 1460 1461static void update_last_header_values(SnowContext *s){ 1462 int plane_index; 1463 1464 if(!s->keyframe){ 1465 for(plane_index=0; plane_index<2; plane_index++){ 1466 Plane *p= &s->plane[plane_index]; 1467 p->last_diag_mc= p->diag_mc; 1468 p->last_htaps = p->htaps; 1469 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); 1470 } 1471 } 1472 1473 s->last_spatial_decomposition_type = s->spatial_decomposition_type; 1474 s->last_qlog = s->qlog; 1475 s->last_qbias = s->qbias; 1476 s->last_mv_scale = s->mv_scale; 1477 s->last_block_max_depth = s->block_max_depth; 1478 s->last_spatial_decomposition_count = s->spatial_decomposition_count; 1479} 1480 1481static int qscale2qlog(int qscale){ 1482 return lrint(QROOT*log2(qscale / (float)FF_QP2LAMBDA)) 1483 + 61*QROOT/8; ///< 64 > 60 1484} 1485 1486static int ratecontrol_1pass(SnowContext *s, AVFrame *pict) 1487{ 1488 /* Estimate the frame's 
complexity as a sum of weighted dwt coefficients. 1489 * FIXME we know exact mv bits at this point, 1490 * but ratecontrol isn't set up to include them. */ 1491 uint32_t coef_sum= 0; 1492 int level, orientation, delta_qlog; 1493 1494 for(level=0; level<s->spatial_decomposition_count; level++){ 1495 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 1496 SubBand *b= &s->plane[0].band[level][orientation]; 1497 IDWTELEM *buf= b->ibuf; 1498 const int w= b->width; 1499 const int h= b->height; 1500 const int stride= b->stride; 1501 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16); 1502 const int qmul= ff_qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); 1503 const int qdiv= (1<<16)/qmul; 1504 int x, y; 1505 //FIXME this is ugly 1506 for(y=0; y<h; y++) 1507 for(x=0; x<w; x++) 1508 buf[x+y*stride]= b->buf[x+y*stride]; 1509 if(orientation==0) 1510 decorrelate(s, b, buf, stride, 1, 0); 1511 for(y=0; y<h; y++) 1512 for(x=0; x<w; x++) 1513 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16; 1514 } 1515 } 1516 1517 /* ugly, ratecontrol just takes a sqrt again */ 1518 av_assert0(coef_sum < INT_MAX); 1519 coef_sum = (uint64_t)coef_sum * coef_sum >> 16; 1520 1521 if(pict->pict_type == AV_PICTURE_TYPE_I){ 1522 s->m.current_picture.mb_var_sum= coef_sum; 1523 s->m.current_picture.mc_mb_var_sum= 0; 1524 }else{ 1525 s->m.current_picture.mc_mb_var_sum= coef_sum; 1526 s->m.current_picture.mb_var_sum= 0; 1527 } 1528 1529 pict->quality= ff_rate_estimate_qscale(&s->m, 1); 1530 if (pict->quality < 0) 1531 return INT_MIN; 1532 s->lambda= pict->quality * 3/2; 1533 delta_qlog= qscale2qlog(pict->quality) - s->qlog; 1534 s->qlog+= delta_qlog; 1535 return delta_qlog; 1536} 1537 1538static void calculate_visual_weight(SnowContext *s, Plane *p){ 1539 int width = p->width; 1540 int height= p->height; 1541 int level, orientation, x, y; 1542 1543 for(level=0; level<s->spatial_decomposition_count; level++){ 1544 int64_t error=0; 1545 for(orientation=level ? 
1 : 0; orientation<4; orientation++){ 1546 SubBand *b= &p->band[level][orientation]; 1547 IDWTELEM *ibuf= b->ibuf; 1548 1549 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height); 1550 ibuf[b->width/2 + b->height/2*b->stride]= 256*16; 1551 ff_spatial_idwt(s->spatial_idwt_buffer, s->temp_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count); 1552 for(y=0; y<height; y++){ 1553 for(x=0; x<width; x++){ 1554 int64_t d= s->spatial_idwt_buffer[x + y*width]*16; 1555 error += d*d; 1556 } 1557 } 1558 if (orientation == 2) 1559 error /= 2; 1560 b->qlog= (int)(QROOT * log2(352256.0/sqrt(error)) + 0.5); 1561 if (orientation != 1) 1562 error = 0; 1563 } 1564 p->band[level][1].qlog = p->band[level][2].qlog; 1565 } 1566} 1567 1568static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, 1569 const AVFrame *pict, int *got_packet) 1570{ 1571 SnowContext *s = avctx->priv_data; 1572 RangeCoder * const c= &s->c; 1573 AVFrame *pic; 1574 const int width= s->avctx->width; 1575 const int height= s->avctx->height; 1576 int level, orientation, plane_index, i, y, ret; 1577 uint8_t rc_header_bak[sizeof(s->header_state)]; 1578 uint8_t rc_block_bak[sizeof(s->block_state)]; 1579 1580 if ((ret = ff_alloc_packet(avctx, pkt, s->b_width*s->b_height*MB_SIZE*MB_SIZE*3 + AV_INPUT_BUFFER_MIN_SIZE)) < 0) 1581 return ret; 1582 1583 ff_init_range_encoder(c, pkt->data, pkt->size); 1584 ff_build_rac_states(c, (1LL<<32)/20, 256-8); 1585 1586 for(i=0; i < s->nb_planes; i++){ 1587 int hshift= i ? s->chroma_h_shift : 0; 1588 int vshift= i ? 
s->chroma_v_shift : 0; 1589 for(y=0; y<AV_CEIL_RSHIFT(height, vshift); y++) 1590 memcpy(&s->input_picture->data[i][y * s->input_picture->linesize[i]], 1591 &pict->data[i][y * pict->linesize[i]], 1592 AV_CEIL_RSHIFT(width, hshift)); 1593 s->mpvencdsp.draw_edges(s->input_picture->data[i], s->input_picture->linesize[i], 1594 AV_CEIL_RSHIFT(width, hshift), AV_CEIL_RSHIFT(height, vshift), 1595 EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift, 1596 EDGE_TOP | EDGE_BOTTOM); 1597 1598 } 1599 emms_c(); 1600 pic = s->input_picture; 1601 pic->pict_type = pict->pict_type; 1602 pic->quality = pict->quality; 1603 1604 s->m.picture_number= avctx->frame_number; 1605 if(avctx->flags&AV_CODEC_FLAG_PASS2){ 1606 s->m.pict_type = pic->pict_type = s->m.rc_context.entry[avctx->frame_number].new_pict_type; 1607 s->keyframe = pic->pict_type == AV_PICTURE_TYPE_I; 1608 if(!(avctx->flags&AV_CODEC_FLAG_QSCALE)) { 1609 pic->quality = ff_rate_estimate_qscale(&s->m, 0); 1610 if (pic->quality < 0) 1611 return -1; 1612 } 1613 }else{ 1614 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0; 1615 s->m.pict_type = pic->pict_type = s->keyframe ? 
AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P; 1616 } 1617 1618 if(s->pass1_rc && avctx->frame_number == 0) 1619 pic->quality = 2*FF_QP2LAMBDA; 1620 if (pic->quality) { 1621 s->qlog = qscale2qlog(pic->quality); 1622 s->lambda = pic->quality * 3/2; 1623 } 1624 if (s->qlog < 0 || (!pic->quality && (avctx->flags & AV_CODEC_FLAG_QSCALE))) { 1625 s->qlog= LOSSLESS_QLOG; 1626 s->lambda = 0; 1627 }//else keep previous frame's qlog until after motion estimation 1628 1629 if (s->current_picture->data[0]) { 1630 int w = s->avctx->width; 1631 int h = s->avctx->height; 1632 1633 s->mpvencdsp.draw_edges(s->current_picture->data[0], 1634 s->current_picture->linesize[0], w , h , 1635 EDGE_WIDTH , EDGE_WIDTH , EDGE_TOP | EDGE_BOTTOM); 1636 if (s->current_picture->data[2]) { 1637 s->mpvencdsp.draw_edges(s->current_picture->data[1], 1638 s->current_picture->linesize[1], w>>s->chroma_h_shift, h>>s->chroma_v_shift, 1639 EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM); 1640 s->mpvencdsp.draw_edges(s->current_picture->data[2], 1641 s->current_picture->linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift, 1642 EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM); 1643 } 1644 emms_c(); 1645 } 1646 1647 ff_snow_frame_start(s); 1648 1649 s->m.current_picture_ptr= &s->m.current_picture; 1650 s->m.current_picture.f = s->current_picture; 1651 s->m.current_picture.f->pts = pict->pts; 1652 if(pic->pict_type == AV_PICTURE_TYPE_P){ 1653 int block_width = (width +15)>>4; 1654 int block_height= (height+15)>>4; 1655 int stride= s->current_picture->linesize[0]; 1656 1657 av_assert0(s->current_picture->data[0]); 1658 av_assert0(s->last_picture[0]->data[0]); 1659 1660 s->m.avctx= s->avctx; 1661 s->m. last_picture.f = s->last_picture[0]; 1662 s->m. new_picture = s->input_picture; 1663 s->m. last_picture_ptr= &s->m. 
last_picture; 1664 s->m.linesize = stride; 1665 s->m.uvlinesize= s->current_picture->linesize[1]; 1666 s->m.width = width; 1667 s->m.height= height; 1668 s->m.mb_width = block_width; 1669 s->m.mb_height= block_height; 1670 s->m.mb_stride= s->m.mb_width+1; 1671 s->m.b8_stride= 2*s->m.mb_width+1; 1672 s->m.f_code=1; 1673 s->m.pict_type = pic->pict_type; 1674 s->m.motion_est= s->motion_est; 1675 s->m.me.scene_change_score=0; 1676 s->m.me.dia_size = avctx->dia_size; 1677 s->m.quarter_sample= (s->avctx->flags & AV_CODEC_FLAG_QPEL)!=0; 1678 s->m.out_format= FMT_H263; 1679 s->m.unrestricted_mv= 1; 1680 1681 s->m.lambda = s->lambda; 1682 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7); 1683 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT; 1684 1685 s->m.mecc= s->mecc; //move 1686 s->m.qdsp= s->qdsp; //move 1687 s->m.hdsp = s->hdsp; 1688 ff_init_me(&s->m); 1689 s->hdsp = s->m.hdsp; 1690 s->mecc= s->m.mecc; 1691 } 1692 1693 if(s->pass1_rc){ 1694 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state)); 1695 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state)); 1696 } 1697 1698redo_frame: 1699 1700 s->spatial_decomposition_count= 5; 1701 1702 while( !(width >>(s->chroma_h_shift + s->spatial_decomposition_count)) 1703 || !(height>>(s->chroma_v_shift + s->spatial_decomposition_count))) 1704 s->spatial_decomposition_count--; 1705 1706 if (s->spatial_decomposition_count <= 0) { 1707 av_log(avctx, AV_LOG_ERROR, "Resolution too low\n"); 1708 return AVERROR(EINVAL); 1709 } 1710 1711 s->m.pict_type = pic->pict_type; 1712 s->qbias = pic->pict_type == AV_PICTURE_TYPE_P ? 
2 : 0; 1713 1714 ff_snow_common_init_after_header(avctx); 1715 1716 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){ 1717 for(plane_index=0; plane_index < s->nb_planes; plane_index++){ 1718 calculate_visual_weight(s, &s->plane[plane_index]); 1719 } 1720 } 1721 1722 encode_header(s); 1723 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start); 1724 encode_blocks(s, 1); 1725 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits; 1726 1727 for(plane_index=0; plane_index < s->nb_planes; plane_index++){ 1728 Plane *p= &s->plane[plane_index]; 1729 int w= p->width; 1730 int h= p->height; 1731 int x, y; 1732// int bits= put_bits_count(&s->c.pb); 1733 1734 if (!s->memc_only) { 1735 //FIXME optimize 1736 if(pict->data[plane_index]) //FIXME gray hack 1737 for(y=0; y<h; y++){ 1738 for(x=0; x<w; x++){ 1739 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS; 1740 } 1741 } 1742 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0); 1743 1744 if( plane_index==0 1745 && pic->pict_type == AV_PICTURE_TYPE_P 1746 && !(avctx->flags&AV_CODEC_FLAG_PASS2) 1747 && s->m.me.scene_change_score > s->scenechange_threshold){ 1748 ff_init_range_encoder(c, pkt->data, pkt->size); 1749 ff_build_rac_states(c, (1LL<<32)/20, 256-8); 1750 pic->pict_type= AV_PICTURE_TYPE_I; 1751 s->keyframe=1; 1752 s->current_picture->key_frame=1; 1753 goto redo_frame; 1754 } 1755 1756 if(s->qlog == LOSSLESS_QLOG){ 1757 for(y=0; y<h; y++){ 1758 for(x=0; x<w; x++){ 1759 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS; 1760 } 1761 } 1762 }else{ 1763 for(y=0; y<h; y++){ 1764 for(x=0; x<w; x++){ 1765 s->spatial_dwt_buffer[y*w + x]= s->spatial_idwt_buffer[y*w + x] * (1 << ENCODER_EXTRA_BITS); 1766 } 1767 } 1768 } 1769 1770 ff_spatial_dwt(s->spatial_dwt_buffer, s->temp_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); 1771 1772 
if(s->pass1_rc && plane_index==0){ 1773 int delta_qlog = ratecontrol_1pass(s, pic); 1774 if (delta_qlog <= INT_MIN) 1775 return -1; 1776 if(delta_qlog){ 1777 //reordering qlog in the bitstream would eliminate this reset 1778 ff_init_range_encoder(c, pkt->data, pkt->size); 1779 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state)); 1780 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state)); 1781 encode_header(s); 1782 encode_blocks(s, 0); 1783 } 1784 } 1785 1786 for(level=0; level<s->spatial_decomposition_count; level++){ 1787 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 1788 SubBand *b= &p->band[level][orientation]; 1789 1790 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias); 1791 if(orientation==0) 1792 decorrelate(s, b, b->ibuf, b->stride, pic->pict_type == AV_PICTURE_TYPE_P, 0); 1793 if (!s->no_bitstream) 1794 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation); 1795 av_assert0(b->parent==NULL || b->parent->stride == b->stride*2); 1796 if(orientation==0) 1797 correlate(s, b, b->ibuf, b->stride, 1, 0); 1798 } 1799 } 1800 1801 for(level=0; level<s->spatial_decomposition_count; level++){ 1802 for(orientation=level ? 
1 : 0; orientation<4; orientation++){ 1803 SubBand *b= &p->band[level][orientation]; 1804 1805 dequantize(s, b, b->ibuf, b->stride); 1806 } 1807 } 1808 1809 ff_spatial_idwt(s->spatial_idwt_buffer, s->temp_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); 1810 if(s->qlog == LOSSLESS_QLOG){ 1811 for(y=0; y<h; y++){ 1812 for(x=0; x<w; x++){ 1813 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS; 1814 } 1815 } 1816 } 1817 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); 1818 }else{ 1819 //ME/MC only 1820 if(pic->pict_type == AV_PICTURE_TYPE_I){ 1821 for(y=0; y<h; y++){ 1822 for(x=0; x<w; x++){ 1823 s->current_picture->data[plane_index][y*s->current_picture->linesize[plane_index] + x]= 1824 pict->data[plane_index][y*pict->linesize[plane_index] + x]; 1825 } 1826 } 1827 }else{ 1828 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h); 1829 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); 1830 } 1831 } 1832 if(s->avctx->flags&AV_CODEC_FLAG_PSNR){ 1833 int64_t error= 0; 1834 1835 if(pict->data[plane_index]) //FIXME gray hack 1836 for(y=0; y<h; y++){ 1837 for(x=0; x<w; x++){ 1838 int d= s->current_picture->data[plane_index][y*s->current_picture->linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x]; 1839 error += d*d; 1840 } 1841 } 1842 s->avctx->error[plane_index] += error; 1843 s->encoding_error[plane_index] = error; 1844 } 1845 1846 } 1847 emms_c(); 1848 1849 update_last_header_values(s); 1850 1851 ff_snow_release_buffer(avctx); 1852 1853 s->current_picture->coded_picture_number = avctx->frame_number; 1854 s->current_picture->pict_type = pic->pict_type; 1855 s->current_picture->quality = pic->quality; 1856 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start); 1857 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits; 1858 s->m.current_picture.f->display_picture_number = 1859 s->m.current_picture.f->coded_picture_number = avctx->frame_number; 1860 
s->m.current_picture.f->quality = pic->quality; 1861 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start); 1862 if(s->pass1_rc) 1863 if (ff_rate_estimate_qscale(&s->m, 0) < 0) 1864 return -1; 1865 if(avctx->flags&AV_CODEC_FLAG_PASS1) 1866 ff_write_pass1_stats(&s->m); 1867 s->m.last_pict_type = s->m.pict_type; 1868 1869 emms_c(); 1870 1871 ff_side_data_set_encoder_stats(pkt, s->current_picture->quality, 1872 s->encoding_error, 1873 (s->avctx->flags&AV_CODEC_FLAG_PSNR) ? SNOW_MAX_PLANES : 0, 1874 s->current_picture->pict_type); 1875 1876 pkt->size = ff_rac_terminate(c, 0); 1877 if (s->current_picture->key_frame) 1878 pkt->flags |= AV_PKT_FLAG_KEY; 1879 *got_packet = 1; 1880 1881 return 0; 1882} 1883 1884static av_cold int encode_end(AVCodecContext *avctx) 1885{ 1886 SnowContext *s = avctx->priv_data; 1887 1888 ff_snow_common_end(s); 1889 ff_rate_control_uninit(&s->m); 1890 av_frame_free(&s->input_picture); 1891 av_freep(&avctx->stats_out); 1892 1893 return 0; 1894} 1895 1896#define OFFSET(x) offsetof(SnowContext, x) 1897#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM 1898static const AVOption options[] = { 1899 {"motion_est", "motion estimation algorithm", OFFSET(motion_est), AV_OPT_TYPE_INT, {.i64 = FF_ME_EPZS }, FF_ME_ZERO, FF_ME_ITER, VE, "motion_est" }, 1900 { "zero", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_ZERO }, 0, 0, VE, "motion_est" }, 1901 { "epzs", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_EPZS }, 0, 0, VE, "motion_est" }, 1902 { "xone", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_XONE }, 0, 0, VE, "motion_est" }, 1903 { "iter", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_ITER }, 0, 0, VE, "motion_est" }, 1904 { "memc_only", "Only do ME/MC (I frames -> ref, P frame -> ME+MC).", OFFSET(memc_only), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, 1905 { "no_bitstream", "Skip final bitstream writeout.", OFFSET(no_bitstream), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, 1906 { "intra_penalty", "Penalty for intra blocks in block 
decission", OFFSET(intra_penalty), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE }, 1907 { "iterative_dia_size", "Dia size for the iterative ME", OFFSET(iterative_dia_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE }, 1908 { "sc_threshold", "Scene change threshold", OFFSET(scenechange_threshold), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, VE }, 1909 { "pred", "Spatial decomposition type", OFFSET(pred), AV_OPT_TYPE_INT, { .i64 = 0 }, DWT_97, DWT_53, VE, "pred" }, 1910 { "dwt97", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, INT_MIN, INT_MAX, VE, "pred" }, 1911 { "dwt53", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, INT_MIN, INT_MAX, VE, "pred" }, 1912 { "rc_eq", "Set rate control equation. When computing the expression, besides the standard functions " 1913 "defined in the section 'Expression Evaluation', the following functions are available: " 1914 "bits2qp(bits), qp2bits(qp). Also the following constants are available: iTex pTex tex mv " 1915 "fCode iCount mcVar var isI isP isB avgQP qComp avgIITex avgPITex avgPPTex avgBPTex avgTex.", 1916 OFFSET(m.rc_eq), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VE }, 1917 { NULL }, 1918}; 1919 1920static const AVClass snowenc_class = { 1921 .class_name = "snow encoder", 1922 .item_name = av_default_item_name, 1923 .option = options, 1924 .version = LIBAVUTIL_VERSION_INT, 1925}; 1926 1927const FFCodec ff_snow_encoder = { 1928 .p.name = "snow", 1929 .p.long_name = NULL_IF_CONFIG_SMALL("Snow"), 1930 .p.type = AVMEDIA_TYPE_VIDEO, 1931 .p.id = AV_CODEC_ID_SNOW, 1932 .priv_data_size = sizeof(SnowContext), 1933 .init = encode_init, 1934 FF_CODEC_ENCODE_CB(encode_frame), 1935 .close = encode_end, 1936 .p.pix_fmts = (const enum AVPixelFormat[]){ 1937 AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV444P, 1938 AV_PIX_FMT_GRAY8, 1939 AV_PIX_FMT_NONE 1940 }, 1941 .p.priv_class = &snowenc_class, 1942 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | 1943 FF_CODEC_CAP_INIT_CLEANUP, 1944}; 1945