1/*
2 * The simplest mpeg encoder (well, it was the simplest!)
3 * Copyright (c) 2000,2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5 *
6 * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7 *
8 * This file is part of FFmpeg.
9 *
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25/*
26 * non linear quantizers with large QPs and VBV with restrictive qmin fixes sponsored by NOA GmbH
27 */
28
29/**
30 * @file
31 * The simplest mpeg encoder (well, it was the simplest!).
32 */
33
34#include "config_components.h"
35
36#include <stdint.h>
37
38#include "libavutil/internal.h"
39#include "libavutil/intmath.h"
40#include "libavutil/mathematics.h"
41#include "libavutil/mem_internal.h"
42#include "libavutil/pixdesc.h"
43#include "libavutil/opt.h"
44#include "libavutil/thread.h"
45#include "avcodec.h"
46#include "dct.h"
47#include "encode.h"
48#include "idctdsp.h"
49#include "mpeg12.h"
50#include "mpeg12data.h"
51#include "mpeg12enc.h"
52#include "mpegvideo.h"
53#include "mpegvideodata.h"
54#include "mpegvideoenc.h"
55#include "h261enc.h"
56#include "h263.h"
57#include "h263data.h"
58#include "h263enc.h"
59#include "mjpegenc_common.h"
60#include "mathops.h"
61#include "mpegutils.h"
62#include "mjpegenc.h"
63#include "speedhqenc.h"
64#include "msmpeg4enc.h"
65#include "pixblockdsp.h"
66#include "qpeldsp.h"
67#include "faandct.h"
68#include "aandcttab.h"
69#include "flvenc.h"
70#include "mpeg4video.h"
71#include "mpeg4videodata.h"
72#include "mpeg4videoenc.h"
73#include "internal.h"
74#include "bytestream.h"
75#include "wmv2enc.h"
76#include "rv10enc.h"
77#include "packet_internal.h"
78#include <limits.h>
79#include "sp5x.h"
80
81#define QUANT_BIAS_SHIFT 8
82
83#define QMAT_SHIFT_MMX 16
84#define QMAT_SHIFT 21
85
86static int encode_picture(MpegEncContext *s, int picture_number);
87static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
88static int sse_mb(MpegEncContext *s);
89static void denoise_dct_c(MpegEncContext *s, int16_t *block);
90static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
91
92static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_DMV * 2 + 1];
93static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
94
95static const AVOption mpv_generic_options[] = {
96    FF_MPV_COMMON_OPTS
97    FF_MPV_COMMON_MOTION_EST_OPTS
98    { NULL },
99};
100
101const AVClass ff_mpv_enc_class = {
102    .class_name = "generic mpegvideo encoder",
103    .item_name  = av_default_item_name,
104    .option     = mpv_generic_options,
105    .version    = LIBAVUTIL_VERSION_INT,
106};
107
108void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
109                       uint16_t (*qmat16)[2][64],
110                       const uint16_t *quant_matrix,
111                       int bias, int qmin, int qmax, int intra)
112{
113    FDCTDSPContext *fdsp = &s->fdsp;
114    int qscale;
115    int shift = 0;
116
117    for (qscale = qmin; qscale <= qmax; qscale++) {
118        int i;
119        int qscale2;
120
121        if (s->q_scale_type) qscale2 = ff_mpeg2_non_linear_qscale[qscale];
122        else                 qscale2 = qscale << 1;
123
124        if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
125#if CONFIG_FAANDCT
126            fdsp->fdct == ff_faandct            ||
127#endif /* CONFIG_FAANDCT */
128            fdsp->fdct == ff_jpeg_fdct_islow_10) {
129            for (i = 0; i < 64; i++) {
130                const int j = s->idsp.idct_permutation[i];
131                int64_t den = (int64_t) qscale2 * quant_matrix[j];
132                /* 16 <= qscale * quant_matrix[i] <= 7905
133                 * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
134                 *             19952 <=              x  <= 249205026
135                 * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
136                 *           3444240 >= (1 << 36) / (x) >= 275 */
137
138                qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
139            }
140        } else if (fdsp->fdct == ff_fdct_ifast) {
141            for (i = 0; i < 64; i++) {
142                const int j = s->idsp.idct_permutation[i];
143                int64_t den = ff_aanscales[i] * (int64_t) qscale2 * quant_matrix[j];
144                /* 16 <= qscale * quant_matrix[i] <= 7905
145                 * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
146                 *             19952 <=              x  <= 249205026
147                 * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
148                 *           3444240 >= (1 << 36) / (x) >= 275 */
149
150                qmat[qscale][i] = (int)((UINT64_C(2) << (QMAT_SHIFT + 14)) / den);
151            }
152        } else {
153            for (i = 0; i < 64; i++) {
154                const int j = s->idsp.idct_permutation[i];
155                int64_t den = (int64_t) qscale2 * quant_matrix[j];
156                /* We can safely suppose that 16 <= quant_matrix[i] <= 255
157                 * Assume x = qscale * quant_matrix[i]
158                 * So             16 <=              x  <= 7905
159                 * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
160                 * so          32768 >= (1 << 19) / (x) >= 67 */
161                qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
162                //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
163                //                    (qscale * quant_matrix[i]);
164                qmat16[qscale][0][i] = (2 << QMAT_SHIFT_MMX) / den;
165
166                if (qmat16[qscale][0][i] == 0 ||
167                    qmat16[qscale][0][i] == 128 * 256)
168                    qmat16[qscale][0][i] = 128 * 256 - 1;
169                qmat16[qscale][1][i] =
170                    ROUNDED_DIV(bias * (1<<(16 - QUANT_BIAS_SHIFT)),
171                                qmat16[qscale][0][i]);
172            }
173        }
174
175        for (i = intra; i < 64; i++) {
176            int64_t max = 8191;
177            if (fdsp->fdct == ff_fdct_ifast) {
178                max = (8191LL * ff_aanscales[i]) >> 14;
179            }
180            while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
181                shift++;
182            }
183        }
184    }
185    if (shift) {
186        av_log(s->avctx, AV_LOG_INFO,
187               "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
188               QMAT_SHIFT - shift);
189    }
190}
191
192static inline void update_qscale(MpegEncContext *s)
193{
194    if (s->q_scale_type == 1 && 0) {
195        int i;
196        int bestdiff=INT_MAX;
197        int best = 1;
198
199        for (i = 0 ; i<FF_ARRAY_ELEMS(ff_mpeg2_non_linear_qscale); i++) {
200            int diff = FFABS((ff_mpeg2_non_linear_qscale[i]<<(FF_LAMBDA_SHIFT + 6)) - (int)s->lambda * 139);
201            if (ff_mpeg2_non_linear_qscale[i] < s->avctx->qmin ||
202                (ff_mpeg2_non_linear_qscale[i] > s->avctx->qmax && !s->vbv_ignore_qmax))
203                continue;
204            if (diff < bestdiff) {
205                bestdiff = diff;
206                best = i;
207            }
208        }
209        s->qscale = best;
210    } else {
211        s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
212                    (FF_LAMBDA_SHIFT + 7);
213        s->qscale = av_clip(s->qscale, s->avctx->qmin, s->vbv_ignore_qmax ? 31 : s->avctx->qmax);
214    }
215
216    s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
217                 FF_LAMBDA_SHIFT;
218}
219
220void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
221{
222    int i;
223
224    if (matrix) {
225        put_bits(pb, 1, 1);
226        for (i = 0; i < 64; i++) {
227            put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
228        }
229    } else
230        put_bits(pb, 1, 0);
231}
232
233/**
234 * init s->current_picture.qscale_table from s->lambda_table
235 */
236void ff_init_qscale_tab(MpegEncContext *s)
237{
238    int8_t * const qscale_table = s->current_picture.qscale_table;
239    int i;
240
241    for (i = 0; i < s->mb_num; i++) {
242        unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
243        int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
244        qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
245                                                  s->avctx->qmax);
246    }
247}
248
249static void update_duplicate_context_after_me(MpegEncContext *dst,
250                                              const MpegEncContext *src)
251{
252#define COPY(a) dst->a= src->a
253    COPY(pict_type);
254    COPY(current_picture);
255    COPY(f_code);
256    COPY(b_code);
257    COPY(qscale);
258    COPY(lambda);
259    COPY(lambda2);
260    COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
261    COPY(progressive_frame);    // FIXME don't set in encode_header
262    COPY(partitioned_frame);    // FIXME don't set in encode_header
263#undef COPY
264}
265
266static void mpv_encode_init_static(void)
267{
268   for (int i = -16; i < 16; i++)
269        default_fcode_tab[i + MAX_MV] = 1;
270}
271
272/**
273 * Set the given MpegEncContext to defaults for encoding.
274 * the changed fields will not depend upon the prior state of the MpegEncContext.
275 */
276static void mpv_encode_defaults(MpegEncContext *s)
277{
278    static AVOnce init_static_once = AV_ONCE_INIT;
279
280    ff_mpv_common_defaults(s);
281
282    ff_thread_once(&init_static_once, mpv_encode_init_static);
283
284    s->me.mv_penalty = default_mv_penalty;
285    s->fcode_tab     = default_fcode_tab;
286
287    s->input_picture_number  = 0;
288    s->picture_in_gop_number = 0;
289}
290
291av_cold int ff_dct_encode_init(MpegEncContext *s)
292{
293#if ARCH_X86
294    ff_dct_encode_init_x86(s);
295#endif
296
297    if (CONFIG_H263_ENCODER)
298        ff_h263dsp_init(&s->h263dsp);
299    if (!s->dct_quantize)
300        s->dct_quantize = ff_dct_quantize_c;
301    if (!s->denoise_dct)
302        s->denoise_dct  = denoise_dct_c;
303    s->fast_dct_quantize = s->dct_quantize;
304    if (s->avctx->trellis)
305        s->dct_quantize  = dct_quantize_trellis_c;
306
307    return 0;
308}
309
310/* init video encoder */
311av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
312{
313    MpegEncContext *s = avctx->priv_data;
314    AVCPBProperties *cpb_props;
315    int i, ret;
316
317    mpv_encode_defaults(s);
318
319    switch (avctx->pix_fmt) {
320    case AV_PIX_FMT_YUVJ444P:
321    case AV_PIX_FMT_YUV444P:
322        s->chroma_format = CHROMA_444;
323        break;
324    case AV_PIX_FMT_YUVJ422P:
325    case AV_PIX_FMT_YUV422P:
326        s->chroma_format = CHROMA_422;
327        break;
328    case AV_PIX_FMT_YUVJ420P:
329    case AV_PIX_FMT_YUV420P:
330    default:
331        s->chroma_format = CHROMA_420;
332        break;
333    }
334
335    avctx->bits_per_raw_sample = av_clip(avctx->bits_per_raw_sample, 0, 8);
336
337    s->bit_rate = avctx->bit_rate;
338    s->width    = avctx->width;
339    s->height   = avctx->height;
340    if (avctx->gop_size > 600 &&
341        avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
342        av_log(avctx, AV_LOG_WARNING,
343               "keyframe interval too large!, reducing it from %d to %d\n",
344               avctx->gop_size, 600);
345        avctx->gop_size = 600;
346    }
347    s->gop_size     = avctx->gop_size;
348    s->avctx        = avctx;
349    if (avctx->max_b_frames > MAX_B_FRAMES) {
350        av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
351               "is %d.\n", MAX_B_FRAMES);
352        avctx->max_b_frames = MAX_B_FRAMES;
353    } else if (avctx->max_b_frames < 0) {
354        av_log(avctx, AV_LOG_ERROR,
355               "max b frames must be 0 or positive for mpegvideo based encoders\n");
356        return AVERROR(EINVAL);
357    }
358    s->max_b_frames = avctx->max_b_frames;
359    s->codec_id     = avctx->codec->id;
360    if (s->max_b_frames && !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY)) {
361        av_log(avctx, AV_LOG_ERROR, "B-frames not supported by codec\n");
362        return AVERROR(EINVAL);
363    }
364
365    s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
366    s->rtp_mode           = !!s->rtp_payload_size;
367    s->intra_dc_precision = avctx->intra_dc_precision;
368
369    // workaround some differences between how applications specify dc precision
370    if (s->intra_dc_precision < 0) {
371        s->intra_dc_precision += 8;
372    } else if (s->intra_dc_precision >= 8)
373        s->intra_dc_precision -= 8;
374
375    if (s->intra_dc_precision < 0) {
376        av_log(avctx, AV_LOG_ERROR,
377                "intra dc precision must be positive, note some applications use"
378                " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
379        return AVERROR(EINVAL);
380    }
381
382    if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
383        av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
384        return AVERROR(EINVAL);
385    }
386    s->user_specified_pts = AV_NOPTS_VALUE;
387
388    if (s->gop_size <= 1) {
389        s->intra_only = 1;
390        s->gop_size   = 12;
391    } else {
392        s->intra_only = 0;
393    }
394
395    /* Fixed QSCALE */
396    s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
397
398    s->adaptive_quant = (avctx->lumi_masking ||
399                         avctx->dark_masking ||
400                         avctx->temporal_cplx_masking ||
401                         avctx->spatial_cplx_masking  ||
402                         avctx->p_masking      ||
403                         s->border_masking ||
404                         (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
405                        !s->fixed_qscale;
406
407    s->loop_filter = !!(avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
408
409    if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
410        switch(avctx->codec_id) {
411        case AV_CODEC_ID_MPEG1VIDEO:
412        case AV_CODEC_ID_MPEG2VIDEO:
413            avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
414            break;
415        case AV_CODEC_ID_MPEG4:
416        case AV_CODEC_ID_MSMPEG4V1:
417        case AV_CODEC_ID_MSMPEG4V2:
418        case AV_CODEC_ID_MSMPEG4V3:
419            if       (avctx->rc_max_rate >= 15000000) {
420                avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
421            } else if(avctx->rc_max_rate >=  2000000) {
422                avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
423            } else if(avctx->rc_max_rate >=   384000) {
424                avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
425            } else
426                avctx->rc_buffer_size = 40;
427            avctx->rc_buffer_size *= 16384;
428            break;
429        }
430        if (avctx->rc_buffer_size) {
431            av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
432        }
433    }
434
435    if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
436        av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
437        return AVERROR(EINVAL);
438    }
439
440    if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
441        av_log(avctx, AV_LOG_INFO,
442               "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
443    }
444
445    if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
446        av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
447        return AVERROR(EINVAL);
448    }
449
450    if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
451        av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
452        return AVERROR(EINVAL);
453    }
454
455    if (avctx->rc_max_rate &&
456        avctx->rc_max_rate == avctx->bit_rate &&
457        avctx->rc_max_rate != avctx->rc_min_rate) {
458        av_log(avctx, AV_LOG_INFO,
459               "impossible bitrate constraints, this will fail\n");
460    }
461
462    if (avctx->rc_buffer_size &&
463        avctx->bit_rate * (int64_t)avctx->time_base.num >
464            avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
465        av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
466        return AVERROR(EINVAL);
467    }
468
469    if (!s->fixed_qscale &&
470        avctx->bit_rate * av_q2d(avctx->time_base) >
471            avctx->bit_rate_tolerance) {
472        double nbt = avctx->bit_rate * av_q2d(avctx->time_base) * 5;
473        av_log(avctx, AV_LOG_WARNING,
474               "bitrate tolerance %d too small for bitrate %"PRId64", overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
475        if (nbt <= INT_MAX) {
476            avctx->bit_rate_tolerance = nbt;
477        } else
478            avctx->bit_rate_tolerance = INT_MAX;
479    }
480
481    if (avctx->rc_max_rate &&
482        avctx->rc_min_rate == avctx->rc_max_rate &&
483        (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
484         s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
485        90000LL * (avctx->rc_buffer_size - 1) >
486            avctx->rc_max_rate * 0xFFFFLL) {
487        av_log(avctx, AV_LOG_INFO,
488               "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
489               "specified vbv buffer is too large for the given bitrate!\n");
490    }
491
492    if ((avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
493        s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
494        s->codec_id != AV_CODEC_ID_FLV1) {
495        av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
496        return AVERROR(EINVAL);
497    }
498
499    if (s->obmc && avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
500        av_log(avctx, AV_LOG_ERROR,
501               "OBMC is only supported with simple mb decision\n");
502        return AVERROR(EINVAL);
503    }
504
505    if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
506        av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
507        return AVERROR(EINVAL);
508    }
509
510    if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
511         s->codec_id == AV_CODEC_ID_H263  ||
512         s->codec_id == AV_CODEC_ID_H263P) &&
513        (avctx->sample_aspect_ratio.num > 255 ||
514         avctx->sample_aspect_ratio.den > 255)) {
515        av_log(avctx, AV_LOG_WARNING,
516               "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
517               avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
518        av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
519                   avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
520    }
521
522    if ((s->codec_id == AV_CODEC_ID_H263  ||
523         s->codec_id == AV_CODEC_ID_H263P) &&
524        (avctx->width  > 2048 ||
525         avctx->height > 1152 )) {
526        av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
527        return AVERROR(EINVAL);
528    }
529    if ((s->codec_id == AV_CODEC_ID_H263  ||
530         s->codec_id == AV_CODEC_ID_H263P ||
531         s->codec_id == AV_CODEC_ID_RV20) &&
532        ((avctx->width &3) ||
533         (avctx->height&3) )) {
534        av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
535        return AVERROR(EINVAL);
536    }
537
538    if (s->codec_id == AV_CODEC_ID_RV10 &&
539        (avctx->width &15 ||
540         avctx->height&15 )) {
541        av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
542        return AVERROR(EINVAL);
543    }
544
545    if ((s->codec_id == AV_CODEC_ID_WMV1 ||
546         s->codec_id == AV_CODEC_ID_WMV2) &&
547         avctx->width & 1) {
548        av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
549        return AVERROR(EINVAL);
550    }
551
552    if ((avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
553        s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
554        av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
555        return AVERROR(EINVAL);
556    }
557
558    if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
559        av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
560        return AVERROR(EINVAL);
561    }
562
563    if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
564        avctx->mb_decision != FF_MB_DECISION_RD) {
565        av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
566        return AVERROR(EINVAL);
567    }
568
569    if (s->scenechange_threshold < 1000000000 &&
570        (avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
571        av_log(avctx, AV_LOG_ERROR,
572               "closed gop with scene change detection are not supported yet, "
573               "set threshold to 1000000000\n");
574        return AVERROR_PATCHWELCOME;
575    }
576
577    if (avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
578        if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
579            avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
580            av_log(avctx, AV_LOG_ERROR,
581                   "low delay forcing is only available for mpeg2, "
582                   "set strict_std_compliance to 'unofficial' or lower in order to allow it\n");
583            return AVERROR(EINVAL);
584        }
585        if (s->max_b_frames != 0) {
586            av_log(avctx, AV_LOG_ERROR,
587                   "B-frames cannot be used with low delay\n");
588            return AVERROR(EINVAL);
589        }
590    }
591
592    if (s->q_scale_type == 1) {
593        if (avctx->qmax > 28) {
594            av_log(avctx, AV_LOG_ERROR,
595                   "non linear quant only supports qmax <= 28 currently\n");
596            return AVERROR_PATCHWELCOME;
597        }
598    }
599
600    if (avctx->slices > 1 &&
601        !(avctx->codec->capabilities & AV_CODEC_CAP_SLICE_THREADS)) {
602        av_log(avctx, AV_LOG_ERROR, "Multiple slices are not supported by this codec\n");
603        return AVERROR(EINVAL);
604    }
605
606    if (s->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
607        av_log(avctx, AV_LOG_INFO,
608               "notice: b_frame_strategy only affects the first pass\n");
609        s->b_frame_strategy = 0;
610    }
611
612    i = av_gcd(avctx->time_base.den, avctx->time_base.num);
613    if (i > 1) {
614        av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
615        avctx->time_base.den /= i;
616        avctx->time_base.num /= i;
617        //return -1;
618    }
619
620    if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id == AV_CODEC_ID_AMV || s->codec_id == AV_CODEC_ID_SPEEDHQ) {
621        // (a + x * 3 / 8) / x
622        s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
623        s->inter_quant_bias = 0;
624    } else {
625        s->intra_quant_bias = 0;
626        // (a - x / 4) / x
627        s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
628    }
629
630    if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
631        av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
632        return AVERROR(EINVAL);
633    }
634
635    av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
636
637    if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
638        avctx->time_base.den > (1 << 16) - 1) {
639        av_log(avctx, AV_LOG_ERROR,
640               "timebase %d/%d not supported by MPEG 4 standard, "
641               "the maximum admitted value for the timebase denominator "
642               "is %d\n", avctx->time_base.num, avctx->time_base.den,
643               (1 << 16) - 1);
644        return AVERROR(EINVAL);
645    }
646    s->time_increment_bits = av_log2(avctx->time_base.den - 1) + 1;
647
648    switch (avctx->codec->id) {
649    case AV_CODEC_ID_MPEG1VIDEO:
650        s->out_format = FMT_MPEG1;
651        s->low_delay  = !!(avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
652        avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
653        break;
654    case AV_CODEC_ID_MPEG2VIDEO:
655        s->out_format = FMT_MPEG1;
656        s->low_delay  = !!(avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
657        avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
658        s->rtp_mode   = 1;
659        break;
660#if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
661    case AV_CODEC_ID_MJPEG:
662    case AV_CODEC_ID_AMV:
663        s->out_format = FMT_MJPEG;
664        s->intra_only = 1; /* force intra only for jpeg */
665        if ((ret = ff_mjpeg_encode_init(s)) < 0)
666            return ret;
667        avctx->delay = 0;
668        s->low_delay = 1;
669        break;
670#endif
671    case AV_CODEC_ID_SPEEDHQ:
672        s->out_format = FMT_SPEEDHQ;
673        s->intra_only = 1; /* force intra only for SHQ */
674        if (!CONFIG_SPEEDHQ_ENCODER)
675            return AVERROR_ENCODER_NOT_FOUND;
676        if ((ret = ff_speedhq_encode_init(s)) < 0)
677            return ret;
678        avctx->delay = 0;
679        s->low_delay = 1;
680        break;
681    case AV_CODEC_ID_H261:
682        if (!CONFIG_H261_ENCODER)
683            return AVERROR_ENCODER_NOT_FOUND;
684        if (ff_h261_get_picture_format(s->width, s->height) < 0) {
685            av_log(avctx, AV_LOG_ERROR,
686                   "The specified picture size of %dx%d is not valid for the "
687                   "H.261 codec.\nValid sizes are 176x144, 352x288\n",
688                    s->width, s->height);
689            return AVERROR(EINVAL);
690        }
691        s->out_format = FMT_H261;
692        avctx->delay  = 0;
693        s->low_delay  = 1;
694        s->rtp_mode   = 0; /* Sliced encoding not supported */
695        break;
696    case AV_CODEC_ID_H263:
697        if (!CONFIG_H263_ENCODER)
698            return AVERROR_ENCODER_NOT_FOUND;
699        if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
700                             s->width, s->height) == 8) {
701            av_log(avctx, AV_LOG_ERROR,
702                   "The specified picture size of %dx%d is not valid for "
703                   "the H.263 codec.\nValid sizes are 128x96, 176x144, "
704                   "352x288, 704x576, and 1408x1152. "
705                   "Try H.263+.\n", s->width, s->height);
706            return AVERROR(EINVAL);
707        }
708        s->out_format = FMT_H263;
709        avctx->delay  = 0;
710        s->low_delay  = 1;
711        break;
712    case AV_CODEC_ID_H263P:
713        s->out_format = FMT_H263;
714        s->h263_plus  = 1;
715        /* Fx */
716        s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
717        s->modified_quant  = s->h263_aic;
718        s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
719        s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
720
721        /* /Fx */
722        /* These are just to be sure */
723        avctx->delay = 0;
724        s->low_delay = 1;
725        break;
726    case AV_CODEC_ID_FLV1:
727        s->out_format      = FMT_H263;
728        s->h263_flv        = 2; /* format = 1; 11-bit codes */
729        s->unrestricted_mv = 1;
730        s->rtp_mode  = 0; /* don't allow GOB */
731        avctx->delay = 0;
732        s->low_delay = 1;
733        break;
734    case AV_CODEC_ID_RV10:
735        s->out_format = FMT_H263;
736        avctx->delay  = 0;
737        s->low_delay  = 1;
738        break;
739    case AV_CODEC_ID_RV20:
740        s->out_format      = FMT_H263;
741        avctx->delay       = 0;
742        s->low_delay       = 1;
743        s->modified_quant  = 1;
744        s->h263_aic        = 1;
745        s->h263_plus       = 1;
746        s->loop_filter     = 1;
747        s->unrestricted_mv = 0;
748        break;
749    case AV_CODEC_ID_MPEG4:
750        s->out_format      = FMT_H263;
751        s->h263_pred       = 1;
752        s->unrestricted_mv = 1;
753        s->low_delay       = s->max_b_frames ? 0 : 1;
754        avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
755        break;
756    case AV_CODEC_ID_MSMPEG4V2:
757        s->out_format      = FMT_H263;
758        s->h263_pred       = 1;
759        s->unrestricted_mv = 1;
760        s->msmpeg4_version = 2;
761        avctx->delay       = 0;
762        s->low_delay       = 1;
763        break;
764    case AV_CODEC_ID_MSMPEG4V3:
765        s->out_format        = FMT_H263;
766        s->h263_pred         = 1;
767        s->unrestricted_mv   = 1;
768        s->msmpeg4_version   = 3;
769        s->flipflop_rounding = 1;
770        avctx->delay         = 0;
771        s->low_delay         = 1;
772        break;
773    case AV_CODEC_ID_WMV1:
774        s->out_format        = FMT_H263;
775        s->h263_pred         = 1;
776        s->unrestricted_mv   = 1;
777        s->msmpeg4_version   = 4;
778        s->flipflop_rounding = 1;
779        avctx->delay         = 0;
780        s->low_delay         = 1;
781        break;
782    case AV_CODEC_ID_WMV2:
783        s->out_format        = FMT_H263;
784        s->h263_pred         = 1;
785        s->unrestricted_mv   = 1;
786        s->msmpeg4_version   = 5;
787        s->flipflop_rounding = 1;
788        avctx->delay         = 0;
789        s->low_delay         = 1;
790        break;
791    default:
792        return AVERROR(EINVAL);
793    }
794
795    avctx->has_b_frames = !s->low_delay;
796
797    s->encoding = 1;
798
799    s->progressive_frame    =
800    s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
801                                                AV_CODEC_FLAG_INTERLACED_ME) ||
802                                s->alternate_scan);
803
804    /* init */
805    ff_mpv_idct_init(s);
806    if ((ret = ff_mpv_common_init(s)) < 0)
807        return ret;
808
809    ff_fdctdsp_init(&s->fdsp, avctx);
810    ff_me_cmp_init(&s->mecc, avctx);
811    ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
812    ff_pixblockdsp_init(&s->pdsp, avctx);
813    ff_qpeldsp_init(&s->qdsp);
814
815    if (!(avctx->stats_out = av_mallocz(256))               ||
816        !FF_ALLOCZ_TYPED_ARRAY(s->q_intra_matrix,          32) ||
817        !FF_ALLOCZ_TYPED_ARRAY(s->q_chroma_intra_matrix,   32) ||
818        !FF_ALLOCZ_TYPED_ARRAY(s->q_inter_matrix,          32) ||
819        !FF_ALLOCZ_TYPED_ARRAY(s->q_intra_matrix16,        32) ||
820        !FF_ALLOCZ_TYPED_ARRAY(s->q_chroma_intra_matrix16, 32) ||
821        !FF_ALLOCZ_TYPED_ARRAY(s->q_inter_matrix16,        32) ||
822        !FF_ALLOCZ_TYPED_ARRAY(s->input_picture,           MAX_PICTURE_COUNT) ||
823        !FF_ALLOCZ_TYPED_ARRAY(s->reordered_input_picture, MAX_PICTURE_COUNT))
824        return AVERROR(ENOMEM);
825
826    if (s->noise_reduction) {
827        if (!FF_ALLOCZ_TYPED_ARRAY(s->dct_offset, 2))
828            return AVERROR(ENOMEM);
829    }
830
831    ff_dct_encode_init(s);
832
833    if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
834        s->chroma_qscale_table = ff_h263_chroma_qscale_table;
835
836    if (s->slice_context_count > 1) {
837        s->rtp_mode = 1;
838
839        if (avctx->codec_id == AV_CODEC_ID_H263P)
840            s->h263_slice_structured = 1;
841    }
842
843    s->quant_precision = 5;
844
845    ret  = ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      avctx->ildct_cmp);
846    ret |= ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->frame_skip_cmp);
847    if (ret < 0)
848        return AVERROR(EINVAL);
849
850    if (CONFIG_H261_ENCODER && s->out_format == FMT_H261) {
851        ff_h261_encode_init(s);
852    } else if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
853               && s->out_format == FMT_MPEG1) {
854        ff_mpeg1_encode_init(s);
855    } else if (CONFIG_H263_ENCODER && s->out_format == FMT_H263) {
856        ff_h263_encode_init(s);
857        if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
858            ff_msmpeg4_encode_init(s);
859    }
860
861    /* init q matrix */
862    for (i = 0; i < 64; i++) {
863        int j = s->idsp.idct_permutation[i];
864        if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
865            s->mpeg_quant) {
866            s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
867            s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
868        } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
869            s->intra_matrix[j] =
870            s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
871        } else if (CONFIG_SPEEDHQ_ENCODER && s->codec_id == AV_CODEC_ID_SPEEDHQ) {
872            s->intra_matrix[j] =
873            s->inter_matrix[j] = ff_mpeg1_default_intra_matrix[i];
874        } else {
875            /* MPEG-1/2 */
876            s->chroma_intra_matrix[j] =
877            s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
878            s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
879        }
880        if (avctx->intra_matrix)
881            s->intra_matrix[j] = avctx->intra_matrix[i];
882        if (avctx->inter_matrix)
883            s->inter_matrix[j] = avctx->inter_matrix[i];
884    }
885
886    /* precompute matrix */
887    /* for mjpeg, we do include qscale in the matrix */
888    if (s->out_format != FMT_MJPEG) {
889        ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
890                          s->intra_matrix, s->intra_quant_bias, avctx->qmin,
891                          31, 1);
892        ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
893                          s->inter_matrix, s->inter_quant_bias, avctx->qmin,
894                          31, 0);
895    }
896
897    if ((ret = ff_rate_control_init(s)) < 0)
898        return ret;
899
900    if (s->b_frame_strategy == 2) {
901        for (i = 0; i < s->max_b_frames + 2; i++) {
902            s->tmp_frames[i] = av_frame_alloc();
903            if (!s->tmp_frames[i])
904                return AVERROR(ENOMEM);
905
906            s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
907            s->tmp_frames[i]->width  = s->width  >> s->brd_scale;
908            s->tmp_frames[i]->height = s->height >> s->brd_scale;
909
910            ret = av_frame_get_buffer(s->tmp_frames[i], 0);
911            if (ret < 0)
912                return ret;
913        }
914    }
915
916    cpb_props = ff_add_cpb_side_data(avctx);
917    if (!cpb_props)
918        return AVERROR(ENOMEM);
919    cpb_props->max_bitrate = avctx->rc_max_rate;
920    cpb_props->min_bitrate = avctx->rc_min_rate;
921    cpb_props->avg_bitrate = avctx->bit_rate;
922    cpb_props->buffer_size = avctx->rc_buffer_size;
923
924    return 0;
925}
926
927av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
928{
929    MpegEncContext *s = avctx->priv_data;
930    int i;
931
932    ff_rate_control_uninit(s);
933
934    ff_mpv_common_end(s);
935
936    for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
937        av_frame_free(&s->tmp_frames[i]);
938
939    av_frame_free(&s->new_picture);
940
941    av_freep(&avctx->stats_out);
942
943    if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
944    if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
945    s->q_chroma_intra_matrix=   NULL;
946    s->q_chroma_intra_matrix16= NULL;
947    av_freep(&s->q_intra_matrix);
948    av_freep(&s->q_inter_matrix);
949    av_freep(&s->q_intra_matrix16);
950    av_freep(&s->q_inter_matrix16);
951    av_freep(&s->input_picture);
952    av_freep(&s->reordered_input_picture);
953    av_freep(&s->dct_offset);
954
955    return 0;
956}
957
958static int get_sae(uint8_t *src, int ref, int stride)
959{
960    int x,y;
961    int acc = 0;
962
963    for (y = 0; y < 16; y++) {
964        for (x = 0; x < 16; x++) {
965            acc += FFABS(src[x + y * stride] - ref);
966        }
967    }
968
969    return acc;
970}
971
972static int get_intra_count(MpegEncContext *s, uint8_t *src,
973                           uint8_t *ref, int stride)
974{
975    int x, y, w, h;
976    int acc = 0;
977
978    w = s->width  & ~15;
979    h = s->height & ~15;
980
981    for (y = 0; y < h; y += 16) {
982        for (x = 0; x < w; x += 16) {
983            int offset = x + y * stride;
984            int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
985                                      stride, 16);
986            int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
987            int sae  = get_sae(src + offset, mean, stride);
988
989            acc += sae + 500 < sad;
990        }
991    }
992    return acc;
993}
994
995static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
996{
997    return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
998                            s->chroma_x_shift, s->chroma_y_shift, s->out_format,
999                            s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
1000                            &s->linesize, &s->uvlinesize);
1001}
1002
1003static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1004{
1005    Picture *pic = NULL;
1006    int64_t pts;
1007    int i, display_picture_number = 0, ret;
1008    int encoding_delay = s->max_b_frames ? s->max_b_frames
1009                                         : (s->low_delay ? 0 : 1);
1010    int flush_offset = 1;
1011    int direct = 1;
1012
1013    if (pic_arg) {
1014        pts = pic_arg->pts;
1015        display_picture_number = s->input_picture_number++;
1016
1017        if (pts != AV_NOPTS_VALUE) {
1018            if (s->user_specified_pts != AV_NOPTS_VALUE) {
1019                int64_t last = s->user_specified_pts;
1020
1021                if (pts <= last) {
1022                    av_log(s->avctx, AV_LOG_ERROR,
1023                           "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1024                           pts, last);
1025                    return AVERROR(EINVAL);
1026                }
1027
1028                if (!s->low_delay && display_picture_number == 1)
1029                    s->dts_delta = pts - last;
1030            }
1031            s->user_specified_pts = pts;
1032        } else {
1033            if (s->user_specified_pts != AV_NOPTS_VALUE) {
1034                s->user_specified_pts =
1035                pts = s->user_specified_pts + 1;
1036                av_log(s->avctx, AV_LOG_INFO,
1037                       "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1038                       pts);
1039            } else {
1040                pts = display_picture_number;
1041            }
1042        }
1043
1044        if (!pic_arg->buf[0] ||
1045            pic_arg->linesize[0] != s->linesize ||
1046            pic_arg->linesize[1] != s->uvlinesize ||
1047            pic_arg->linesize[2] != s->uvlinesize)
1048            direct = 0;
1049        if ((s->width & 15) || (s->height & 15))
1050            direct = 0;
1051        if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1052            direct = 0;
1053        if (s->linesize & (STRIDE_ALIGN-1))
1054            direct = 0;
1055
1056        ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1057                pic_arg->linesize[1], s->linesize, s->uvlinesize);
1058
1059        i = ff_find_unused_picture(s->avctx, s->picture, direct);
1060        if (i < 0)
1061            return i;
1062
1063        pic = &s->picture[i];
1064        pic->reference = 3;
1065
1066        if (direct) {
1067            if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1068                return ret;
1069        }
1070        ret = alloc_picture(s, pic, direct);
1071        if (ret < 0)
1072            return ret;
1073
1074        if (!direct) {
1075            if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1076                pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1077                pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1078                // empty
1079            } else {
1080                int h_chroma_shift, v_chroma_shift;
1081                av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1082                                                 &h_chroma_shift,
1083                                                 &v_chroma_shift);
1084
1085                for (i = 0; i < 3; i++) {
1086                    int src_stride = pic_arg->linesize[i];
1087                    int dst_stride = i ? s->uvlinesize : s->linesize;
1088                    int h_shift = i ? h_chroma_shift : 0;
1089                    int v_shift = i ? v_chroma_shift : 0;
1090                    int w = s->width  >> h_shift;
1091                    int h = s->height >> v_shift;
1092                    uint8_t *src = pic_arg->data[i];
1093                    uint8_t *dst = pic->f->data[i];
1094                    int vpad = 16;
1095
1096                    if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1097                        && !s->progressive_sequence
1098                        && FFALIGN(s->height, 32) - s->height > 16)
1099                        vpad = 32;
1100
1101                    if (!s->avctx->rc_buffer_size)
1102                        dst += INPLACE_OFFSET;
1103
1104                    if (src_stride == dst_stride)
1105                        memcpy(dst, src, src_stride * h);
1106                    else {
1107                        int h2 = h;
1108                        uint8_t *dst2 = dst;
1109                        while (h2--) {
1110                            memcpy(dst2, src, w);
1111                            dst2 += dst_stride;
1112                            src += src_stride;
1113                        }
1114                    }
1115                    if ((s->width & 15) || (s->height & (vpad-1))) {
1116                        s->mpvencdsp.draw_edges(dst, dst_stride,
1117                                                w, h,
1118                                                16 >> h_shift,
1119                                                vpad >> v_shift,
1120                                                EDGE_BOTTOM);
1121                    }
1122                }
1123                emms_c();
1124            }
1125        }
1126        ret = av_frame_copy_props(pic->f, pic_arg);
1127        if (ret < 0)
1128            return ret;
1129
1130        pic->f->display_picture_number = display_picture_number;
1131        pic->f->pts = pts; // we set this here to avoid modifying pic_arg
1132    } else {
1133        /* Flushing: When we have not received enough input frames,
1134         * ensure s->input_picture[0] contains the first picture */
1135        for (flush_offset = 0; flush_offset < encoding_delay + 1; flush_offset++)
1136            if (s->input_picture[flush_offset])
1137                break;
1138
1139        if (flush_offset <= 1)
1140            flush_offset = 1;
1141        else
1142            encoding_delay = encoding_delay - flush_offset + 1;
1143    }
1144
1145    /* shift buffer entries */
1146    for (i = flush_offset; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1147        s->input_picture[i - flush_offset] = s->input_picture[i];
1148
1149    s->input_picture[encoding_delay] = (Picture*) pic;
1150
1151    return 0;
1152}
1153
1154static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1155{
1156    int x, y, plane;
1157    int score = 0;
1158    int64_t score64 = 0;
1159
1160    for (plane = 0; plane < 3; plane++) {
1161        const int stride = p->f->linesize[plane];
1162        const int bw = plane ? 1 : 2;
1163        for (y = 0; y < s->mb_height * bw; y++) {
1164            for (x = 0; x < s->mb_width * bw; x++) {
1165                int off = p->shared ? 0 : 16;
1166                uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1167                uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1168                int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1169
1170                switch (FFABS(s->frame_skip_exp)) {
1171                case 0: score    =  FFMAX(score, v);          break;
1172                case 1: score   += FFABS(v);                  break;
1173                case 2: score64 += v * (int64_t)v;                       break;
1174                case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1175                case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1176                }
1177            }
1178        }
1179    }
1180    emms_c();
1181
1182    if (score)
1183        score64 = score;
1184    if (s->frame_skip_exp < 0)
1185        score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1186                      -1.0/s->frame_skip_exp);
1187
1188    if (score64 < s->frame_skip_threshold)
1189        return 1;
1190    if (score64 < ((s->frame_skip_factor * (int64_t) s->lambda) >> 8))
1191        return 1;
1192    return 0;
1193}
1194
1195static int encode_frame(AVCodecContext *c, AVFrame *frame, AVPacket *pkt)
1196{
1197    int ret;
1198    int size = 0;
1199
1200    ret = avcodec_send_frame(c, frame);
1201    if (ret < 0)
1202        return ret;
1203
1204    do {
1205        ret = avcodec_receive_packet(c, pkt);
1206        if (ret >= 0) {
1207            size += pkt->size;
1208            av_packet_unref(pkt);
1209        } else if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
1210            return ret;
1211    } while (ret >= 0);
1212
1213    return size;
1214}
1215
1216static int estimate_best_b_count(MpegEncContext *s)
1217{
1218    AVPacket *pkt;
1219    const int scale = s->brd_scale;
1220    int width  = s->width  >> scale;
1221    int height = s->height >> scale;
1222    int i, j, out_size, p_lambda, b_lambda, lambda2;
1223    int64_t best_rd  = INT64_MAX;
1224    int best_b_count = -1;
1225    int ret = 0;
1226
1227    av_assert0(scale >= 0 && scale <= 3);
1228
1229    pkt = av_packet_alloc();
1230    if (!pkt)
1231        return AVERROR(ENOMEM);
1232
1233    //emms_c();
1234    //s->next_picture_ptr->quality;
1235    p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1236    //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1237    b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1238    if (!b_lambda) // FIXME we should do this somewhere else
1239        b_lambda = p_lambda;
1240    lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1241               FF_LAMBDA_SHIFT;
1242
1243    for (i = 0; i < s->max_b_frames + 2; i++) {
1244        Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1245                                                s->next_picture_ptr;
1246        uint8_t *data[4];
1247
1248        if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1249            pre_input = *pre_input_ptr;
1250            memcpy(data, pre_input_ptr->f->data, sizeof(data));
1251
1252            if (!pre_input.shared && i) {
1253                data[0] += INPLACE_OFFSET;
1254                data[1] += INPLACE_OFFSET;
1255                data[2] += INPLACE_OFFSET;
1256            }
1257
1258            s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1259                                       s->tmp_frames[i]->linesize[0],
1260                                       data[0],
1261                                       pre_input.f->linesize[0],
1262                                       width, height);
1263            s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1264                                       s->tmp_frames[i]->linesize[1],
1265                                       data[1],
1266                                       pre_input.f->linesize[1],
1267                                       width >> 1, height >> 1);
1268            s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1269                                       s->tmp_frames[i]->linesize[2],
1270                                       data[2],
1271                                       pre_input.f->linesize[2],
1272                                       width >> 1, height >> 1);
1273        }
1274    }
1275
1276    for (j = 0; j < s->max_b_frames + 1; j++) {
1277        AVCodecContext *c;
1278        int64_t rd = 0;
1279
1280        if (!s->input_picture[j])
1281            break;
1282
1283        c = avcodec_alloc_context3(NULL);
1284        if (!c) {
1285            ret = AVERROR(ENOMEM);
1286            goto fail;
1287        }
1288
1289        c->width        = width;
1290        c->height       = height;
1291        c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1292        c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1293        c->mb_decision  = s->avctx->mb_decision;
1294        c->me_cmp       = s->avctx->me_cmp;
1295        c->mb_cmp       = s->avctx->mb_cmp;
1296        c->me_sub_cmp   = s->avctx->me_sub_cmp;
1297        c->pix_fmt      = AV_PIX_FMT_YUV420P;
1298        c->time_base    = s->avctx->time_base;
1299        c->max_b_frames = s->max_b_frames;
1300
1301        ret = avcodec_open2(c, s->avctx->codec, NULL);
1302        if (ret < 0)
1303            goto fail;
1304
1305
1306        s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1307        s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1308
1309        out_size = encode_frame(c, s->tmp_frames[0], pkt);
1310        if (out_size < 0) {
1311            ret = out_size;
1312            goto fail;
1313        }
1314
1315        //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1316
1317        for (i = 0; i < s->max_b_frames + 1; i++) {
1318            int is_p = i % (j + 1) == j || i == s->max_b_frames;
1319
1320            s->tmp_frames[i + 1]->pict_type = is_p ?
1321                                     AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1322            s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1323
1324            out_size = encode_frame(c, s->tmp_frames[i + 1], pkt);
1325            if (out_size < 0) {
1326                ret = out_size;
1327                goto fail;
1328            }
1329
1330            rd += (out_size * (uint64_t)lambda2) >> (FF_LAMBDA_SHIFT - 3);
1331        }
1332
1333        /* get the delayed frames */
1334        out_size = encode_frame(c, NULL, pkt);
1335        if (out_size < 0) {
1336            ret = out_size;
1337            goto fail;
1338        }
1339        rd += (out_size * (uint64_t)lambda2) >> (FF_LAMBDA_SHIFT - 3);
1340
1341        rd += c->error[0] + c->error[1] + c->error[2];
1342
1343        if (rd < best_rd) {
1344            best_rd = rd;
1345            best_b_count = j;
1346        }
1347
1348fail:
1349        avcodec_free_context(&c);
1350        av_packet_unref(pkt);
1351        if (ret < 0) {
1352            best_b_count = ret;
1353            break;
1354        }
1355    }
1356
1357    av_packet_free(&pkt);
1358
1359    return best_b_count;
1360}
1361
1362static int select_input_picture(MpegEncContext *s)
1363{
1364    int i, ret;
1365
1366    for (i = 1; i < MAX_PICTURE_COUNT; i++)
1367        s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1368    s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1369
1370    /* set next picture type & ordering */
1371    if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1372        if (s->frame_skip_threshold || s->frame_skip_factor) {
1373            if (s->picture_in_gop_number < s->gop_size &&
1374                s->next_picture_ptr &&
1375                skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1376                // FIXME check that the gop check above is +-1 correct
1377                av_frame_unref(s->input_picture[0]->f);
1378
1379                ff_vbv_update(s, 0);
1380
1381                goto no_output_pic;
1382            }
1383        }
1384
1385        if (/*s->picture_in_gop_number >= s->gop_size ||*/
1386            !s->next_picture_ptr || s->intra_only) {
1387            s->reordered_input_picture[0] = s->input_picture[0];
1388            s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1389            s->reordered_input_picture[0]->f->coded_picture_number =
1390                s->coded_picture_number++;
1391        } else {
1392            int b_frames = 0;
1393
1394            if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1395                for (i = 0; i < s->max_b_frames + 1; i++) {
1396                    int pict_num = s->input_picture[0]->f->display_picture_number + i;
1397
1398                    if (pict_num >= s->rc_context.num_entries)
1399                        break;
1400                    if (!s->input_picture[i]) {
1401                        s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1402                        break;
1403                    }
1404
1405                    s->input_picture[i]->f->pict_type =
1406                        s->rc_context.entry[pict_num].new_pict_type;
1407                }
1408            }
1409
1410            if (s->b_frame_strategy == 0) {
1411                b_frames = s->max_b_frames;
1412                while (b_frames && !s->input_picture[b_frames])
1413                    b_frames--;
1414            } else if (s->b_frame_strategy == 1) {
1415                for (i = 1; i < s->max_b_frames + 1; i++) {
1416                    if (s->input_picture[i] &&
1417                        s->input_picture[i]->b_frame_score == 0) {
1418                        s->input_picture[i]->b_frame_score =
1419                            get_intra_count(s,
1420                                            s->input_picture[i    ]->f->data[0],
1421                                            s->input_picture[i - 1]->f->data[0],
1422                                            s->linesize) + 1;
1423                    }
1424                }
1425                for (i = 0; i < s->max_b_frames + 1; i++) {
1426                    if (!s->input_picture[i] ||
1427                        s->input_picture[i]->b_frame_score - 1 >
1428                            s->mb_num / s->b_sensitivity)
1429                        break;
1430                }
1431
1432                b_frames = FFMAX(0, i - 1);
1433
1434                /* reset scores */
1435                for (i = 0; i < b_frames + 1; i++) {
1436                    s->input_picture[i]->b_frame_score = 0;
1437                }
1438            } else if (s->b_frame_strategy == 2) {
1439                b_frames = estimate_best_b_count(s);
1440                if (b_frames < 0)
1441                    return b_frames;
1442            }
1443
1444            emms_c();
1445
1446            for (i = b_frames - 1; i >= 0; i--) {
1447                int type = s->input_picture[i]->f->pict_type;
1448                if (type && type != AV_PICTURE_TYPE_B)
1449                    b_frames = i;
1450            }
1451            if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1452                b_frames == s->max_b_frames) {
1453                av_log(s->avctx, AV_LOG_ERROR,
1454                       "warning, too many B-frames in a row\n");
1455            }
1456
1457            if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1458                if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1459                    s->gop_size > s->picture_in_gop_number) {
1460                    b_frames = s->gop_size - s->picture_in_gop_number - 1;
1461                } else {
1462                    if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1463                        b_frames = 0;
1464                    s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1465                }
1466            }
1467
1468            if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1469                s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1470                b_frames--;
1471
1472            s->reordered_input_picture[0] = s->input_picture[b_frames];
1473            if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1474                s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1475            s->reordered_input_picture[0]->f->coded_picture_number =
1476                s->coded_picture_number++;
1477            for (i = 0; i < b_frames; i++) {
1478                s->reordered_input_picture[i + 1] = s->input_picture[i];
1479                s->reordered_input_picture[i + 1]->f->pict_type =
1480                    AV_PICTURE_TYPE_B;
1481                s->reordered_input_picture[i + 1]->f->coded_picture_number =
1482                    s->coded_picture_number++;
1483            }
1484        }
1485    }
1486no_output_pic:
1487    av_frame_unref(s->new_picture);
1488
1489    if (s->reordered_input_picture[0]) {
1490        s->reordered_input_picture[0]->reference =
1491           s->reordered_input_picture[0]->f->pict_type !=
1492               AV_PICTURE_TYPE_B ? 3 : 0;
1493
1494        if ((ret = av_frame_ref(s->new_picture,
1495                                s->reordered_input_picture[0]->f)))
1496            return ret;
1497
1498        if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1499            // input is a shared pix, so we can't modify it -> allocate a new
1500            // one & ensure that the shared one is reuseable
1501
1502            Picture *pic;
1503            int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1504            if (i < 0)
1505                return i;
1506            pic = &s->picture[i];
1507
1508            pic->reference = s->reordered_input_picture[0]->reference;
1509            if (alloc_picture(s, pic, 0) < 0) {
1510                return -1;
1511            }
1512
1513            ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1514            if (ret < 0)
1515                return ret;
1516
1517            /* mark us unused / free shared pic */
1518            av_frame_unref(s->reordered_input_picture[0]->f);
1519            s->reordered_input_picture[0]->shared = 0;
1520
1521            s->current_picture_ptr = pic;
1522        } else {
1523            // input is not a shared pix -> reuse buffer for current_pix
1524            s->current_picture_ptr = s->reordered_input_picture[0];
1525            for (i = 0; i < 4; i++) {
1526                if (s->new_picture->data[i])
1527                    s->new_picture->data[i] += INPLACE_OFFSET;
1528            }
1529        }
1530        s->picture_number = s->new_picture->display_picture_number;
1531    }
1532    return 0;
1533}
1534
1535static void frame_end(MpegEncContext *s)
1536{
1537    if (s->unrestricted_mv &&
1538        s->current_picture.reference &&
1539        !s->intra_only) {
1540        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1541        int hshift = desc->log2_chroma_w;
1542        int vshift = desc->log2_chroma_h;
1543        s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1544                                s->current_picture.f->linesize[0],
1545                                s->h_edge_pos, s->v_edge_pos,
1546                                EDGE_WIDTH, EDGE_WIDTH,
1547                                EDGE_TOP | EDGE_BOTTOM);
1548        s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1549                                s->current_picture.f->linesize[1],
1550                                s->h_edge_pos >> hshift,
1551                                s->v_edge_pos >> vshift,
1552                                EDGE_WIDTH >> hshift,
1553                                EDGE_WIDTH >> vshift,
1554                                EDGE_TOP | EDGE_BOTTOM);
1555        s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1556                                s->current_picture.f->linesize[2],
1557                                s->h_edge_pos >> hshift,
1558                                s->v_edge_pos >> vshift,
1559                                EDGE_WIDTH >> hshift,
1560                                EDGE_WIDTH >> vshift,
1561                                EDGE_TOP | EDGE_BOTTOM);
1562    }
1563
1564    emms_c();
1565
1566    s->last_pict_type                 = s->pict_type;
1567    s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1568    if (s->pict_type!= AV_PICTURE_TYPE_B)
1569        s->last_non_b_pict_type = s->pict_type;
1570}
1571
1572static void update_noise_reduction(MpegEncContext *s)
1573{
1574    int intra, i;
1575
1576    for (intra = 0; intra < 2; intra++) {
1577        if (s->dct_count[intra] > (1 << 16)) {
1578            for (i = 0; i < 64; i++) {
1579                s->dct_error_sum[intra][i] >>= 1;
1580            }
1581            s->dct_count[intra] >>= 1;
1582        }
1583
1584        for (i = 0; i < 64; i++) {
1585            s->dct_offset[intra][i] = (s->noise_reduction *
1586                                       s->dct_count[intra] +
1587                                       s->dct_error_sum[intra][i] / 2) /
1588                                      (s->dct_error_sum[intra][i] + 1);
1589        }
1590    }
1591}
1592
1593static int frame_start(MpegEncContext *s)
1594{
1595    int ret;
1596
1597    /* mark & release old frames */
1598    if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1599        s->last_picture_ptr != s->next_picture_ptr &&
1600        s->last_picture_ptr->f->buf[0]) {
1601        ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1602    }
1603
1604    s->current_picture_ptr->f->pict_type = s->pict_type;
1605    s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1606
1607    ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1608    if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1609                                   s->current_picture_ptr)) < 0)
1610        return ret;
1611
1612    if (s->pict_type != AV_PICTURE_TYPE_B) {
1613        s->last_picture_ptr = s->next_picture_ptr;
1614        s->next_picture_ptr = s->current_picture_ptr;
1615    }
1616
1617    if (s->last_picture_ptr) {
1618        ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1619        if (s->last_picture_ptr->f->buf[0] &&
1620            (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1621                                       s->last_picture_ptr)) < 0)
1622            return ret;
1623    }
1624    if (s->next_picture_ptr) {
1625        ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1626        if (s->next_picture_ptr->f->buf[0] &&
1627            (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1628                                       s->next_picture_ptr)) < 0)
1629            return ret;
1630    }
1631
1632    if (s->picture_structure!= PICT_FRAME) {
1633        int i;
1634        for (i = 0; i < 4; i++) {
1635            if (s->picture_structure == PICT_BOTTOM_FIELD) {
1636                s->current_picture.f->data[i] +=
1637                    s->current_picture.f->linesize[i];
1638            }
1639            s->current_picture.f->linesize[i] *= 2;
1640            s->last_picture.f->linesize[i]    *= 2;
1641            s->next_picture.f->linesize[i]    *= 2;
1642        }
1643    }
1644
1645    if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1646        s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1647        s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1648    } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1649        s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1650        s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1651    } else {
1652        s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1653        s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1654    }
1655
1656    if (s->dct_error_sum) {
1657        av_assert2(s->noise_reduction && s->encoding);
1658        update_noise_reduction(s);
1659    }
1660
1661    return 0;
1662}
1663
1664int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1665                          const AVFrame *pic_arg, int *got_packet)
1666{
1667    MpegEncContext *s = avctx->priv_data;
1668    int i, stuffing_count, ret;
1669    int context_count = s->slice_context_count;
1670
1671    s->vbv_ignore_qmax = 0;
1672
1673    s->picture_in_gop_number++;
1674
1675    if (load_input_picture(s, pic_arg) < 0)
1676        return -1;
1677
1678    if (select_input_picture(s) < 0) {
1679        return -1;
1680    }
1681
1682    /* output? */
1683    if (s->new_picture->data[0]) {
1684        int growing_buffer = context_count == 1 && !s->data_partitioning;
1685        size_t pkt_size = 10000 + s->mb_width * s->mb_height *
1686                                  (growing_buffer ? 64 : (MAX_MB_BYTES + 100));
1687        if (CONFIG_MJPEG_ENCODER && avctx->codec_id == AV_CODEC_ID_MJPEG) {
1688            ret = ff_mjpeg_add_icc_profile_size(avctx, s->new_picture, &pkt_size);
1689            if (ret < 0)
1690                return ret;
1691        }
1692        if ((ret = ff_alloc_packet(avctx, pkt, pkt_size)) < 0)
1693            return ret;
1694        pkt->size = avctx->internal->byte_buffer_size - AV_INPUT_BUFFER_PADDING_SIZE;
1695        if (s->mb_info) {
1696            s->mb_info_ptr = av_packet_new_side_data(pkt,
1697                                 AV_PKT_DATA_H263_MB_INFO,
1698                                 s->mb_width*s->mb_height*12);
1699            s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1700        }
1701
1702        for (i = 0; i < context_count; i++) {
1703            int start_y = s->thread_context[i]->start_mb_y;
1704            int   end_y = s->thread_context[i]->  end_mb_y;
1705            int h       = s->mb_height;
1706            uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1707            uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1708
1709            init_put_bits(&s->thread_context[i]->pb, start, end - start);
1710        }
1711
1712        s->pict_type = s->new_picture->pict_type;
1713        //emms_c();
1714        ret = frame_start(s);
1715        if (ret < 0)
1716            return ret;
1717vbv_retry:
1718        ret = encode_picture(s, s->picture_number);
1719        if (growing_buffer) {
1720            av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1721            pkt->data = s->pb.buf;
1722            pkt->size = avctx->internal->byte_buffer_size;
1723        }
1724        if (ret < 0)
1725            return -1;
1726
1727        frame_end(s);
1728
1729       if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) && s->out_format == FMT_MJPEG)
1730            ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1731
1732        if (avctx->rc_buffer_size) {
1733            RateControlContext *rcc = &s->rc_context;
1734            int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1735            int hq = (avctx->mb_decision == FF_MB_DECISION_RD || avctx->trellis);
1736            int min_step = hq ? 1 : (1<<(FF_LAMBDA_SHIFT + 7))/139;
1737
1738            if (put_bits_count(&s->pb) > max_size &&
1739                s->lambda < s->lmax) {
1740                s->next_lambda = FFMAX(s->lambda + min_step, s->lambda *
1741                                       (s->qscale + 1) / s->qscale);
1742                if (s->adaptive_quant) {
1743                    int i;
1744                    for (i = 0; i < s->mb_height * s->mb_stride; i++)
1745                        s->lambda_table[i] =
1746                            FFMAX(s->lambda_table[i] + min_step,
1747                                  s->lambda_table[i] * (s->qscale + 1) /
1748                                  s->qscale);
1749                }
1750                s->mb_skipped = 0;        // done in frame_start()
1751                // done in encode_picture() so we must undo it
1752                if (s->pict_type == AV_PICTURE_TYPE_P) {
1753                    if (s->flipflop_rounding          ||
1754                        s->codec_id == AV_CODEC_ID_H263P ||
1755                        s->codec_id == AV_CODEC_ID_MPEG4)
1756                        s->no_rounding ^= 1;
1757                }
1758                if (s->pict_type != AV_PICTURE_TYPE_B) {
1759                    s->time_base       = s->last_time_base;
1760                    s->last_non_b_time = s->time - s->pp_time;
1761                }
1762                for (i = 0; i < context_count; i++) {
1763                    PutBitContext *pb = &s->thread_context[i]->pb;
1764                    init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1765                }
1766                s->vbv_ignore_qmax = 1;
1767                av_log(avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1768                goto vbv_retry;
1769            }
1770
1771            av_assert0(avctx->rc_max_rate);
1772        }
1773
1774        if (avctx->flags & AV_CODEC_FLAG_PASS1)
1775            ff_write_pass1_stats(s);
1776
1777        for (i = 0; i < 4; i++) {
1778            s->current_picture_ptr->encoding_error[i] = s->current_picture.encoding_error[i];
1779            avctx->error[i] += s->current_picture_ptr->encoding_error[i];
1780        }
1781        ff_side_data_set_encoder_stats(pkt, s->current_picture.f->quality,
1782                                       s->current_picture_ptr->encoding_error,
1783                                       (avctx->flags&AV_CODEC_FLAG_PSNR) ? MPEGVIDEO_MAX_PLANES : 0,
1784                                       s->pict_type);
1785
1786        if (avctx->flags & AV_CODEC_FLAG_PASS1)
1787            assert(put_bits_count(&s->pb) == s->header_bits + s->mv_bits +
1788                                             s->misc_bits + s->i_tex_bits +
1789                                             s->p_tex_bits);
1790        flush_put_bits(&s->pb);
1791        s->frame_bits  = put_bits_count(&s->pb);
1792
1793        stuffing_count = ff_vbv_update(s, s->frame_bits);
1794        s->stuffing_bits = 8*stuffing_count;
1795        if (stuffing_count) {
1796            if (put_bytes_left(&s->pb, 0) < stuffing_count + 50) {
1797                av_log(avctx, AV_LOG_ERROR, "stuffing too large\n");
1798                return -1;
1799            }
1800
1801            switch (s->codec_id) {
1802            case AV_CODEC_ID_MPEG1VIDEO:
1803            case AV_CODEC_ID_MPEG2VIDEO:
1804                while (stuffing_count--) {
1805                    put_bits(&s->pb, 8, 0);
1806                }
1807            break;
1808            case AV_CODEC_ID_MPEG4:
1809                put_bits(&s->pb, 16, 0);
1810                put_bits(&s->pb, 16, 0x1C3);
1811                stuffing_count -= 4;
1812                while (stuffing_count--) {
1813                    put_bits(&s->pb, 8, 0xFF);
1814                }
1815            break;
1816            default:
1817                av_log(avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1818                s->stuffing_bits = 0;
1819            }
1820            flush_put_bits(&s->pb);
1821            s->frame_bits  = put_bits_count(&s->pb);
1822        }
1823
1824        /* update MPEG-1/2 vbv_delay for CBR */
1825        if (avctx->rc_max_rate                          &&
1826            avctx->rc_min_rate == avctx->rc_max_rate &&
1827            s->out_format == FMT_MPEG1                     &&
1828            90000LL * (avctx->rc_buffer_size - 1) <=
1829                avctx->rc_max_rate * 0xFFFFLL) {
1830            AVCPBProperties *props;
1831            size_t props_size;
1832
1833            int vbv_delay, min_delay;
1834            double inbits  = avctx->rc_max_rate *
1835                             av_q2d(avctx->time_base);
1836            int    minbits = s->frame_bits - 8 *
1837                             (s->vbv_delay_pos - 1);
1838            double bits    = s->rc_context.buffer_index + minbits - inbits;
1839            uint8_t *const vbv_delay_ptr = s->pb.buf + s->vbv_delay_pos;
1840
1841            if (bits < 0)
1842                av_log(avctx, AV_LOG_ERROR,
1843                       "Internal error, negative bits\n");
1844
1845            av_assert1(s->repeat_first_field == 0);
1846
1847            vbv_delay = bits * 90000 / avctx->rc_max_rate;
1848            min_delay = (minbits * 90000LL + avctx->rc_max_rate - 1) /
1849                        avctx->rc_max_rate;
1850
1851            vbv_delay = FFMAX(vbv_delay, min_delay);
1852
1853            av_assert0(vbv_delay < 0xFFFF);
1854
1855            vbv_delay_ptr[0] &= 0xF8;
1856            vbv_delay_ptr[0] |= vbv_delay >> 13;
1857            vbv_delay_ptr[1]  = vbv_delay >> 5;
1858            vbv_delay_ptr[2] &= 0x07;
1859            vbv_delay_ptr[2] |= vbv_delay << 3;
1860
1861            props = av_cpb_properties_alloc(&props_size);
1862            if (!props)
1863                return AVERROR(ENOMEM);
1864            props->vbv_delay = vbv_delay * 300;
1865
1866            ret = av_packet_add_side_data(pkt, AV_PKT_DATA_CPB_PROPERTIES,
1867                                          (uint8_t*)props, props_size);
1868            if (ret < 0) {
1869                av_freep(&props);
1870                return ret;
1871            }
1872        }
1873        s->total_bits     += s->frame_bits;
1874
1875        pkt->pts = s->current_picture.f->pts;
1876        if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1877            if (!s->current_picture.f->coded_picture_number)
1878                pkt->dts = pkt->pts - s->dts_delta;
1879            else
1880                pkt->dts = s->reordered_pts;
1881            s->reordered_pts = pkt->pts;
1882        } else
1883            pkt->dts = pkt->pts;
1884        if (s->current_picture.f->key_frame)
1885            pkt->flags |= AV_PKT_FLAG_KEY;
1886        if (s->mb_info)
1887            av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1888    } else {
1889        s->frame_bits = 0;
1890    }
1891
1892    /* release non-reference frames */
1893    for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1894        if (!s->picture[i].reference)
1895            ff_mpeg_unref_picture(avctx, &s->picture[i]);
1896    }
1897
1898    av_assert1((s->frame_bits & 7) == 0);
1899
1900    pkt->size = s->frame_bits / 8;
1901    *got_packet = !!pkt->size;
1902    return 0;
1903}
1904
1905static inline void dct_single_coeff_elimination(MpegEncContext *s,
1906                                                int n, int threshold)
1907{
1908    static const char tab[64] = {
1909        3, 2, 2, 1, 1, 1, 1, 1,
1910        1, 1, 1, 1, 1, 1, 1, 1,
1911        1, 1, 1, 1, 1, 1, 1, 1,
1912        0, 0, 0, 0, 0, 0, 0, 0,
1913        0, 0, 0, 0, 0, 0, 0, 0,
1914        0, 0, 0, 0, 0, 0, 0, 0,
1915        0, 0, 0, 0, 0, 0, 0, 0,
1916        0, 0, 0, 0, 0, 0, 0, 0
1917    };
1918    int score = 0;
1919    int run = 0;
1920    int i;
1921    int16_t *block = s->block[n];
1922    const int last_index = s->block_last_index[n];
1923    int skip_dc;
1924
1925    if (threshold < 0) {
1926        skip_dc = 0;
1927        threshold = -threshold;
1928    } else
1929        skip_dc = 1;
1930
1931    /* Are all we could set to zero already zero? */
1932    if (last_index <= skip_dc - 1)
1933        return;
1934
1935    for (i = 0; i <= last_index; i++) {
1936        const int j = s->intra_scantable.permutated[i];
1937        const int level = FFABS(block[j]);
1938        if (level == 1) {
1939            if (skip_dc && i == 0)
1940                continue;
1941            score += tab[run];
1942            run = 0;
1943        } else if (level > 1) {
1944            return;
1945        } else {
1946            run++;
1947        }
1948    }
1949    if (score >= threshold)
1950        return;
1951    for (i = skip_dc; i <= last_index; i++) {
1952        const int j = s->intra_scantable.permutated[i];
1953        block[j] = 0;
1954    }
1955    if (block[0])
1956        s->block_last_index[n] = 0;
1957    else
1958        s->block_last_index[n] = -1;
1959}
1960
1961static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1962                               int last_index)
1963{
1964    int i;
1965    const int maxlevel = s->max_qcoeff;
1966    const int minlevel = s->min_qcoeff;
1967    int overflow = 0;
1968
1969    if (s->mb_intra) {
1970        i = 1; // skip clipping of intra dc
1971    } else
1972        i = 0;
1973
1974    for (; i <= last_index; i++) {
1975        const int j = s->intra_scantable.permutated[i];
1976        int level = block[j];
1977
1978        if (level > maxlevel) {
1979            level = maxlevel;
1980            overflow++;
1981        } else if (level < minlevel) {
1982            level = minlevel;
1983            overflow++;
1984        }
1985
1986        block[j] = level;
1987    }
1988
1989    if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1990        av_log(s->avctx, AV_LOG_INFO,
1991               "warning, clipping %d dct coefficients to %d..%d\n",
1992               overflow, minlevel, maxlevel);
1993}
1994
1995static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
1996{
1997    int x, y;
1998    // FIXME optimize
1999    for (y = 0; y < 8; y++) {
2000        for (x = 0; x < 8; x++) {
2001            int x2, y2;
2002            int sum = 0;
2003            int sqr = 0;
2004            int count = 0;
2005
2006            for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
2007                for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
2008                    int v = ptr[x2 + y2 * stride];
2009                    sum += v;
2010                    sqr += v * v;
2011                    count++;
2012                }
2013            }
2014            weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
2015        }
2016    }
2017}
2018
2019static av_always_inline void encode_mb_internal(MpegEncContext *s,
2020                                                int motion_x, int motion_y,
2021                                                int mb_block_height,
2022                                                int mb_block_width,
2023                                                int mb_block_count,
2024                                                int chroma_x_shift,
2025                                                int chroma_y_shift,
2026                                                int chroma_format)
2027{
2028/* Interlaced DCT is only possible with MPEG-2 and MPEG-4
2029 * and neither of these encoders currently supports 444. */
2030#define INTERLACED_DCT(s) ((chroma_format == CHROMA_420 || chroma_format == CHROMA_422) && \
2031                           (s)->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
2032    int16_t weight[12][64];
2033    int16_t orig[12][64];
2034    const int mb_x = s->mb_x;
2035    const int mb_y = s->mb_y;
2036    int i;
2037    int skip_dct[12];
2038    int dct_offset = s->linesize * 8; // default for progressive frames
2039    int uv_dct_offset = s->uvlinesize * 8;
2040    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2041    ptrdiff_t wrap_y, wrap_c;
2042
2043    for (i = 0; i < mb_block_count; i++)
2044        skip_dct[i] = s->skipdct;
2045
2046    if (s->adaptive_quant) {
2047        const int last_qp = s->qscale;
2048        const int mb_xy = mb_x + mb_y * s->mb_stride;
2049
2050        s->lambda = s->lambda_table[mb_xy];
2051        update_qscale(s);
2052
2053        if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2054            s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2055            s->dquant = s->qscale - last_qp;
2056
2057            if (s->out_format == FMT_H263) {
2058                s->dquant = av_clip(s->dquant, -2, 2);
2059
2060                if (s->codec_id == AV_CODEC_ID_MPEG4) {
2061                    if (!s->mb_intra) {
2062                        if (s->pict_type == AV_PICTURE_TYPE_B) {
2063                            if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2064                                s->dquant = 0;
2065                        }
2066                        if (s->mv_type == MV_TYPE_8X8)
2067                            s->dquant = 0;
2068                    }
2069                }
2070            }
2071        }
2072        ff_set_qscale(s, last_qp + s->dquant);
2073    } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2074        ff_set_qscale(s, s->qscale + s->dquant);
2075
2076    wrap_y = s->linesize;
2077    wrap_c = s->uvlinesize;
2078    ptr_y  = s->new_picture->data[0] +
2079             (mb_y * 16 * wrap_y)              + mb_x * 16;
2080    ptr_cb = s->new_picture->data[1] +
2081             (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2082    ptr_cr = s->new_picture->data[2] +
2083             (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2084
2085    if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2086        uint8_t *ebuf = s->sc.edge_emu_buffer + 38 * wrap_y;
2087        int cw = (s->width  + chroma_x_shift) >> chroma_x_shift;
2088        int ch = (s->height + chroma_y_shift) >> chroma_y_shift;
2089        s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2090                                 wrap_y, wrap_y,
2091                                 16, 16, mb_x * 16, mb_y * 16,
2092                                 s->width, s->height);
2093        ptr_y = ebuf;
2094        s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2095                                 wrap_c, wrap_c,
2096                                 mb_block_width, mb_block_height,
2097                                 mb_x * mb_block_width, mb_y * mb_block_height,
2098                                 cw, ch);
2099        ptr_cb = ebuf + 16 * wrap_y;
2100        s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2101                                 wrap_c, wrap_c,
2102                                 mb_block_width, mb_block_height,
2103                                 mb_x * mb_block_width, mb_y * mb_block_height,
2104                                 cw, ch);
2105        ptr_cr = ebuf + 16 * wrap_y + 16;
2106    }
2107
2108    if (s->mb_intra) {
2109        if (INTERLACED_DCT(s)) {
2110            int progressive_score, interlaced_score;
2111
2112            s->interlaced_dct = 0;
2113            progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2114                                s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2115                                                     NULL, wrap_y, 8) - 400;
2116
2117            if (progressive_score > 0) {
2118                interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2119                                                        NULL, wrap_y * 2, 8) +
2120                                   s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2121                                                        NULL, wrap_y * 2, 8);
2122                if (progressive_score > interlaced_score) {
2123                    s->interlaced_dct = 1;
2124
2125                    dct_offset = wrap_y;
2126                    uv_dct_offset = wrap_c;
2127                    wrap_y <<= 1;
2128                    if (chroma_format == CHROMA_422 ||
2129                        chroma_format == CHROMA_444)
2130                        wrap_c <<= 1;
2131                }
2132            }
2133        }
2134
2135        s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2136        s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2137        s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2138        s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2139
2140        if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2141            skip_dct[4] = 1;
2142            skip_dct[5] = 1;
2143        } else {
2144            s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2145            s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2146            if (chroma_format == CHROMA_422) {
2147                s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2148                s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2149            } else if (chroma_format == CHROMA_444) {
2150                s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2151                s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2152                s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2153                s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2154                s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2155                s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2156            }
2157        }
2158    } else {
2159        op_pixels_func (*op_pix)[4];
2160        qpel_mc_func (*op_qpix)[16];
2161        uint8_t *dest_y, *dest_cb, *dest_cr;
2162
2163        dest_y  = s->dest[0];
2164        dest_cb = s->dest[1];
2165        dest_cr = s->dest[2];
2166
2167        if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2168            op_pix  = s->hdsp.put_pixels_tab;
2169            op_qpix = s->qdsp.put_qpel_pixels_tab;
2170        } else {
2171            op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2172            op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2173        }
2174
2175        if (s->mv_dir & MV_DIR_FORWARD) {
2176            ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2177                          s->last_picture.f->data,
2178                          op_pix, op_qpix);
2179            op_pix  = s->hdsp.avg_pixels_tab;
2180            op_qpix = s->qdsp.avg_qpel_pixels_tab;
2181        }
2182        if (s->mv_dir & MV_DIR_BACKWARD) {
2183            ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2184                          s->next_picture.f->data,
2185                          op_pix, op_qpix);
2186        }
2187
2188        if (INTERLACED_DCT(s)) {
2189            int progressive_score, interlaced_score;
2190
2191            s->interlaced_dct = 0;
2192            progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2193                                s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2194                                                     ptr_y + wrap_y * 8,
2195                                                     wrap_y, 8) - 400;
2196
2197            if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2198                progressive_score -= 400;
2199
2200            if (progressive_score > 0) {
2201                interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2202                                                        wrap_y * 2, 8) +
2203                                   s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2204                                                        ptr_y + wrap_y,
2205                                                        wrap_y * 2, 8);
2206
2207                if (progressive_score > interlaced_score) {
2208                    s->interlaced_dct = 1;
2209
2210                    dct_offset = wrap_y;
2211                    uv_dct_offset = wrap_c;
2212                    wrap_y <<= 1;
2213                    if (chroma_format == CHROMA_422)
2214                        wrap_c <<= 1;
2215                }
2216            }
2217        }
2218
2219        s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2220        s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2221        s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2222                            dest_y + dct_offset, wrap_y);
2223        s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2224                            dest_y + dct_offset + 8, wrap_y);
2225
2226        if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2227            skip_dct[4] = 1;
2228            skip_dct[5] = 1;
2229        } else {
2230            s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2231            s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2232            if (!chroma_y_shift) { /* 422 */
2233                s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2234                                    dest_cb + uv_dct_offset, wrap_c);
2235                s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2236                                    dest_cr + uv_dct_offset, wrap_c);
2237            }
2238        }
2239        /* pre quantization */
2240        if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2241                2 * s->qscale * s->qscale) {
2242            // FIXME optimize
2243            if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2244                skip_dct[0] = 1;
2245            if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2246                skip_dct[1] = 1;
2247            if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2248                               wrap_y, 8) < 20 * s->qscale)
2249                skip_dct[2] = 1;
2250            if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2251                               wrap_y, 8) < 20 * s->qscale)
2252                skip_dct[3] = 1;
2253            if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2254                skip_dct[4] = 1;
2255            if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2256                skip_dct[5] = 1;
2257            if (!chroma_y_shift) { /* 422 */
2258                if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2259                                   dest_cb + uv_dct_offset,
2260                                   wrap_c, 8) < 20 * s->qscale)
2261                    skip_dct[6] = 1;
2262                if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2263                                   dest_cr + uv_dct_offset,
2264                                   wrap_c, 8) < 20 * s->qscale)
2265                    skip_dct[7] = 1;
2266            }
2267        }
2268    }
2269
2270    if (s->quantizer_noise_shaping) {
2271        if (!skip_dct[0])
2272            get_visual_weight(weight[0], ptr_y                 , wrap_y);
2273        if (!skip_dct[1])
2274            get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2275        if (!skip_dct[2])
2276            get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2277        if (!skip_dct[3])
2278            get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2279        if (!skip_dct[4])
2280            get_visual_weight(weight[4], ptr_cb                , wrap_c);
2281        if (!skip_dct[5])
2282            get_visual_weight(weight[5], ptr_cr                , wrap_c);
2283        if (!chroma_y_shift) { /* 422 */
2284            if (!skip_dct[6])
2285                get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2286                                  wrap_c);
2287            if (!skip_dct[7])
2288                get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2289                                  wrap_c);
2290        }
2291        memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2292    }
2293
2294    /* DCT & quantize */
2295    av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2296    {
2297        for (i = 0; i < mb_block_count; i++) {
2298            if (!skip_dct[i]) {
2299                int overflow;
2300                s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2301                // FIXME we could decide to change to quantizer instead of
2302                // clipping
2303                // JS: I don't think that would be a good idea it could lower
2304                //     quality instead of improve it. Just INTRADC clipping
2305                //     deserves changes in quantizer
2306                if (overflow)
2307                    clip_coeffs(s, s->block[i], s->block_last_index[i]);
2308            } else
2309                s->block_last_index[i] = -1;
2310        }
2311        if (s->quantizer_noise_shaping) {
2312            for (i = 0; i < mb_block_count; i++) {
2313                if (!skip_dct[i]) {
2314                    s->block_last_index[i] =
2315                        dct_quantize_refine(s, s->block[i], weight[i],
2316                                            orig[i], i, s->qscale);
2317                }
2318            }
2319        }
2320
2321        if (s->luma_elim_threshold && !s->mb_intra)
2322            for (i = 0; i < 4; i++)
2323                dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2324        if (s->chroma_elim_threshold && !s->mb_intra)
2325            for (i = 4; i < mb_block_count; i++)
2326                dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2327
2328        if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2329            for (i = 0; i < mb_block_count; i++) {
2330                if (s->block_last_index[i] == -1)
2331                    s->coded_score[i] = INT_MAX / 256;
2332            }
2333        }
2334    }
2335
2336    if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2337        s->block_last_index[4] =
2338        s->block_last_index[5] = 0;
2339        s->block[4][0] =
2340        s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2341        if (!chroma_y_shift) { /* 422 / 444 */
2342            for (i=6; i<12; i++) {
2343                s->block_last_index[i] = 0;
2344                s->block[i][0] = s->block[4][0];
2345            }
2346        }
2347    }
2348
2349    // non c quantize code returns incorrect block_last_index FIXME
2350    if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2351        for (i = 0; i < mb_block_count; i++) {
2352            int j;
2353            if (s->block_last_index[i] > 0) {
2354                for (j = 63; j > 0; j--) {
2355                    if (s->block[i][s->intra_scantable.permutated[j]])
2356                        break;
2357                }
2358                s->block_last_index[i] = j;
2359            }
2360        }
2361    }
2362
2363    /* huffman encode */
2364    switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2365    case AV_CODEC_ID_MPEG1VIDEO:
2366    case AV_CODEC_ID_MPEG2VIDEO:
2367        if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2368            ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2369        break;
2370    case AV_CODEC_ID_MPEG4:
2371        if (CONFIG_MPEG4_ENCODER)
2372            ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2373        break;
2374    case AV_CODEC_ID_MSMPEG4V2:
2375    case AV_CODEC_ID_MSMPEG4V3:
2376    case AV_CODEC_ID_WMV1:
2377        if (CONFIG_MSMPEG4_ENCODER)
2378            ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2379        break;
2380    case AV_CODEC_ID_WMV2:
2381        if (CONFIG_WMV2_ENCODER)
2382            ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2383        break;
2384    case AV_CODEC_ID_H261:
2385        if (CONFIG_H261_ENCODER)
2386            ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2387        break;
2388    case AV_CODEC_ID_H263:
2389    case AV_CODEC_ID_H263P:
2390    case AV_CODEC_ID_FLV1:
2391    case AV_CODEC_ID_RV10:
2392    case AV_CODEC_ID_RV20:
2393        if (CONFIG_H263_ENCODER)
2394            ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2395        break;
2396#if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
2397    case AV_CODEC_ID_MJPEG:
2398    case AV_CODEC_ID_AMV:
2399        ff_mjpeg_encode_mb(s, s->block);
2400        break;
2401#endif
2402    case AV_CODEC_ID_SPEEDHQ:
2403        if (CONFIG_SPEEDHQ_ENCODER)
2404            ff_speedhq_encode_mb(s, s->block);
2405        break;
2406    default:
2407        av_assert1(0);
2408    }
2409}
2410
2411static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2412{
2413    if (s->chroma_format == CHROMA_420)
2414        encode_mb_internal(s, motion_x, motion_y,  8, 8, 6, 1, 1, CHROMA_420);
2415    else if (s->chroma_format == CHROMA_422)
2416        encode_mb_internal(s, motion_x, motion_y, 16, 8, 8, 1, 0, CHROMA_422);
2417    else
2418        encode_mb_internal(s, motion_x, motion_y, 16, 16, 12, 0, 0, CHROMA_444);
2419}
2420
2421static inline void copy_context_before_encode(MpegEncContext *d,
2422                                              const MpegEncContext *s)
2423{
2424    int i;
2425
2426    memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2427
2428    /* MPEG-1 */
2429    d->mb_skip_run= s->mb_skip_run;
2430    for(i=0; i<3; i++)
2431        d->last_dc[i] = s->last_dc[i];
2432
2433    /* statistics */
2434    d->mv_bits= s->mv_bits;
2435    d->i_tex_bits= s->i_tex_bits;
2436    d->p_tex_bits= s->p_tex_bits;
2437    d->i_count= s->i_count;
2438    d->skip_count= s->skip_count;
2439    d->misc_bits= s->misc_bits;
2440    d->last_bits= 0;
2441
2442    d->mb_skipped= 0;
2443    d->qscale= s->qscale;
2444    d->dquant= s->dquant;
2445
2446    d->esc3_level_length= s->esc3_level_length;
2447}
2448
2449static inline void copy_context_after_encode(MpegEncContext *d,
2450                                             const MpegEncContext *s)
2451{
2452    int i;
2453
2454    memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2455    memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2456
2457    /* MPEG-1 */
2458    d->mb_skip_run= s->mb_skip_run;
2459    for(i=0; i<3; i++)
2460        d->last_dc[i] = s->last_dc[i];
2461
2462    /* statistics */
2463    d->mv_bits= s->mv_bits;
2464    d->i_tex_bits= s->i_tex_bits;
2465    d->p_tex_bits= s->p_tex_bits;
2466    d->i_count= s->i_count;
2467    d->skip_count= s->skip_count;
2468    d->misc_bits= s->misc_bits;
2469
2470    d->mb_intra= s->mb_intra;
2471    d->mb_skipped= s->mb_skipped;
2472    d->mv_type= s->mv_type;
2473    d->mv_dir= s->mv_dir;
2474    d->pb= s->pb;
2475    if(s->data_partitioning){
2476        d->pb2= s->pb2;
2477        d->tex_pb= s->tex_pb;
2478    }
2479    d->block= s->block;
2480    for(i=0; i<8; i++)
2481        d->block_last_index[i]= s->block_last_index[i];
2482    d->interlaced_dct= s->interlaced_dct;
2483    d->qscale= s->qscale;
2484
2485    d->esc3_level_length= s->esc3_level_length;
2486}
2487
2488static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best,
2489                           PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2490                           int *dmin, int *next_block, int motion_x, int motion_y)
2491{
2492    int score;
2493    uint8_t *dest_backup[3];
2494
2495    copy_context_before_encode(s, backup);
2496
2497    s->block= s->blocks[*next_block];
2498    s->pb= pb[*next_block];
2499    if(s->data_partitioning){
2500        s->pb2   = pb2   [*next_block];
2501        s->tex_pb= tex_pb[*next_block];
2502    }
2503
2504    if(*next_block){
2505        memcpy(dest_backup, s->dest, sizeof(s->dest));
2506        s->dest[0] = s->sc.rd_scratchpad;
2507        s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2508        s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2509        av_assert0(s->linesize >= 32); //FIXME
2510    }
2511
2512    encode_mb(s, motion_x, motion_y);
2513
2514    score= put_bits_count(&s->pb);
2515    if(s->data_partitioning){
2516        score+= put_bits_count(&s->pb2);
2517        score+= put_bits_count(&s->tex_pb);
2518    }
2519
2520    if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2521        ff_mpv_reconstruct_mb(s, s->block);
2522
2523        score *= s->lambda2;
2524        score += sse_mb(s) << FF_LAMBDA_SHIFT;
2525    }
2526
2527    if(*next_block){
2528        memcpy(s->dest, dest_backup, sizeof(s->dest));
2529    }
2530
2531    if(score<*dmin){
2532        *dmin= score;
2533        *next_block^=1;
2534
2535        copy_context_after_encode(best, s);
2536    }
2537}
2538
2539static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2540    const uint32_t *sq = ff_square_tab + 256;
2541    int acc=0;
2542    int x,y;
2543
2544    if(w==16 && h==16)
2545        return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2546    else if(w==8 && h==8)
2547        return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2548
2549    for(y=0; y<h; y++){
2550        for(x=0; x<w; x++){
2551            acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2552        }
2553    }
2554
2555    av_assert2(acc>=0);
2556
2557    return acc;
2558}
2559
2560static int sse_mb(MpegEncContext *s){
2561    int w= 16;
2562    int h= 16;
2563    int chroma_mb_w = w >> s->chroma_x_shift;
2564    int chroma_mb_h = h >> s->chroma_y_shift;
2565
2566    if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2567    if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2568
2569    if(w==16 && h==16)
2570      if(s->avctx->mb_cmp == FF_CMP_NSSE){
2571        return s->mecc.nsse[0](s, s->new_picture->data[0] + s->mb_x * 16 + s->mb_y * s->linesize * 16,
2572                               s->dest[0], s->linesize, 16) +
2573               s->mecc.nsse[1](s, s->new_picture->data[1] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
2574                               s->dest[1], s->uvlinesize, chroma_mb_h) +
2575               s->mecc.nsse[1](s, s->new_picture->data[2] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
2576                               s->dest[2], s->uvlinesize, chroma_mb_h);
2577      }else{
2578        return s->mecc.sse[0](NULL, s->new_picture->data[0] + s->mb_x * 16 + s->mb_y * s->linesize * 16,
2579                              s->dest[0], s->linesize, 16) +
2580               s->mecc.sse[1](NULL, s->new_picture->data[1] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
2581                              s->dest[1], s->uvlinesize, chroma_mb_h) +
2582               s->mecc.sse[1](NULL, s->new_picture->data[2] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
2583                              s->dest[2], s->uvlinesize, chroma_mb_h);
2584      }
2585    else
2586        return  sse(s, s->new_picture->data[0] + s->mb_x * 16 + s->mb_y * s->linesize * 16,
2587                    s->dest[0], w, h, s->linesize) +
2588                sse(s, s->new_picture->data[1] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
2589                    s->dest[1], w >> s->chroma_x_shift, h >> s->chroma_y_shift, s->uvlinesize) +
2590                sse(s, s->new_picture->data[2] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
2591                    s->dest[2], w >> s->chroma_x_shift, h >> s->chroma_y_shift, s->uvlinesize);
2592}
2593
2594static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2595    MpegEncContext *s= *(void**)arg;
2596
2597
2598    s->me.pre_pass=1;
2599    s->me.dia_size= s->avctx->pre_dia_size;
2600    s->first_slice_line=1;
2601    for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2602        for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2603            ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2604        }
2605        s->first_slice_line=0;
2606    }
2607
2608    s->me.pre_pass=0;
2609
2610    return 0;
2611}
2612
2613static int estimate_motion_thread(AVCodecContext *c, void *arg){
2614    MpegEncContext *s= *(void**)arg;
2615
2616    s->me.dia_size= s->avctx->dia_size;
2617    s->first_slice_line=1;
2618    for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2619        s->mb_x=0; //for block init below
2620        ff_init_block_index(s);
2621        for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2622            s->block_index[0]+=2;
2623            s->block_index[1]+=2;
2624            s->block_index[2]+=2;
2625            s->block_index[3]+=2;
2626
2627            /* compute motion vector & mb_type and store in context */
2628            if(s->pict_type==AV_PICTURE_TYPE_B)
2629                ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2630            else
2631                ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2632        }
2633        s->first_slice_line=0;
2634    }
2635    return 0;
2636}
2637
2638static int mb_var_thread(AVCodecContext *c, void *arg){
2639    MpegEncContext *s= *(void**)arg;
2640    int mb_x, mb_y;
2641
2642    for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2643        for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2644            int xx = mb_x * 16;
2645            int yy = mb_y * 16;
2646            uint8_t *pix = s->new_picture->data[0] + (yy * s->linesize) + xx;
2647            int varc;
2648            int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2649
2650            varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2651                    (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2652
2653            s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2654            s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2655            s->me.mb_var_sum_temp    += varc;
2656        }
2657    }
2658    return 0;
2659}
2660
2661static void write_slice_end(MpegEncContext *s){
2662    if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2663        if(s->partitioned_frame){
2664            ff_mpeg4_merge_partitions(s);
2665        }
2666
2667        ff_mpeg4_stuffing(&s->pb);
2668    } else if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) &&
2669               s->out_format == FMT_MJPEG) {
2670        ff_mjpeg_encode_stuffing(s);
2671    } else if (CONFIG_SPEEDHQ_ENCODER && s->out_format == FMT_SPEEDHQ) {
2672        ff_speedhq_end_slice(s);
2673    }
2674
2675    flush_put_bits(&s->pb);
2676
2677    if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2678        s->misc_bits+= get_bits_diff(s);
2679}
2680
2681static void write_mb_info(MpegEncContext *s)
2682{
2683    uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2684    int offset = put_bits_count(&s->pb);
2685    int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2686    int gobn = s->mb_y / s->gob_index;
2687    int pred_x, pred_y;
2688    if (CONFIG_H263_ENCODER)
2689        ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2690    bytestream_put_le32(&ptr, offset);
2691    bytestream_put_byte(&ptr, s->qscale);
2692    bytestream_put_byte(&ptr, gobn);
2693    bytestream_put_le16(&ptr, mba);
2694    bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2695    bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2696    /* 4MV not implemented */
2697    bytestream_put_byte(&ptr, 0); /* hmv2 */
2698    bytestream_put_byte(&ptr, 0); /* vmv2 */
2699}
2700
2701static void update_mb_info(MpegEncContext *s, int startcode)
2702{
2703    if (!s->mb_info)
2704        return;
2705    if (put_bytes_count(&s->pb, 0) - s->prev_mb_info >= s->mb_info) {
2706        s->mb_info_size += 12;
2707        s->prev_mb_info = s->last_mb_info;
2708    }
2709    if (startcode) {
2710        s->prev_mb_info = put_bytes_count(&s->pb, 0);
2711        /* This might have incremented mb_info_size above, and we return without
2712         * actually writing any info into that slot yet. But in that case,
2713         * this will be called again at the start of the after writing the
2714         * start code, actually writing the mb info. */
2715        return;
2716    }
2717
2718    s->last_mb_info = put_bytes_count(&s->pb, 0);
2719    if (!s->mb_info_size)
2720        s->mb_info_size += 12;
2721    write_mb_info(s);
2722}
2723
2724int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2725{
2726    if (put_bytes_left(&s->pb, 0) < threshold
2727        && s->slice_context_count == 1
2728        && s->pb.buf == s->avctx->internal->byte_buffer) {
2729        int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2730
2731        uint8_t *new_buffer = NULL;
2732        int new_buffer_size = 0;
2733
2734        if ((s->avctx->internal->byte_buffer_size + size_increase) >= INT_MAX/8) {
2735            av_log(s->avctx, AV_LOG_ERROR, "Cannot reallocate putbit buffer\n");
2736            return AVERROR(ENOMEM);
2737        }
2738
2739        emms_c();
2740
2741        av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2742                              s->avctx->internal->byte_buffer_size + size_increase);
2743        if (!new_buffer)
2744            return AVERROR(ENOMEM);
2745
2746        memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2747        av_free(s->avctx->internal->byte_buffer);
2748        s->avctx->internal->byte_buffer      = new_buffer;
2749        s->avctx->internal->byte_buffer_size = new_buffer_size;
2750        rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2751        s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2752    }
2753    if (put_bytes_left(&s->pb, 0) < threshold)
2754        return AVERROR(EINVAL);
2755    return 0;
2756}
2757
2758static int encode_thread(AVCodecContext *c, void *arg){
2759    MpegEncContext *s= *(void**)arg;
2760    int mb_x, mb_y, mb_y_order;
2761    int chr_h= 16>>s->chroma_y_shift;
2762    int i, j;
2763    MpegEncContext best_s = { 0 }, backup_s;
2764    uint8_t bit_buf[2][MAX_MB_BYTES];
2765    uint8_t bit_buf2[2][MAX_MB_BYTES];
2766    uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2767    PutBitContext pb[2], pb2[2], tex_pb[2];
2768
2769    for(i=0; i<2; i++){
2770        init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2771        init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2772        init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2773    }
2774
2775    s->last_bits= put_bits_count(&s->pb);
2776    s->mv_bits=0;
2777    s->misc_bits=0;
2778    s->i_tex_bits=0;
2779    s->p_tex_bits=0;
2780    s->i_count=0;
2781    s->skip_count=0;
2782
2783    for(i=0; i<3; i++){
2784        /* init last dc values */
2785        /* note: quant matrix value (8) is implied here */
2786        s->last_dc[i] = 128 << s->intra_dc_precision;
2787
2788        s->current_picture.encoding_error[i] = 0;
2789    }
2790    if(s->codec_id==AV_CODEC_ID_AMV){
2791        s->last_dc[0] = 128*8/13;
2792        s->last_dc[1] = 128*8/14;
2793        s->last_dc[2] = 128*8/14;
2794    }
2795    s->mb_skip_run = 0;
2796    memset(s->last_mv, 0, sizeof(s->last_mv));
2797
2798    s->last_mv_dir = 0;
2799
2800    switch(s->codec_id){
2801    case AV_CODEC_ID_H263:
2802    case AV_CODEC_ID_H263P:
2803    case AV_CODEC_ID_FLV1:
2804        if (CONFIG_H263_ENCODER)
2805            s->gob_index = H263_GOB_HEIGHT(s->height);
2806        break;
2807    case AV_CODEC_ID_MPEG4:
2808        if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2809            ff_mpeg4_init_partitions(s);
2810        break;
2811    }
2812
2813    s->resync_mb_x=0;
2814    s->resync_mb_y=0;
2815    s->first_slice_line = 1;
2816    s->ptr_lastgob = s->pb.buf;
2817    for (mb_y_order = s->start_mb_y; mb_y_order < s->end_mb_y; mb_y_order++) {
2818        if (CONFIG_SPEEDHQ_ENCODER && s->codec_id == AV_CODEC_ID_SPEEDHQ) {
2819            int first_in_slice;
2820            mb_y = ff_speedhq_mb_y_order_to_mb(mb_y_order, s->mb_height, &first_in_slice);
2821            if (first_in_slice && mb_y_order != s->start_mb_y)
2822                ff_speedhq_end_slice(s);
2823            s->last_dc[0] = s->last_dc[1] = s->last_dc[2] = 1024 << s->intra_dc_precision;
2824        } else {
2825            mb_y = mb_y_order;
2826        }
2827        s->mb_x=0;
2828        s->mb_y= mb_y;
2829
2830        ff_set_qscale(s, s->qscale);
2831        ff_init_block_index(s);
2832
2833        for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2834            int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2835            int mb_type= s->mb_type[xy];
2836//            int d;
2837            int dmin= INT_MAX;
2838            int dir;
2839            int size_increase =  s->avctx->internal->byte_buffer_size/4
2840                               + s->mb_width*MAX_MB_BYTES;
2841
2842            ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2843            if (put_bytes_left(&s->pb, 0) < MAX_MB_BYTES){
2844                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2845                return -1;
2846            }
2847            if(s->data_partitioning){
2848                if (put_bytes_left(&s->pb2,    0) < MAX_MB_BYTES ||
2849                    put_bytes_left(&s->tex_pb, 0) < MAX_MB_BYTES) {
2850                    av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2851                    return -1;
2852                }
2853            }
2854
2855            s->mb_x = mb_x;
2856            s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2857            ff_update_block_index(s);
2858
2859            if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2860                ff_h261_reorder_mb_index(s);
2861                xy= s->mb_y*s->mb_stride + s->mb_x;
2862                mb_type= s->mb_type[xy];
2863            }
2864
2865            /* write gob / video packet header  */
2866            if(s->rtp_mode){
2867                int current_packet_size, is_gob_start;
2868
2869                current_packet_size = put_bytes_count(&s->pb, 1)
2870                                      - (s->ptr_lastgob - s->pb.buf);
2871
2872                is_gob_start = s->rtp_payload_size &&
2873                               current_packet_size >= s->rtp_payload_size &&
2874                               mb_y + mb_x > 0;
2875
2876                if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2877
2878                switch(s->codec_id){
2879                case AV_CODEC_ID_H263:
2880                case AV_CODEC_ID_H263P:
2881                    if(!s->h263_slice_structured)
2882                        if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2883                    break;
2884                case AV_CODEC_ID_MPEG2VIDEO:
2885                    if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2886                case AV_CODEC_ID_MPEG1VIDEO:
2887                    if(s->mb_skip_run) is_gob_start=0;
2888                    break;
2889                case AV_CODEC_ID_MJPEG:
2890                    if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2891                    break;
2892                }
2893
2894                if(is_gob_start){
2895                    if(s->start_mb_y != mb_y || mb_x!=0){
2896                        write_slice_end(s);
2897
2898                        if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2899                            ff_mpeg4_init_partitions(s);
2900                        }
2901                    }
2902
2903                    av_assert2((put_bits_count(&s->pb)&7) == 0);
2904                    current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2905
2906                    if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2907                        int r = put_bytes_count(&s->pb, 0) + s->picture_number + 16 + s->mb_x + s->mb_y;
2908                        int d = 100 / s->error_rate;
2909                        if(r % d == 0){
2910                            current_packet_size=0;
2911                            s->pb.buf_ptr= s->ptr_lastgob;
2912                            av_assert1(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2913                        }
2914                    }
2915
2916                    switch(s->codec_id){
2917                    case AV_CODEC_ID_MPEG4:
2918                        if (CONFIG_MPEG4_ENCODER) {
2919                            ff_mpeg4_encode_video_packet_header(s);
2920                            ff_mpeg4_clean_buffers(s);
2921                        }
2922                    break;
2923                    case AV_CODEC_ID_MPEG1VIDEO:
2924                    case AV_CODEC_ID_MPEG2VIDEO:
2925                        if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2926                            ff_mpeg1_encode_slice_header(s);
2927                            ff_mpeg1_clean_buffers(s);
2928                        }
2929                    break;
2930                    case AV_CODEC_ID_H263:
2931                    case AV_CODEC_ID_H263P:
2932                        if (CONFIG_H263_ENCODER) {
2933                            update_mb_info(s, 1);
2934                            ff_h263_encode_gob_header(s, mb_y);
2935                        }
2936                    break;
2937                    }
2938
2939                    if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2940                        int bits= put_bits_count(&s->pb);
2941                        s->misc_bits+= bits - s->last_bits;
2942                        s->last_bits= bits;
2943                    }
2944
2945                    s->ptr_lastgob += current_packet_size;
2946                    s->first_slice_line=1;
2947                    s->resync_mb_x=mb_x;
2948                    s->resync_mb_y=mb_y;
2949                }
2950            }
2951
2952            if(  (s->resync_mb_x   == s->mb_x)
2953               && s->resync_mb_y+1 == s->mb_y){
2954                s->first_slice_line=0;
2955            }
2956
2957            s->mb_skipped=0;
2958            s->dquant=0; //only for QP_RD
2959
2960            update_mb_info(s, 0);
2961
2962            if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2963                int next_block=0;
2964                int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2965
2966                copy_context_before_encode(&backup_s, s);
2967                backup_s.pb= s->pb;
2968                best_s.data_partitioning= s->data_partitioning;
2969                best_s.partitioned_frame= s->partitioned_frame;
2970                if(s->data_partitioning){
2971                    backup_s.pb2= s->pb2;
2972                    backup_s.tex_pb= s->tex_pb;
2973                }
2974
2975                if(mb_type&CANDIDATE_MB_TYPE_INTER){
2976                    s->mv_dir = MV_DIR_FORWARD;
2977                    s->mv_type = MV_TYPE_16X16;
2978                    s->mb_intra= 0;
2979                    s->mv[0][0][0] = s->p_mv_table[xy][0];
2980                    s->mv[0][0][1] = s->p_mv_table[xy][1];
2981                    encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
2982                                 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2983                }
2984                if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2985                    s->mv_dir = MV_DIR_FORWARD;
2986                    s->mv_type = MV_TYPE_FIELD;
2987                    s->mb_intra= 0;
2988                    for(i=0; i<2; i++){
2989                        j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2990                        s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2991                        s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2992                    }
2993                    encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
2994                                 &dmin, &next_block, 0, 0);
2995                }
2996                if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2997                    s->mv_dir = MV_DIR_FORWARD;
2998                    s->mv_type = MV_TYPE_16X16;
2999                    s->mb_intra= 0;
3000                    s->mv[0][0][0] = 0;
3001                    s->mv[0][0][1] = 0;
3002                    encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3003                                 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3004                }
3005                if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
3006                    s->mv_dir = MV_DIR_FORWARD;
3007                    s->mv_type = MV_TYPE_8X8;
3008                    s->mb_intra= 0;
3009                    for(i=0; i<4; i++){
3010                        s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3011                        s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3012                    }
3013                    encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3014                                 &dmin, &next_block, 0, 0);
3015                }
3016                if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3017                    s->mv_dir = MV_DIR_FORWARD;
3018                    s->mv_type = MV_TYPE_16X16;
3019                    s->mb_intra= 0;
3020                    s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3021                    s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3022                    encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3023                                 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3024                }
3025                if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3026                    s->mv_dir = MV_DIR_BACKWARD;
3027                    s->mv_type = MV_TYPE_16X16;
3028                    s->mb_intra= 0;
3029                    s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3030                    s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3031                    encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3032                                 &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3033                }
3034                if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3035                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3036                    s->mv_type = MV_TYPE_16X16;
3037                    s->mb_intra= 0;
3038                    s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3039                    s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3040                    s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3041                    s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3042                    encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3043                                 &dmin, &next_block, 0, 0);
3044                }
3045                if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3046                    s->mv_dir = MV_DIR_FORWARD;
3047                    s->mv_type = MV_TYPE_FIELD;
3048                    s->mb_intra= 0;
3049                    for(i=0; i<2; i++){
3050                        j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3051                        s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3052                        s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3053                    }
3054                    encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3055                                 &dmin, &next_block, 0, 0);
3056                }
3057                if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3058                    s->mv_dir = MV_DIR_BACKWARD;
3059                    s->mv_type = MV_TYPE_FIELD;
3060                    s->mb_intra= 0;
3061                    for(i=0; i<2; i++){
3062                        j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3063                        s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3064                        s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3065                    }
3066                    encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3067                                 &dmin, &next_block, 0, 0);
3068                }
3069                if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3070                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3071                    s->mv_type = MV_TYPE_FIELD;
3072                    s->mb_intra= 0;
3073                    for(dir=0; dir<2; dir++){
3074                        for(i=0; i<2; i++){
3075                            j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3076                            s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3077                            s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3078                        }
3079                    }
3080                    encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3081                                 &dmin, &next_block, 0, 0);
3082                }
3083                if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3084                    s->mv_dir = 0;
3085                    s->mv_type = MV_TYPE_16X16;
3086                    s->mb_intra= 1;
3087                    s->mv[0][0][0] = 0;
3088                    s->mv[0][0][1] = 0;
3089                    encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3090                                 &dmin, &next_block, 0, 0);
3091                    if(s->h263_pred || s->h263_aic){
3092                        if(best_s.mb_intra)
3093                            s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3094                        else
3095                            ff_clean_intra_table_entries(s); //old mode?
3096                    }
3097                }
3098
3099                if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3100                    if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3101                        const int last_qp= backup_s.qscale;
3102                        int qpi, qp, dc[6];
3103                        int16_t ac[6][16];
3104                        const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3105                        static const int dquant_tab[4]={-1,1,-2,2};
3106                        int storecoefs = s->mb_intra && s->dc_val[0];
3107
3108                        av_assert2(backup_s.dquant == 0);
3109
3110                        //FIXME intra
3111                        s->mv_dir= best_s.mv_dir;
3112                        s->mv_type = MV_TYPE_16X16;
3113                        s->mb_intra= best_s.mb_intra;
3114                        s->mv[0][0][0] = best_s.mv[0][0][0];
3115                        s->mv[0][0][1] = best_s.mv[0][0][1];
3116                        s->mv[1][0][0] = best_s.mv[1][0][0];
3117                        s->mv[1][0][1] = best_s.mv[1][0][1];
3118
3119                        qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3120                        for(; qpi<4; qpi++){
3121                            int dquant= dquant_tab[qpi];
3122                            qp= last_qp + dquant;
3123                            if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3124                                continue;
3125                            backup_s.dquant= dquant;
3126                            if(storecoefs){
3127                                for(i=0; i<6; i++){
3128                                    dc[i]= s->dc_val[0][ s->block_index[i] ];
3129                                    memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3130                                }
3131                            }
3132
3133                            encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3134                                         &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3135                            if(best_s.qscale != qp){
3136                                if(storecoefs){
3137                                    for(i=0; i<6; i++){
3138                                        s->dc_val[0][ s->block_index[i] ]= dc[i];
3139                                        memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3140                                    }
3141                                }
3142                            }
3143                        }
3144                    }
3145                }
3146                if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3147                    int mx= s->b_direct_mv_table[xy][0];
3148                    int my= s->b_direct_mv_table[xy][1];
3149
3150                    backup_s.dquant = 0;
3151                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3152                    s->mb_intra= 0;
3153                    ff_mpeg4_set_direct_mv(s, mx, my);
3154                    encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3155                                 &dmin, &next_block, mx, my);
3156                }
3157                if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3158                    backup_s.dquant = 0;
3159                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3160                    s->mb_intra= 0;
3161                    ff_mpeg4_set_direct_mv(s, 0, 0);
3162                    encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3163                                 &dmin, &next_block, 0, 0);
3164                }
3165                if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3166                    int coded=0;
3167                    for(i=0; i<6; i++)
3168                        coded |= s->block_last_index[i];
3169                    if(coded){
3170                        int mx,my;
3171                        memcpy(s->mv, best_s.mv, sizeof(s->mv));
3172                        if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3173                            mx=my=0; //FIXME find the one we actually used
3174                            ff_mpeg4_set_direct_mv(s, mx, my);
3175                        }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3176                            mx= s->mv[1][0][0];
3177                            my= s->mv[1][0][1];
3178                        }else{
3179                            mx= s->mv[0][0][0];
3180                            my= s->mv[0][0][1];
3181                        }
3182
3183                        s->mv_dir= best_s.mv_dir;
3184                        s->mv_type = best_s.mv_type;
3185                        s->mb_intra= 0;
3186/*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3187                        s->mv[0][0][1] = best_s.mv[0][0][1];
3188                        s->mv[1][0][0] = best_s.mv[1][0][0];
3189                        s->mv[1][0][1] = best_s.mv[1][0][1];*/
3190                        backup_s.dquant= 0;
3191                        s->skipdct=1;
3192                        encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3193                                        &dmin, &next_block, mx, my);
3194                        s->skipdct=0;
3195                    }
3196                }
3197
3198                s->current_picture.qscale_table[xy] = best_s.qscale;
3199
3200                copy_context_after_encode(s, &best_s);
3201
3202                pb_bits_count= put_bits_count(&s->pb);
3203                flush_put_bits(&s->pb);
3204                ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3205                s->pb= backup_s.pb;
3206
3207                if(s->data_partitioning){
3208                    pb2_bits_count= put_bits_count(&s->pb2);
3209                    flush_put_bits(&s->pb2);
3210                    ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3211                    s->pb2= backup_s.pb2;
3212
3213                    tex_pb_bits_count= put_bits_count(&s->tex_pb);
3214                    flush_put_bits(&s->tex_pb);
3215                    ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3216                    s->tex_pb= backup_s.tex_pb;
3217                }
3218                s->last_bits= put_bits_count(&s->pb);
3219
3220                if (CONFIG_H263_ENCODER &&
3221                    s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3222                    ff_h263_update_motion_val(s);
3223
3224                if(next_block==0){ //FIXME 16 vs linesize16
3225                    s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3226                    s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3227                    s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3228                }
3229
3230                if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3231                    ff_mpv_reconstruct_mb(s, s->block);
3232            } else {
3233                int motion_x = 0, motion_y = 0;
3234                s->mv_type=MV_TYPE_16X16;
3235                // only one MB-Type possible
3236
3237                switch(mb_type){
3238                case CANDIDATE_MB_TYPE_INTRA:
3239                    s->mv_dir = 0;
3240                    s->mb_intra= 1;
3241                    motion_x= s->mv[0][0][0] = 0;
3242                    motion_y= s->mv[0][0][1] = 0;
3243                    break;
3244                case CANDIDATE_MB_TYPE_INTER:
3245                    s->mv_dir = MV_DIR_FORWARD;
3246                    s->mb_intra= 0;
3247                    motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3248                    motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3249                    break;
3250                case CANDIDATE_MB_TYPE_INTER_I:
3251                    s->mv_dir = MV_DIR_FORWARD;
3252                    s->mv_type = MV_TYPE_FIELD;
3253                    s->mb_intra= 0;
3254                    for(i=0; i<2; i++){
3255                        j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3256                        s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3257                        s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3258                    }
3259                    break;
3260                case CANDIDATE_MB_TYPE_INTER4V:
3261                    s->mv_dir = MV_DIR_FORWARD;
3262                    s->mv_type = MV_TYPE_8X8;
3263                    s->mb_intra= 0;
3264                    for(i=0; i<4; i++){
3265                        s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3266                        s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3267                    }
3268                    break;
3269                case CANDIDATE_MB_TYPE_DIRECT:
3270                    if (CONFIG_MPEG4_ENCODER) {
3271                        s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3272                        s->mb_intra= 0;
3273                        motion_x=s->b_direct_mv_table[xy][0];
3274                        motion_y=s->b_direct_mv_table[xy][1];
3275                        ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3276                    }
3277                    break;
3278                case CANDIDATE_MB_TYPE_DIRECT0:
3279                    if (CONFIG_MPEG4_ENCODER) {
3280                        s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3281                        s->mb_intra= 0;
3282                        ff_mpeg4_set_direct_mv(s, 0, 0);
3283                    }
3284                    break;
3285                case CANDIDATE_MB_TYPE_BIDIR:
3286                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3287                    s->mb_intra= 0;
3288                    s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3289                    s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3290                    s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3291                    s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3292                    break;
3293                case CANDIDATE_MB_TYPE_BACKWARD:
3294                    s->mv_dir = MV_DIR_BACKWARD;
3295                    s->mb_intra= 0;
3296                    motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3297                    motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3298                    break;
3299                case CANDIDATE_MB_TYPE_FORWARD:
3300                    s->mv_dir = MV_DIR_FORWARD;
3301                    s->mb_intra= 0;
3302                    motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3303                    motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3304                    break;
3305                case CANDIDATE_MB_TYPE_FORWARD_I:
3306                    s->mv_dir = MV_DIR_FORWARD;
3307                    s->mv_type = MV_TYPE_FIELD;
3308                    s->mb_intra= 0;
3309                    for(i=0; i<2; i++){
3310                        j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3311                        s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3312                        s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3313                    }
3314                    break;
3315                case CANDIDATE_MB_TYPE_BACKWARD_I:
3316                    s->mv_dir = MV_DIR_BACKWARD;
3317                    s->mv_type = MV_TYPE_FIELD;
3318                    s->mb_intra= 0;
3319                    for(i=0; i<2; i++){
3320                        j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3321                        s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3322                        s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3323                    }
3324                    break;
3325                case CANDIDATE_MB_TYPE_BIDIR_I:
3326                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3327                    s->mv_type = MV_TYPE_FIELD;
3328                    s->mb_intra= 0;
3329                    for(dir=0; dir<2; dir++){
3330                        for(i=0; i<2; i++){
3331                            j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3332                            s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3333                            s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3334                        }
3335                    }
3336                    break;
3337                default:
3338                    av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3339                }
3340
3341                encode_mb(s, motion_x, motion_y);
3342
3343                // RAL: Update last macroblock type
3344                s->last_mv_dir = s->mv_dir;
3345
3346                if (CONFIG_H263_ENCODER &&
3347                    s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3348                    ff_h263_update_motion_val(s);
3349
3350                ff_mpv_reconstruct_mb(s, s->block);
3351            }
3352
3353            /* clean the MV table in IPS frames for direct mode in B-frames */
3354            if(s->mb_intra /* && I,P,S_TYPE */){
3355                s->p_mv_table[xy][0]=0;
3356                s->p_mv_table[xy][1]=0;
3357            }
3358
3359            if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3360                int w= 16;
3361                int h= 16;
3362
3363                if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3364                if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3365
3366                s->current_picture.encoding_error[0] += sse(
3367                    s, s->new_picture->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3368                    s->dest[0], w, h, s->linesize);
3369                s->current_picture.encoding_error[1] += sse(
3370                    s, s->new_picture->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3371                    s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3372                s->current_picture.encoding_error[2] += sse(
3373                    s, s->new_picture->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3374                    s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3375            }
3376            if(s->loop_filter){
3377                if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3378                    ff_h263_loop_filter(s);
3379            }
3380            ff_dlog(s->avctx, "MB %d %d bits\n",
3381                    s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3382        }
3383    }
3384
3385    //not beautiful here but we must write it before flushing so it has to be here
3386    if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3387        ff_msmpeg4_encode_ext_header(s);
3388
3389    write_slice_end(s);
3390
3391    return 0;
3392}
3393
3394#define MERGE(field) dst->field += src->field; src->field=0
3395static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3396    MERGE(me.scene_change_score);
3397    MERGE(me.mc_mb_var_sum_temp);
3398    MERGE(me.mb_var_sum_temp);
3399}
3400
3401static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3402    int i;
3403
3404    MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3405    MERGE(dct_count[1]);
3406    MERGE(mv_bits);
3407    MERGE(i_tex_bits);
3408    MERGE(p_tex_bits);
3409    MERGE(i_count);
3410    MERGE(skip_count);
3411    MERGE(misc_bits);
3412    MERGE(current_picture.encoding_error[0]);
3413    MERGE(current_picture.encoding_error[1]);
3414    MERGE(current_picture.encoding_error[2]);
3415
3416    if (dst->noise_reduction){
3417        for(i=0; i<64; i++){
3418            MERGE(dct_error_sum[0][i]);
3419            MERGE(dct_error_sum[1][i]);
3420        }
3421    }
3422
3423    av_assert1(put_bits_count(&src->pb) % 8 ==0);
3424    av_assert1(put_bits_count(&dst->pb) % 8 ==0);
3425    ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3426    flush_put_bits(&dst->pb);
3427}
3428
3429static int estimate_qp(MpegEncContext *s, int dry_run){
3430    if (s->next_lambda){
3431        s->current_picture_ptr->f->quality =
3432        s->current_picture.f->quality = s->next_lambda;
3433        if(!dry_run) s->next_lambda= 0;
3434    } else if (!s->fixed_qscale) {
3435        int quality = ff_rate_estimate_qscale(s, dry_run);
3436        s->current_picture_ptr->f->quality =
3437        s->current_picture.f->quality = quality;
3438        if (s->current_picture.f->quality < 0)
3439            return -1;
3440    }
3441
3442    if(s->adaptive_quant){
3443        switch(s->codec_id){
3444        case AV_CODEC_ID_MPEG4:
3445            if (CONFIG_MPEG4_ENCODER)
3446                ff_clean_mpeg4_qscales(s);
3447            break;
3448        case AV_CODEC_ID_H263:
3449        case AV_CODEC_ID_H263P:
3450        case AV_CODEC_ID_FLV1:
3451            if (CONFIG_H263_ENCODER)
3452                ff_clean_h263_qscales(s);
3453            break;
3454        default:
3455            ff_init_qscale_tab(s);
3456        }
3457
3458        s->lambda= s->lambda_table[0];
3459        //FIXME broken
3460    }else
3461        s->lambda = s->current_picture.f->quality;
3462    update_qscale(s);
3463    return 0;
3464}
3465
3466/* must be called before writing the header */
3467static void set_frame_distances(MpegEncContext * s){
3468    av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3469    s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3470
3471    if(s->pict_type==AV_PICTURE_TYPE_B){
3472        s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3473        av_assert1(s->pb_time > 0 && s->pb_time < s->pp_time);
3474    }else{
3475        s->pp_time= s->time - s->last_non_b_time;
3476        s->last_non_b_time= s->time;
3477        av_assert1(s->picture_number==0 || s->pp_time > 0);
3478    }
3479}
3480
3481static int encode_picture(MpegEncContext *s, int picture_number)
3482{
3483    int i, ret;
3484    int bits;
3485    int context_count = s->slice_context_count;
3486
3487    s->picture_number = picture_number;
3488
3489    /* Reset the average MB variance */
3490    s->me.mb_var_sum_temp    =
3491    s->me.mc_mb_var_sum_temp = 0;
3492
3493    /* we need to initialize some time vars before we can encode B-frames */
3494    // RAL: Condition added for MPEG1VIDEO
3495    if (s->out_format == FMT_MPEG1 || (s->h263_pred && !s->msmpeg4_version))
3496        set_frame_distances(s);
3497    if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3498        ff_set_mpeg4_time(s);
3499
3500    s->me.scene_change_score=0;
3501
3502//    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3503
3504    if(s->pict_type==AV_PICTURE_TYPE_I){
3505        if(s->msmpeg4_version >= 3) s->no_rounding=1;
3506        else                        s->no_rounding=0;
3507    }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3508        if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3509            s->no_rounding ^= 1;
3510    }
3511
3512    if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3513        if (estimate_qp(s,1) < 0)
3514            return -1;
3515        ff_get_2pass_fcode(s);
3516    } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3517        if(s->pict_type==AV_PICTURE_TYPE_B)
3518            s->lambda= s->last_lambda_for[s->pict_type];
3519        else
3520            s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3521        update_qscale(s);
3522    }
3523
3524    if (s->out_format != FMT_MJPEG) {
3525        if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3526        if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3527        s->q_chroma_intra_matrix   = s->q_intra_matrix;
3528        s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3529    }
3530
3531    s->mb_intra=0; //for the rate distortion & bit compare functions
3532    for(i=1; i<context_count; i++){
3533        ret = ff_update_duplicate_context(s->thread_context[i], s);
3534        if (ret < 0)
3535            return ret;
3536    }
3537
3538    if(ff_init_me(s)<0)
3539        return -1;
3540
3541    /* Estimate motion for every MB */
3542    if(s->pict_type != AV_PICTURE_TYPE_I){
3543        s->lambda  = (s->lambda  * s->me_penalty_compensation + 128) >> 8;
3544        s->lambda2 = (s->lambda2 * (int64_t) s->me_penalty_compensation + 128) >> 8;
3545        if (s->pict_type != AV_PICTURE_TYPE_B) {
3546            if ((s->me_pre && s->last_non_b_pict_type == AV_PICTURE_TYPE_I) ||
3547                s->me_pre == 2) {
3548                s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3549            }
3550        }
3551
3552        s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3553    }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3554        /* I-Frame */
3555        for(i=0; i<s->mb_stride*s->mb_height; i++)
3556            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3557
3558        if(!s->fixed_qscale){
3559            /* finding spatial complexity for I-frame rate control */
3560            s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3561        }
3562    }
3563    for(i=1; i<context_count; i++){
3564        merge_context_after_me(s, s->thread_context[i]);
3565    }
3566    s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3567    s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3568    emms_c();
3569
3570    if (s->me.scene_change_score > s->scenechange_threshold &&
3571        s->pict_type == AV_PICTURE_TYPE_P) {
3572        s->pict_type= AV_PICTURE_TYPE_I;
3573        for(i=0; i<s->mb_stride*s->mb_height; i++)
3574            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3575        if(s->msmpeg4_version >= 3)
3576            s->no_rounding=1;
3577        ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3578                s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3579    }
3580
3581    if(!s->umvplus){
3582        if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3583            s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3584
3585            if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3586                int a,b;
3587                a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3588                b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3589                s->f_code= FFMAX3(s->f_code, a, b);
3590            }
3591
3592            ff_fix_long_p_mvs(s, s->intra_penalty ? CANDIDATE_MB_TYPE_INTER : CANDIDATE_MB_TYPE_INTRA);
3593            ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, !!s->intra_penalty);
3594            if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3595                int j;
3596                for(i=0; i<2; i++){
3597                    for(j=0; j<2; j++)
3598                        ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3599                                        s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, !!s->intra_penalty);
3600                }
3601            }
3602        } else if (s->pict_type == AV_PICTURE_TYPE_B) {
3603            int a, b;
3604
3605            a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3606            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3607            s->f_code = FFMAX(a, b);
3608
3609            a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3610            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3611            s->b_code = FFMAX(a, b);
3612
3613            ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3614            ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3615            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3616            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3617            if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3618                int dir, j;
3619                for(dir=0; dir<2; dir++){
3620                    for(i=0; i<2; i++){
3621                        for(j=0; j<2; j++){
3622                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3623                                          : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3624                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3625                                            s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3626                        }
3627                    }
3628                }
3629            }
3630        }
3631    }
3632
3633    if (estimate_qp(s, 0) < 0)
3634        return -1;
3635
3636    if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3637        s->pict_type == AV_PICTURE_TYPE_I &&
3638        !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3639        s->qscale= 3; //reduce clipping problems
3640
3641    if (s->out_format == FMT_MJPEG) {
3642        const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3643        const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3644
3645        if (s->avctx->intra_matrix) {
3646            chroma_matrix =
3647            luma_matrix = s->avctx->intra_matrix;
3648        }
3649        if (s->avctx->chroma_intra_matrix)
3650            chroma_matrix = s->avctx->chroma_intra_matrix;
3651
3652        /* for mjpeg, we do include qscale in the matrix */
3653        for(i=1;i<64;i++){
3654            int j = s->idsp.idct_permutation[i];
3655
3656            s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3657            s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3658        }
3659        s->y_dc_scale_table=
3660        s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3661        s->chroma_intra_matrix[0] =
3662        s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3663        ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3664                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3665        ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3666                       s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3667        s->qscale= 8;
3668
3669        if (s->codec_id == AV_CODEC_ID_AMV) {
3670            static const uint8_t y[32] = {13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3671            static const uint8_t c[32] = {14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3672            for (int i = 1; i < 64; i++) {
3673                int j = s->idsp.idct_permutation[ff_zigzag_direct[i]];
3674
3675                s->intra_matrix[j]        = sp5x_qscale_five_quant_table[0][i];
3676                s->chroma_intra_matrix[j] = sp5x_qscale_five_quant_table[1][i];
3677            }
3678            s->y_dc_scale_table = y;
3679            s->c_dc_scale_table = c;
3680            s->intra_matrix[0] = 13;
3681            s->chroma_intra_matrix[0] = 14;
3682            ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3683                              s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3684            ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3685                              s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3686            s->qscale = 8;
3687        }
3688    } else if (s->out_format == FMT_SPEEDHQ) {
3689        s->y_dc_scale_table=
3690        s->c_dc_scale_table= ff_mpeg2_dc_scale_table[3];
3691    }
3692
3693    //FIXME var duplication
3694    s->current_picture_ptr->f->key_frame =
3695    s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3696    s->current_picture_ptr->f->pict_type =
3697    s->current_picture.f->pict_type = s->pict_type;
3698
3699    if (s->current_picture.f->key_frame)
3700        s->picture_in_gop_number=0;
3701
3702    s->mb_x = s->mb_y = 0;
3703    s->last_bits= put_bits_count(&s->pb);
3704    switch(s->out_format) {
3705#if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
3706    case FMT_MJPEG:
3707        ff_mjpeg_amv_encode_picture_header(s);
3708        break;
3709#endif
3710    case FMT_SPEEDHQ:
3711        if (CONFIG_SPEEDHQ_ENCODER)
3712            ff_speedhq_encode_picture_header(s);
3713        break;
3714    case FMT_H261:
3715        if (CONFIG_H261_ENCODER)
3716            ff_h261_encode_picture_header(s, picture_number);
3717        break;
3718    case FMT_H263:
3719        if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3720            ff_wmv2_encode_picture_header(s, picture_number);
3721        else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3722            ff_msmpeg4_encode_picture_header(s, picture_number);
3723        else if (CONFIG_MPEG4_ENCODER && s->h263_pred) {
3724            ret = ff_mpeg4_encode_picture_header(s, picture_number);
3725            if (ret < 0)
3726                return ret;
3727        } else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3728            ret = ff_rv10_encode_picture_header(s, picture_number);
3729            if (ret < 0)
3730                return ret;
3731        }
3732        else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3733            ff_rv20_encode_picture_header(s, picture_number);
3734        else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3735            ff_flv_encode_picture_header(s, picture_number);
3736        else if (CONFIG_H263_ENCODER)
3737            ff_h263_encode_picture_header(s, picture_number);
3738        break;
3739    case FMT_MPEG1:
3740        if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3741            ff_mpeg1_encode_picture_header(s, picture_number);
3742        break;
3743    default:
3744        av_assert0(0);
3745    }
3746    bits= put_bits_count(&s->pb);
3747    s->header_bits= bits - s->last_bits;
3748
3749    for(i=1; i<context_count; i++){
3750        update_duplicate_context_after_me(s->thread_context[i], s);
3751    }
3752    s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3753    for(i=1; i<context_count; i++){
3754        if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3755            set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-BUF_BITS));
3756        merge_context_after_encode(s, s->thread_context[i]);
3757    }
3758    emms_c();
3759    return 0;
3760}
3761
3762static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3763    const int intra= s->mb_intra;
3764    int i;
3765
3766    s->dct_count[intra]++;
3767
3768    for(i=0; i<64; i++){
3769        int level= block[i];
3770
3771        if(level){
3772            if(level>0){
3773                s->dct_error_sum[intra][i] += level;
3774                level -= s->dct_offset[intra][i];
3775                if(level<0) level=0;
3776            }else{
3777                s->dct_error_sum[intra][i] -= level;
3778                level += s->dct_offset[intra][i];
3779                if(level>0) level=0;
3780            }
3781            block[i]= level;
3782        }
3783    }
3784}
3785
3786static int dct_quantize_trellis_c(MpegEncContext *s,
3787                                  int16_t *block, int n,
3788                                  int qscale, int *overflow){
3789    const int *qmat;
3790    const uint16_t *matrix;
3791    const uint8_t *scantable;
3792    const uint8_t *perm_scantable;
3793    int max=0;
3794    unsigned int threshold1, threshold2;
3795    int bias=0;
3796    int run_tab[65];
3797    int level_tab[65];
3798    int score_tab[65];
3799    int survivor[65];
3800    int survivor_count;
3801    int last_run=0;
3802    int last_level=0;
3803    int last_score= 0;
3804    int last_i;
3805    int coeff[2][64];
3806    int coeff_count[64];
3807    int qmul, qadd, start_i, last_non_zero, i, dc;
3808    const int esc_length= s->ac_esc_length;
3809    uint8_t * length;
3810    uint8_t * last_length;
3811    const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3812    int mpeg2_qscale;
3813
3814    s->fdsp.fdct(block);
3815
3816    if(s->dct_error_sum)
3817        s->denoise_dct(s, block);
3818    qmul= qscale*16;
3819    qadd= ((qscale-1)|1)*8;
3820
3821    if (s->q_scale_type) mpeg2_qscale = ff_mpeg2_non_linear_qscale[qscale];
3822    else                 mpeg2_qscale = qscale << 1;
3823
3824    if (s->mb_intra) {
3825        int q;
3826        scantable= s->intra_scantable.scantable;
3827        perm_scantable= s->intra_scantable.permutated;
3828        if (!s->h263_aic) {
3829            if (n < 4)
3830                q = s->y_dc_scale;
3831            else
3832                q = s->c_dc_scale;
3833            q = q << 3;
3834        } else{
3835            /* For AIC we skip quant/dequant of INTRADC */
3836            q = 1 << 3;
3837            qadd=0;
3838        }
3839
3840        /* note: block[0] is assumed to be positive */
3841        block[0] = (block[0] + (q >> 1)) / q;
3842        start_i = 1;
3843        last_non_zero = 0;
3844        qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3845        matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3846        if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3847            bias= 1<<(QMAT_SHIFT-1);
3848
3849        if (n > 3 && s->intra_chroma_ac_vlc_length) {
3850            length     = s->intra_chroma_ac_vlc_length;
3851            last_length= s->intra_chroma_ac_vlc_last_length;
3852        } else {
3853            length     = s->intra_ac_vlc_length;
3854            last_length= s->intra_ac_vlc_last_length;
3855        }
3856    } else {
3857        scantable= s->inter_scantable.scantable;
3858        perm_scantable= s->inter_scantable.permutated;
3859        start_i = 0;
3860        last_non_zero = -1;
3861        qmat = s->q_inter_matrix[qscale];
3862        matrix = s->inter_matrix;
3863        length     = s->inter_ac_vlc_length;
3864        last_length= s->inter_ac_vlc_last_length;
3865    }
3866    last_i= start_i;
3867
3868    threshold1= (1<<QMAT_SHIFT) - bias - 1;
3869    threshold2= (threshold1<<1);
3870
3871    for(i=63; i>=start_i; i--) {
3872        const int j = scantable[i];
3873        int level = block[j] * qmat[j];
3874
3875        if(((unsigned)(level+threshold1))>threshold2){
3876            last_non_zero = i;
3877            break;
3878        }
3879    }
3880
3881    for(i=start_i; i<=last_non_zero; i++) {
3882        const int j = scantable[i];
3883        int level = block[j] * qmat[j];
3884
3885//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3886//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3887        if(((unsigned)(level+threshold1))>threshold2){
3888            if(level>0){
3889                level= (bias + level)>>QMAT_SHIFT;
3890                coeff[0][i]= level;
3891                coeff[1][i]= level-1;
3892//                coeff[2][k]= level-2;
3893            }else{
3894                level= (bias - level)>>QMAT_SHIFT;
3895                coeff[0][i]= -level;
3896                coeff[1][i]= -level+1;
3897//                coeff[2][k]= -level+2;
3898            }
3899            coeff_count[i]= FFMIN(level, 2);
3900            av_assert2(coeff_count[i]);
3901            max |=level;
3902        }else{
3903            coeff[0][i]= (level>>31)|1;
3904            coeff_count[i]= 1;
3905        }
3906    }
3907
3908    *overflow= s->max_qcoeff < max; //overflow might have happened
3909
3910    if(last_non_zero < start_i){
3911        memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3912        return last_non_zero;
3913    }
3914
3915    score_tab[start_i]= 0;
3916    survivor[0]= start_i;
3917    survivor_count= 1;
3918
3919    for(i=start_i; i<=last_non_zero; i++){
3920        int level_index, j, zero_distortion;
3921        int dct_coeff= FFABS(block[ scantable[i] ]);
3922        int best_score=256*256*256*120;
3923
3924        if (s->fdsp.fdct == ff_fdct_ifast)
3925            dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3926        zero_distortion= dct_coeff*dct_coeff;
3927
3928        for(level_index=0; level_index < coeff_count[i]; level_index++){
3929            int distortion;
3930            int level= coeff[level_index][i];
3931            const int alevel= FFABS(level);
3932            int unquant_coeff;
3933
3934            av_assert2(level);
3935
3936            if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3937                unquant_coeff= alevel*qmul + qadd;
3938            } else if(s->out_format == FMT_MJPEG) {
3939                j = s->idsp.idct_permutation[scantable[i]];
3940                unquant_coeff = alevel * matrix[j] * 8;
3941            }else{ // MPEG-1
3942                j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3943                if(s->mb_intra){
3944                        unquant_coeff = (int)(  alevel  * mpeg2_qscale * matrix[j]) >> 4;
3945                        unquant_coeff =   (unquant_coeff - 1) | 1;
3946                }else{
3947                        unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[j])) >> 5;
3948                        unquant_coeff =   (unquant_coeff - 1) | 1;
3949                }
3950                unquant_coeff<<= 3;
3951            }
3952
3953            distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3954            level+=64;
3955            if((level&(~127)) == 0){
3956                for(j=survivor_count-1; j>=0; j--){
3957                    int run= i - survivor[j];
3958                    int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3959                    score += score_tab[i-run];
3960
3961                    if(score < best_score){
3962                        best_score= score;
3963                        run_tab[i+1]= run;
3964                        level_tab[i+1]= level-64;
3965                    }
3966                }
3967
3968                if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3969                    for(j=survivor_count-1; j>=0; j--){
3970                        int run= i - survivor[j];
3971                        int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3972                        score += score_tab[i-run];
3973                        if(score < last_score){
3974                            last_score= score;
3975                            last_run= run;
3976                            last_level= level-64;
3977                            last_i= i+1;
3978                        }
3979                    }
3980                }
3981            }else{
3982                distortion += esc_length*lambda;
3983                for(j=survivor_count-1; j>=0; j--){
3984                    int run= i - survivor[j];
3985                    int score= distortion + score_tab[i-run];
3986
3987                    if(score < best_score){
3988                        best_score= score;
3989                        run_tab[i+1]= run;
3990                        level_tab[i+1]= level-64;
3991                    }
3992                }
3993
3994                if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3995                  for(j=survivor_count-1; j>=0; j--){
3996                        int run= i - survivor[j];
3997                        int score= distortion + score_tab[i-run];
3998                        if(score < last_score){
3999                            last_score= score;
4000                            last_run= run;
4001                            last_level= level-64;
4002                            last_i= i+1;
4003                        }
4004                    }
4005                }
4006            }
4007        }
4008
4009        score_tab[i+1]= best_score;
4010
4011        // Note: there is a vlc code in MPEG-4 which is 1 bit shorter then another one with a shorter run and the same level
4012        if(last_non_zero <= 27){
4013            for(; survivor_count; survivor_count--){
4014                if(score_tab[ survivor[survivor_count-1] ] <= best_score)
4015                    break;
4016            }
4017        }else{
4018            for(; survivor_count; survivor_count--){
4019                if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4020                    break;
4021            }
4022        }
4023
4024        survivor[ survivor_count++ ]= i+1;
4025    }
4026
4027    if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4028        last_score= 256*256*256*120;
4029        for(i= survivor[0]; i<=last_non_zero + 1; i++){
4030            int score= score_tab[i];
4031            if (i)
4032                score += lambda * 2; // FIXME more exact?
4033
4034            if(score < last_score){
4035                last_score= score;
4036                last_i= i;
4037                last_level= level_tab[i];
4038                last_run= run_tab[i];
4039            }
4040        }
4041    }
4042
4043    s->coded_score[n] = last_score;
4044
4045    dc= FFABS(block[0]);
4046    last_non_zero= last_i - 1;
4047    memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4048
4049    if(last_non_zero < start_i)
4050        return last_non_zero;
4051
4052    if(last_non_zero == 0 && start_i == 0){
4053        int best_level= 0;
4054        int best_score= dc * dc;
4055
4056        for(i=0; i<coeff_count[0]; i++){
4057            int level= coeff[i][0];
4058            int alevel= FFABS(level);
4059            int unquant_coeff, score, distortion;
4060
4061            if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4062                    unquant_coeff= (alevel*qmul + qadd)>>3;
4063            } else{ // MPEG-1
4064                    unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[0])) >> 5;
4065                    unquant_coeff =   (unquant_coeff - 1) | 1;
4066            }
4067            unquant_coeff = (unquant_coeff + 4) >> 3;
4068            unquant_coeff<<= 3 + 3;
4069
4070            distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4071            level+=64;
4072            if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4073            else                    score= distortion + esc_length*lambda;
4074
4075            if(score < best_score){
4076                best_score= score;
4077                best_level= level - 64;
4078            }
4079        }
4080        block[0]= best_level;
4081        s->coded_score[n] = best_score - dc*dc;
4082        if(best_level == 0) return -1;
4083        else                return last_non_zero;
4084    }
4085
4086    i= last_i;
4087    av_assert2(last_level);
4088
4089    block[ perm_scantable[last_non_zero] ]= last_level;
4090    i -= last_run + 1;
4091
4092    for(; i>start_i; i -= run_tab[i] + 1){
4093        block[ perm_scantable[i-1] ]= level_tab[i];
4094    }
4095
4096    return last_non_zero;
4097}
4098
4099static int16_t basis[64][64];
4100
4101static void build_basis(uint8_t *perm){
4102    int i, j, x, y;
4103    emms_c();
4104    for(i=0; i<8; i++){
4105        for(j=0; j<8; j++){
4106            for(y=0; y<8; y++){
4107                for(x=0; x<8; x++){
4108                    double s= 0.25*(1<<BASIS_SHIFT);
4109                    int index= 8*i + j;
4110                    int perm_index= perm[index];
4111                    if(i==0) s*= sqrt(0.5);
4112                    if(j==0) s*= sqrt(0.5);
4113                    basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4114                }
4115            }
4116        }
4117    }
4118}
4119
4120static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4121                        int16_t *block, int16_t *weight, int16_t *orig,
4122                        int n, int qscale){
4123    int16_t rem[64];
4124    LOCAL_ALIGNED_16(int16_t, d1, [64]);
4125    const uint8_t *scantable;
4126    const uint8_t *perm_scantable;
4127//    unsigned int threshold1, threshold2;
4128//    int bias=0;
4129    int run_tab[65];
4130    int prev_run=0;
4131    int prev_level=0;
4132    int qmul, qadd, start_i, last_non_zero, i, dc;
4133    uint8_t * length;
4134    uint8_t * last_length;
4135    int lambda;
4136    int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4137
4138    if(basis[0][0] == 0)
4139        build_basis(s->idsp.idct_permutation);
4140
4141    qmul= qscale*2;
4142    qadd= (qscale-1)|1;
4143    if (s->mb_intra) {
4144        scantable= s->intra_scantable.scantable;
4145        perm_scantable= s->intra_scantable.permutated;
4146        if (!s->h263_aic) {
4147            if (n < 4)
4148                q = s->y_dc_scale;
4149            else
4150                q = s->c_dc_scale;
4151        } else{
4152            /* For AIC we skip quant/dequant of INTRADC */
4153            q = 1;
4154            qadd=0;
4155        }
4156        q <<= RECON_SHIFT-3;
4157        /* note: block[0] is assumed to be positive */
4158        dc= block[0]*q;
4159//        block[0] = (block[0] + (q >> 1)) / q;
4160        start_i = 1;
4161//        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4162//            bias= 1<<(QMAT_SHIFT-1);
4163        if (n > 3 && s->intra_chroma_ac_vlc_length) {
4164            length     = s->intra_chroma_ac_vlc_length;
4165            last_length= s->intra_chroma_ac_vlc_last_length;
4166        } else {
4167            length     = s->intra_ac_vlc_length;
4168            last_length= s->intra_ac_vlc_last_length;
4169        }
4170    } else {
4171        scantable= s->inter_scantable.scantable;
4172        perm_scantable= s->inter_scantable.permutated;
4173        dc= 0;
4174        start_i = 0;
4175        length     = s->inter_ac_vlc_length;
4176        last_length= s->inter_ac_vlc_last_length;
4177    }
4178    last_non_zero = s->block_last_index[n];
4179
4180    dc += (1<<(RECON_SHIFT-1));
4181    for(i=0; i<64; i++){
4182        rem[i] = dc - (orig[i] << RECON_SHIFT); // FIXME use orig directly instead of copying to rem[]
4183    }
4184
4185    sum=0;
4186    for(i=0; i<64; i++){
4187        int one= 36;
4188        int qns=4;
4189        int w;
4190
4191        w= FFABS(weight[i]) + qns*one;
4192        w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4193
4194        weight[i] = w;
4195//        w=weight[i] = (63*qns + (w/2)) / w;
4196
4197        av_assert2(w>0);
4198        av_assert2(w<(1<<6));
4199        sum += w*w;
4200    }
4201    lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4202
4203    run=0;
4204    rle_index=0;
4205    for(i=start_i; i<=last_non_zero; i++){
4206        int j= perm_scantable[i];
4207        const int level= block[j];
4208        int coeff;
4209
4210        if(level){
4211            if(level<0) coeff= qmul*level - qadd;
4212            else        coeff= qmul*level + qadd;
4213            run_tab[rle_index++]=run;
4214            run=0;
4215
4216            s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4217        }else{
4218            run++;
4219        }
4220    }
4221
4222    for(;;){
4223        int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4224        int best_coeff=0;
4225        int best_change=0;
4226        int run2, best_unquant_change=0, analyze_gradient;
4227        analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4228
4229        if(analyze_gradient){
4230            for(i=0; i<64; i++){
4231                int w= weight[i];
4232
4233                d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4234            }
4235            s->fdsp.fdct(d1);
4236        }
4237
4238        if(start_i){
4239            const int level= block[0];
4240            int change, old_coeff;
4241
4242            av_assert2(s->mb_intra);
4243
4244            old_coeff= q*level;
4245
4246            for(change=-1; change<=1; change+=2){
4247                int new_level= level + change;
4248                int score, new_coeff;
4249
4250                new_coeff= q*new_level;
4251                if(new_coeff >= 2048 || new_coeff < 0)
4252                    continue;
4253
4254                score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4255                                                  new_coeff - old_coeff);
4256                if(score<best_score){
4257                    best_score= score;
4258                    best_coeff= 0;
4259                    best_change= change;
4260                    best_unquant_change= new_coeff - old_coeff;
4261                }
4262            }
4263        }
4264
4265        run=0;
4266        rle_index=0;
4267        run2= run_tab[rle_index++];
4268        prev_level=0;
4269        prev_run=0;
4270
4271        for(i=start_i; i<64; i++){
4272            int j= perm_scantable[i];
4273            const int level= block[j];
4274            int change, old_coeff;
4275
4276            if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4277                break;
4278
4279            if(level){
4280                if(level<0) old_coeff= qmul*level - qadd;
4281                else        old_coeff= qmul*level + qadd;
4282                run2= run_tab[rle_index++]; //FIXME ! maybe after last
4283            }else{
4284                old_coeff=0;
4285                run2--;
4286                av_assert2(run2>=0 || i >= last_non_zero );
4287            }
4288
4289            for(change=-1; change<=1; change+=2){
4290                int new_level= level + change;
4291                int score, new_coeff, unquant_change;
4292
4293                score=0;
4294                if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4295                   continue;
4296
4297                if(new_level){
4298                    if(new_level<0) new_coeff= qmul*new_level - qadd;
4299                    else            new_coeff= qmul*new_level + qadd;
4300                    if(new_coeff >= 2048 || new_coeff <= -2048)
4301                        continue;
4302                    //FIXME check for overflow
4303
4304                    if(level){
4305                        if(level < 63 && level > -63){
4306                            if(i < last_non_zero)
4307                                score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4308                                         - length[UNI_AC_ENC_INDEX(run, level+64)];
4309                            else
4310                                score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4311                                         - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4312                        }
4313                    }else{
4314                        av_assert2(FFABS(new_level)==1);
4315
4316                        if(analyze_gradient){
4317                            int g= d1[ scantable[i] ];
4318                            if(g && (g^new_level) >= 0)
4319                                continue;
4320                        }
4321
4322                        if(i < last_non_zero){
4323                            int next_i= i + run2 + 1;
4324                            int next_level= block[ perm_scantable[next_i] ] + 64;
4325
4326                            if(next_level&(~127))
4327                                next_level= 0;
4328
4329                            if(next_i < last_non_zero)
4330                                score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4331                                         + length[UNI_AC_ENC_INDEX(run2, next_level)]
4332                                         - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4333                            else
4334                                score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4335                                        + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4336                                        - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4337                        }else{
4338                            score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4339                            if(prev_level){
4340                                score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4341                                        - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4342                            }
4343                        }
4344                    }
4345                }else{
4346                    new_coeff=0;
4347                    av_assert2(FFABS(level)==1);
4348
4349                    if(i < last_non_zero){
4350                        int next_i= i + run2 + 1;
4351                        int next_level= block[ perm_scantable[next_i] ] + 64;
4352
4353                        if(next_level&(~127))
4354                            next_level= 0;
4355
4356                        if(next_i < last_non_zero)
4357                            score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4358                                     - length[UNI_AC_ENC_INDEX(run2, next_level)]
4359                                     - length[UNI_AC_ENC_INDEX(run, 65)];
4360                        else
4361                            score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4362                                     - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4363                                     - length[UNI_AC_ENC_INDEX(run, 65)];
4364                    }else{
4365                        score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4366                        if(prev_level){
4367                            score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4368                                    - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4369                        }
4370                    }
4371                }
4372
4373                score *= lambda;
4374
4375                unquant_change= new_coeff - old_coeff;
4376                av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4377
4378                score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4379                                                   unquant_change);
4380                if(score<best_score){
4381                    best_score= score;
4382                    best_coeff= i;
4383                    best_change= change;
4384                    best_unquant_change= unquant_change;
4385                }
4386            }
4387            if(level){
4388                prev_level= level + 64;
4389                if(prev_level&(~127))
4390                    prev_level= 0;
4391                prev_run= run;
4392                run=0;
4393            }else{
4394                run++;
4395            }
4396        }
4397
4398        if(best_change){
4399            int j= perm_scantable[ best_coeff ];
4400
4401            block[j] += best_change;
4402
4403            if(best_coeff > last_non_zero){
4404                last_non_zero= best_coeff;
4405                av_assert2(block[j]);
4406            }else{
4407                for(; last_non_zero>=start_i; last_non_zero--){
4408                    if(block[perm_scantable[last_non_zero]])
4409                        break;
4410                }
4411            }
4412
4413            run=0;
4414            rle_index=0;
4415            for(i=start_i; i<=last_non_zero; i++){
4416                int j= perm_scantable[i];
4417                const int level= block[j];
4418
4419                 if(level){
4420                     run_tab[rle_index++]=run;
4421                     run=0;
4422                 }else{
4423                     run++;
4424                 }
4425            }
4426
4427            s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4428        }else{
4429            break;
4430        }
4431    }
4432
4433    return last_non_zero;
4434}
4435
4436/**
4437 * Permute an 8x8 block according to permutation.
4438 * @param block the block which will be permuted according to
4439 *              the given permutation vector
4440 * @param permutation the permutation vector
4441 * @param last the last non zero coefficient in scantable order, used to
4442 *             speed the permutation up
4443 * @param scantable the used scantable, this is only used to speed the
4444 *                  permutation up, the block is not (inverse) permutated
4445 *                  to scantable order!
4446 */
4447void ff_block_permute(int16_t *block, uint8_t *permutation,
4448                      const uint8_t *scantable, int last)
4449{
4450    int i;
4451    int16_t temp[64];
4452
4453    if (last <= 0)
4454        return;
4455    //FIXME it is ok but not clean and might fail for some permutations
4456    // if (permutation[1] == 1)
4457    // return;
4458
4459    for (i = 0; i <= last; i++) {
4460        const int j = scantable[i];
4461        temp[j] = block[j];
4462        block[j] = 0;
4463    }
4464
4465    for (i = 0; i <= last; i++) {
4466        const int j = scantable[i];
4467        const int perm_j = permutation[j];
4468        block[perm_j] = temp[j];
4469    }
4470}
4471
4472int ff_dct_quantize_c(MpegEncContext *s,
4473                        int16_t *block, int n,
4474                        int qscale, int *overflow)
4475{
4476    int i, j, level, last_non_zero, q, start_i;
4477    const int *qmat;
4478    const uint8_t *scantable;
4479    int bias;
4480    int max=0;
4481    unsigned int threshold1, threshold2;
4482
4483    s->fdsp.fdct(block);
4484
4485    if(s->dct_error_sum)
4486        s->denoise_dct(s, block);
4487
4488    if (s->mb_intra) {
4489        scantable= s->intra_scantable.scantable;
4490        if (!s->h263_aic) {
4491            if (n < 4)
4492                q = s->y_dc_scale;
4493            else
4494                q = s->c_dc_scale;
4495            q = q << 3;
4496        } else
4497            /* For AIC we skip quant/dequant of INTRADC */
4498            q = 1 << 3;
4499
4500        /* note: block[0] is assumed to be positive */
4501        block[0] = (block[0] + (q >> 1)) / q;
4502        start_i = 1;
4503        last_non_zero = 0;
4504        qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4505        bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4506    } else {
4507        scantable= s->inter_scantable.scantable;
4508        start_i = 0;
4509        last_non_zero = -1;
4510        qmat = s->q_inter_matrix[qscale];
4511        bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4512    }
4513    threshold1= (1<<QMAT_SHIFT) - bias - 1;
4514    threshold2= (threshold1<<1);
4515    for(i=63;i>=start_i;i--) {
4516        j = scantable[i];
4517        level = block[j] * qmat[j];
4518
4519        if(((unsigned)(level+threshold1))>threshold2){
4520            last_non_zero = i;
4521            break;
4522        }else{
4523            block[j]=0;
4524        }
4525    }
4526    for(i=start_i; i<=last_non_zero; i++) {
4527        j = scantable[i];
4528        level = block[j] * qmat[j];
4529
4530//        if(   bias+level >= (1<<QMAT_SHIFT)
4531//           || bias-level >= (1<<QMAT_SHIFT)){
4532        if(((unsigned)(level+threshold1))>threshold2){
4533            if(level>0){
4534                level= (bias + level)>>QMAT_SHIFT;
4535                block[j]= level;
4536            }else{
4537                level= (bias - level)>>QMAT_SHIFT;
4538                block[j]= -level;
4539            }
4540            max |=level;
4541        }else{
4542            block[j]=0;
4543        }
4544    }
4545    *overflow= s->max_qcoeff < max; //overflow might have happened
4546
4547    /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4548    if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4549        ff_block_permute(block, s->idsp.idct_permutation,
4550                      scantable, last_non_zero);
4551
4552    return last_non_zero;
4553}
4554