1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * DSP utils 3cabdff1aSopenharmony_ci * Copyright (c) 2000, 2001 Fabrice Bellard 4cabdff1aSopenharmony_ci * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> 5cabdff1aSopenharmony_ci * 6cabdff1aSopenharmony_ci * This file is part of FFmpeg. 7cabdff1aSopenharmony_ci * 8cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 9cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 10cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 11cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 12cabdff1aSopenharmony_ci * 13cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 14cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 15cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16cabdff1aSopenharmony_ci * Lesser General Public License for more details. 17cabdff1aSopenharmony_ci * 18cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 19cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 20cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21cabdff1aSopenharmony_ci */ 22cabdff1aSopenharmony_ci 23cabdff1aSopenharmony_ci#include "libavutil/attributes.h" 24cabdff1aSopenharmony_ci#include "libavutil/internal.h" 25cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h" 26cabdff1aSopenharmony_ci#include "avcodec.h" 27cabdff1aSopenharmony_ci#include "copy_block.h" 28cabdff1aSopenharmony_ci#include "simple_idct.h" 29cabdff1aSopenharmony_ci#include "me_cmp.h" 30cabdff1aSopenharmony_ci#include "mpegvideoenc.h" 31cabdff1aSopenharmony_ci#include "config.h" 32cabdff1aSopenharmony_ci#include "config_components.h" 33cabdff1aSopenharmony_ci 34cabdff1aSopenharmony_ci/* (i - 256) * (i - 256) */ 35cabdff1aSopenharmony_ciconst uint32_t ff_square_tab[512] = { 36cabdff1aSopenharmony_ci 65536, 65025, 64516, 64009, 63504, 63001, 62500, 62001, 61504, 61009, 60516, 60025, 59536, 59049, 58564, 58081, 37cabdff1aSopenharmony_ci 57600, 57121, 56644, 56169, 55696, 55225, 54756, 54289, 53824, 53361, 52900, 52441, 51984, 51529, 51076, 50625, 38cabdff1aSopenharmony_ci 50176, 49729, 49284, 48841, 48400, 47961, 47524, 47089, 46656, 46225, 45796, 45369, 44944, 44521, 44100, 43681, 39cabdff1aSopenharmony_ci 43264, 42849, 42436, 42025, 41616, 41209, 40804, 40401, 40000, 39601, 39204, 38809, 38416, 38025, 37636, 37249, 40cabdff1aSopenharmony_ci 36864, 36481, 36100, 35721, 35344, 34969, 34596, 34225, 33856, 33489, 33124, 32761, 32400, 32041, 31684, 31329, 41cabdff1aSopenharmony_ci 30976, 30625, 30276, 29929, 29584, 29241, 28900, 28561, 28224, 27889, 27556, 27225, 26896, 26569, 26244, 25921, 42cabdff1aSopenharmony_ci 25600, 25281, 24964, 24649, 24336, 24025, 23716, 23409, 23104, 22801, 22500, 22201, 21904, 21609, 21316, 21025, 43cabdff1aSopenharmony_ci 20736, 20449, 20164, 19881, 19600, 19321, 19044, 18769, 18496, 18225, 17956, 17689, 17424, 17161, 16900, 16641, 44cabdff1aSopenharmony_ci 16384, 16129, 15876, 15625, 15376, 15129, 14884, 14641, 14400, 14161, 13924, 13689, 13456, 13225, 12996, 12769, 45cabdff1aSopenharmony_ci 12544, 12321, 12100, 11881, 11664, 11449, 11236, 11025, 10816, 10609, 10404, 10201, 10000, 9801, 9604, 9409, 46cabdff1aSopenharmony_ci 9216, 9025, 8836, 8649, 8464, 8281, 8100, 7921, 7744, 7569, 7396, 7225, 7056, 6889, 6724, 6561, 47cabdff1aSopenharmony_ci 6400, 6241, 6084, 5929, 5776, 5625, 5476, 5329, 5184, 5041, 4900, 4761, 4624, 4489, 4356, 4225, 48cabdff1aSopenharmony_ci 4096, 3969, 3844, 3721, 3600, 3481, 3364, 3249, 3136, 3025, 2916, 2809, 2704, 2601, 2500, 2401, 49cabdff1aSopenharmony_ci 2304, 2209, 2116, 2025, 1936, 1849, 1764, 1681, 1600, 1521, 1444, 1369, 1296, 1225, 1156, 1089, 50cabdff1aSopenharmony_ci 1024, 961, 900, 841, 784, 729, 676, 625, 576, 529, 484, 441, 400, 361, 324, 289, 51cabdff1aSopenharmony_ci 256, 225, 196, 169, 144, 121, 100, 81, 64, 49, 36, 25, 16, 9, 4, 1, 52cabdff1aSopenharmony_ci 0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225, 53cabdff1aSopenharmony_ci 256, 289, 324, 361, 400, 441, 484, 529, 576, 625, 676, 729, 784, 841, 900, 961, 54cabdff1aSopenharmony_ci 1024, 1089, 1156, 1225, 1296, 1369, 1444, 1521, 1600, 1681, 1764, 1849, 1936, 2025, 2116, 2209, 55cabdff1aSopenharmony_ci 2304, 2401, 2500, 2601, 2704, 2809, 2916, 3025, 3136, 3249, 3364, 3481, 3600, 3721, 3844, 3969, 56cabdff1aSopenharmony_ci 4096, 4225, 4356, 4489, 4624, 4761, 4900, 5041, 5184, 5329, 5476, 5625, 5776, 5929, 6084, 6241, 57cabdff1aSopenharmony_ci 6400, 6561, 6724, 6889, 7056, 7225, 7396, 7569, 7744, 7921, 8100, 8281, 8464, 8649, 8836, 9025, 58cabdff1aSopenharmony_ci 9216, 9409, 9604, 9801, 10000, 10201, 10404, 10609, 10816, 11025, 11236, 11449, 11664, 11881, 12100, 12321, 59cabdff1aSopenharmony_ci 12544, 12769, 12996, 13225, 13456, 13689, 13924, 14161, 14400, 14641, 14884, 15129, 15376, 15625, 15876, 16129, 60cabdff1aSopenharmony_ci 16384, 16641, 16900, 17161, 17424, 17689, 17956, 18225, 18496, 18769, 19044, 19321, 19600, 19881, 20164, 20449, 61cabdff1aSopenharmony_ci 20736, 21025, 21316, 21609, 21904, 22201, 22500, 22801, 23104, 23409, 23716, 24025, 24336, 24649, 24964, 25281, 62cabdff1aSopenharmony_ci 25600, 25921, 26244, 26569, 26896, 27225, 27556, 27889, 28224, 28561, 28900, 29241, 29584, 29929, 30276, 30625, 63cabdff1aSopenharmony_ci 30976, 31329, 31684, 32041, 32400, 32761, 33124, 33489, 33856, 34225, 34596, 34969, 35344, 35721, 36100, 36481, 64cabdff1aSopenharmony_ci 36864, 37249, 37636, 38025, 38416, 38809, 39204, 39601, 40000, 40401, 40804, 41209, 41616, 42025, 42436, 42849, 65cabdff1aSopenharmony_ci 43264, 43681, 44100, 44521, 44944, 45369, 45796, 46225, 46656, 47089, 47524, 47961, 48400, 48841, 49284, 49729, 66cabdff1aSopenharmony_ci 50176, 50625, 51076, 51529, 51984, 52441, 52900, 53361, 53824, 54289, 54756, 55225, 55696, 56169, 56644, 57121, 67cabdff1aSopenharmony_ci 57600, 58081, 58564, 59049, 59536, 60025, 60516, 61009, 61504, 62001, 62500, 63001, 63504, 64009, 64516, 65025, 68cabdff1aSopenharmony_ci}; 69cabdff1aSopenharmony_ci 70cabdff1aSopenharmony_cistatic int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 71cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 72cabdff1aSopenharmony_ci{ 73cabdff1aSopenharmony_ci int s = 0, i; 74cabdff1aSopenharmony_ci const uint32_t *sq = ff_square_tab + 256; 75cabdff1aSopenharmony_ci 76cabdff1aSopenharmony_ci for (i = 0; i < h; i++) { 77cabdff1aSopenharmony_ci s += sq[pix1[0] - pix2[0]]; 78cabdff1aSopenharmony_ci s += sq[pix1[1] - pix2[1]]; 79cabdff1aSopenharmony_ci s += sq[pix1[2] - pix2[2]]; 80cabdff1aSopenharmony_ci s += sq[pix1[3] - pix2[3]]; 81cabdff1aSopenharmony_ci pix1 += stride; 82cabdff1aSopenharmony_ci pix2 += stride; 83cabdff1aSopenharmony_ci } 84cabdff1aSopenharmony_ci return s; 85cabdff1aSopenharmony_ci} 86cabdff1aSopenharmony_ci 87cabdff1aSopenharmony_cistatic int sse8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 88cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 89cabdff1aSopenharmony_ci{ 90cabdff1aSopenharmony_ci int s = 0, i; 91cabdff1aSopenharmony_ci const uint32_t *sq = ff_square_tab + 256; 92cabdff1aSopenharmony_ci 93cabdff1aSopenharmony_ci for (i = 0; i < h; i++) { 94cabdff1aSopenharmony_ci s += sq[pix1[0] - pix2[0]]; 95cabdff1aSopenharmony_ci s += sq[pix1[1] - pix2[1]]; 96cabdff1aSopenharmony_ci s += sq[pix1[2] - pix2[2]]; 97cabdff1aSopenharmony_ci s += sq[pix1[3] - pix2[3]]; 98cabdff1aSopenharmony_ci s += sq[pix1[4] - pix2[4]]; 99cabdff1aSopenharmony_ci s += sq[pix1[5] - pix2[5]]; 100cabdff1aSopenharmony_ci s += sq[pix1[6] - pix2[6]]; 101cabdff1aSopenharmony_ci s += sq[pix1[7] - pix2[7]]; 102cabdff1aSopenharmony_ci pix1 += stride; 103cabdff1aSopenharmony_ci pix2 += stride; 104cabdff1aSopenharmony_ci } 105cabdff1aSopenharmony_ci return s; 106cabdff1aSopenharmony_ci} 107cabdff1aSopenharmony_ci 108cabdff1aSopenharmony_cistatic int sse16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 109cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 110cabdff1aSopenharmony_ci{ 111cabdff1aSopenharmony_ci int s = 0, i; 112cabdff1aSopenharmony_ci const uint32_t *sq = ff_square_tab + 256; 113cabdff1aSopenharmony_ci 114cabdff1aSopenharmony_ci for (i = 0; i < h; i++) { 115cabdff1aSopenharmony_ci s += sq[pix1[0] - pix2[0]]; 116cabdff1aSopenharmony_ci s += sq[pix1[1] - pix2[1]]; 117cabdff1aSopenharmony_ci s += sq[pix1[2] - pix2[2]]; 118cabdff1aSopenharmony_ci s += sq[pix1[3] - pix2[3]]; 119cabdff1aSopenharmony_ci s += sq[pix1[4] - pix2[4]]; 120cabdff1aSopenharmony_ci s += sq[pix1[5] - pix2[5]]; 121cabdff1aSopenharmony_ci s += sq[pix1[6] - pix2[6]]; 122cabdff1aSopenharmony_ci s += sq[pix1[7] - pix2[7]]; 123cabdff1aSopenharmony_ci s += sq[pix1[8] - pix2[8]]; 124cabdff1aSopenharmony_ci s += sq[pix1[9] - pix2[9]]; 125cabdff1aSopenharmony_ci s += sq[pix1[10] - pix2[10]]; 126cabdff1aSopenharmony_ci s += sq[pix1[11] - pix2[11]]; 127cabdff1aSopenharmony_ci s += sq[pix1[12] - pix2[12]]; 128cabdff1aSopenharmony_ci s += sq[pix1[13] - pix2[13]]; 129cabdff1aSopenharmony_ci s += sq[pix1[14] - pix2[14]]; 130cabdff1aSopenharmony_ci s += sq[pix1[15] - pix2[15]]; 131cabdff1aSopenharmony_ci 132cabdff1aSopenharmony_ci pix1 += stride; 133cabdff1aSopenharmony_ci pix2 += stride; 134cabdff1aSopenharmony_ci } 135cabdff1aSopenharmony_ci return s; 136cabdff1aSopenharmony_ci} 137cabdff1aSopenharmony_ci 138cabdff1aSopenharmony_cistatic int sum_abs_dctelem_c(int16_t *block) 139cabdff1aSopenharmony_ci{ 140cabdff1aSopenharmony_ci int sum = 0, i; 141cabdff1aSopenharmony_ci 142cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) 143cabdff1aSopenharmony_ci sum += FFABS(block[i]); 144cabdff1aSopenharmony_ci return sum; 145cabdff1aSopenharmony_ci} 146cabdff1aSopenharmony_ci 147cabdff1aSopenharmony_ci#define avg2(a, b) (((a) + (b) + 1) >> 1) 148cabdff1aSopenharmony_ci#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2) 149cabdff1aSopenharmony_ci 150cabdff1aSopenharmony_cistatic inline int pix_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 151cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 152cabdff1aSopenharmony_ci{ 153cabdff1aSopenharmony_ci int s = 0, i; 154cabdff1aSopenharmony_ci 155cabdff1aSopenharmony_ci for (i = 0; i < h; i++) { 156cabdff1aSopenharmony_ci s += abs(pix1[0] - pix2[0]); 157cabdff1aSopenharmony_ci s += abs(pix1[1] - pix2[1]); 158cabdff1aSopenharmony_ci s += abs(pix1[2] - pix2[2]); 159cabdff1aSopenharmony_ci s += abs(pix1[3] - pix2[3]); 160cabdff1aSopenharmony_ci s += abs(pix1[4] - pix2[4]); 161cabdff1aSopenharmony_ci s += abs(pix1[5] - pix2[5]); 162cabdff1aSopenharmony_ci s += abs(pix1[6] - pix2[6]); 163cabdff1aSopenharmony_ci s += abs(pix1[7] - pix2[7]); 164cabdff1aSopenharmony_ci s += abs(pix1[8] - pix2[8]); 165cabdff1aSopenharmony_ci s += abs(pix1[9] - pix2[9]); 166cabdff1aSopenharmony_ci s += abs(pix1[10] - pix2[10]); 167cabdff1aSopenharmony_ci s += abs(pix1[11] - pix2[11]); 168cabdff1aSopenharmony_ci s += abs(pix1[12] - pix2[12]); 169cabdff1aSopenharmony_ci s += abs(pix1[13] - pix2[13]); 170cabdff1aSopenharmony_ci s += abs(pix1[14] - pix2[14]); 171cabdff1aSopenharmony_ci s += abs(pix1[15] - pix2[15]); 172cabdff1aSopenharmony_ci pix1 += stride; 173cabdff1aSopenharmony_ci pix2 += stride; 174cabdff1aSopenharmony_ci } 175cabdff1aSopenharmony_ci return s; 176cabdff1aSopenharmony_ci} 177cabdff1aSopenharmony_ci 178cabdff1aSopenharmony_cistatic inline int pix_median_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 179cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 180cabdff1aSopenharmony_ci{ 181cabdff1aSopenharmony_ci int s = 0, i, j; 182cabdff1aSopenharmony_ci 183cabdff1aSopenharmony_ci#define V(x) (pix1[x] - pix2[x]) 184cabdff1aSopenharmony_ci 185cabdff1aSopenharmony_ci s += abs(V(0)); 186cabdff1aSopenharmony_ci s += abs(V(1) - V(0)); 187cabdff1aSopenharmony_ci s += abs(V(2) - V(1)); 188cabdff1aSopenharmony_ci s += abs(V(3) - V(2)); 189cabdff1aSopenharmony_ci s += abs(V(4) - V(3)); 190cabdff1aSopenharmony_ci s += abs(V(5) - V(4)); 191cabdff1aSopenharmony_ci s += abs(V(6) - V(5)); 192cabdff1aSopenharmony_ci s += abs(V(7) - V(6)); 193cabdff1aSopenharmony_ci s += abs(V(8) - V(7)); 194cabdff1aSopenharmony_ci s += abs(V(9) - V(8)); 195cabdff1aSopenharmony_ci s += abs(V(10) - V(9)); 196cabdff1aSopenharmony_ci s += abs(V(11) - V(10)); 197cabdff1aSopenharmony_ci s += abs(V(12) - V(11)); 198cabdff1aSopenharmony_ci s += abs(V(13) - V(12)); 199cabdff1aSopenharmony_ci s += abs(V(14) - V(13)); 200cabdff1aSopenharmony_ci s += abs(V(15) - V(14)); 201cabdff1aSopenharmony_ci 202cabdff1aSopenharmony_ci pix1 += stride; 203cabdff1aSopenharmony_ci pix2 += stride; 204cabdff1aSopenharmony_ci 205cabdff1aSopenharmony_ci for (i = 1; i < h; i++) { 206cabdff1aSopenharmony_ci s += abs(V(0) - V(-stride)); 207cabdff1aSopenharmony_ci for (j = 1; j < 16; j++) 208cabdff1aSopenharmony_ci s += abs(V(j) - mid_pred(V(j-stride), V(j-1), V(j-stride) + V(j-1) - V(j-stride-1))); 209cabdff1aSopenharmony_ci pix1 += stride; 210cabdff1aSopenharmony_ci pix2 += stride; 211cabdff1aSopenharmony_ci 212cabdff1aSopenharmony_ci } 213cabdff1aSopenharmony_ci#undef V 214cabdff1aSopenharmony_ci return s; 215cabdff1aSopenharmony_ci} 216cabdff1aSopenharmony_ci 217cabdff1aSopenharmony_cistatic int pix_abs16_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 218cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 219cabdff1aSopenharmony_ci{ 220cabdff1aSopenharmony_ci int s = 0, i; 221cabdff1aSopenharmony_ci 222cabdff1aSopenharmony_ci for (i = 0; i < h; i++) { 223cabdff1aSopenharmony_ci s += abs(pix1[0] - avg2(pix2[0], pix2[1])); 224cabdff1aSopenharmony_ci s += abs(pix1[1] - avg2(pix2[1], pix2[2])); 225cabdff1aSopenharmony_ci s += abs(pix1[2] - avg2(pix2[2], pix2[3])); 226cabdff1aSopenharmony_ci s += abs(pix1[3] - avg2(pix2[3], pix2[4])); 227cabdff1aSopenharmony_ci s += abs(pix1[4] - avg2(pix2[4], pix2[5])); 228cabdff1aSopenharmony_ci s += abs(pix1[5] - avg2(pix2[5], pix2[6])); 229cabdff1aSopenharmony_ci s += abs(pix1[6] - avg2(pix2[6], pix2[7])); 230cabdff1aSopenharmony_ci s += abs(pix1[7] - avg2(pix2[7], pix2[8])); 231cabdff1aSopenharmony_ci s += abs(pix1[8] - avg2(pix2[8], pix2[9])); 232cabdff1aSopenharmony_ci s += abs(pix1[9] - avg2(pix2[9], pix2[10])); 233cabdff1aSopenharmony_ci s += abs(pix1[10] - avg2(pix2[10], pix2[11])); 234cabdff1aSopenharmony_ci s += abs(pix1[11] - avg2(pix2[11], pix2[12])); 235cabdff1aSopenharmony_ci s += abs(pix1[12] - avg2(pix2[12], pix2[13])); 236cabdff1aSopenharmony_ci s += abs(pix1[13] - avg2(pix2[13], pix2[14])); 237cabdff1aSopenharmony_ci s += abs(pix1[14] - avg2(pix2[14], pix2[15])); 238cabdff1aSopenharmony_ci s += abs(pix1[15] - avg2(pix2[15], pix2[16])); 239cabdff1aSopenharmony_ci pix1 += stride; 240cabdff1aSopenharmony_ci pix2 += stride; 241cabdff1aSopenharmony_ci } 242cabdff1aSopenharmony_ci return s; 243cabdff1aSopenharmony_ci} 244cabdff1aSopenharmony_ci 245cabdff1aSopenharmony_cistatic int pix_abs16_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 246cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 247cabdff1aSopenharmony_ci{ 248cabdff1aSopenharmony_ci int s = 0, i; 249cabdff1aSopenharmony_ci uint8_t *pix3 = pix2 + stride; 250cabdff1aSopenharmony_ci 251cabdff1aSopenharmony_ci for (i = 0; i < h; i++) { 252cabdff1aSopenharmony_ci s += abs(pix1[0] - avg2(pix2[0], pix3[0])); 253cabdff1aSopenharmony_ci s += abs(pix1[1] - avg2(pix2[1], pix3[1])); 254cabdff1aSopenharmony_ci s += abs(pix1[2] - avg2(pix2[2], pix3[2])); 255cabdff1aSopenharmony_ci s += abs(pix1[3] - avg2(pix2[3], pix3[3])); 256cabdff1aSopenharmony_ci s += abs(pix1[4] - avg2(pix2[4], pix3[4])); 257cabdff1aSopenharmony_ci s += abs(pix1[5] - avg2(pix2[5], pix3[5])); 258cabdff1aSopenharmony_ci s += abs(pix1[6] - avg2(pix2[6], pix3[6])); 259cabdff1aSopenharmony_ci s += abs(pix1[7] - avg2(pix2[7], pix3[7])); 260cabdff1aSopenharmony_ci s += abs(pix1[8] - avg2(pix2[8], pix3[8])); 261cabdff1aSopenharmony_ci s += abs(pix1[9] - avg2(pix2[9], pix3[9])); 262cabdff1aSopenharmony_ci s += abs(pix1[10] - avg2(pix2[10], pix3[10])); 263cabdff1aSopenharmony_ci s += abs(pix1[11] - avg2(pix2[11], pix3[11])); 264cabdff1aSopenharmony_ci s += abs(pix1[12] - avg2(pix2[12], pix3[12])); 265cabdff1aSopenharmony_ci s += abs(pix1[13] - avg2(pix2[13], pix3[13])); 266cabdff1aSopenharmony_ci s += abs(pix1[14] - avg2(pix2[14], pix3[14])); 267cabdff1aSopenharmony_ci s += abs(pix1[15] - avg2(pix2[15], pix3[15])); 268cabdff1aSopenharmony_ci pix1 += stride; 269cabdff1aSopenharmony_ci pix2 += stride; 270cabdff1aSopenharmony_ci pix3 += stride; 271cabdff1aSopenharmony_ci } 272cabdff1aSopenharmony_ci return s; 273cabdff1aSopenharmony_ci} 274cabdff1aSopenharmony_ci 275cabdff1aSopenharmony_cistatic int pix_abs16_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 276cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 277cabdff1aSopenharmony_ci{ 278cabdff1aSopenharmony_ci int s = 0, i; 279cabdff1aSopenharmony_ci uint8_t *pix3 = pix2 + stride; 280cabdff1aSopenharmony_ci 281cabdff1aSopenharmony_ci for (i = 0; i < h; i++) { 282cabdff1aSopenharmony_ci s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); 283cabdff1aSopenharmony_ci s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); 284cabdff1aSopenharmony_ci s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); 285cabdff1aSopenharmony_ci s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4])); 286cabdff1aSopenharmony_ci s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5])); 287cabdff1aSopenharmony_ci s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6])); 288cabdff1aSopenharmony_ci s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7])); 289cabdff1aSopenharmony_ci s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8])); 290cabdff1aSopenharmony_ci s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9])); 291cabdff1aSopenharmony_ci s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10])); 292cabdff1aSopenharmony_ci s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11])); 293cabdff1aSopenharmony_ci s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12])); 294cabdff1aSopenharmony_ci s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13])); 295cabdff1aSopenharmony_ci s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14])); 296cabdff1aSopenharmony_ci s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15])); 297cabdff1aSopenharmony_ci s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16])); 298cabdff1aSopenharmony_ci pix1 += stride; 299cabdff1aSopenharmony_ci pix2 += stride; 300cabdff1aSopenharmony_ci pix3 += stride; 301cabdff1aSopenharmony_ci } 302cabdff1aSopenharmony_ci return s; 303cabdff1aSopenharmony_ci} 304cabdff1aSopenharmony_ci 305cabdff1aSopenharmony_cistatic inline int pix_abs8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 306cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 307cabdff1aSopenharmony_ci{ 308cabdff1aSopenharmony_ci int s = 0, i; 309cabdff1aSopenharmony_ci 310cabdff1aSopenharmony_ci for (i = 0; i < h; i++) { 311cabdff1aSopenharmony_ci s += abs(pix1[0] - pix2[0]); 312cabdff1aSopenharmony_ci s += abs(pix1[1] - pix2[1]); 313cabdff1aSopenharmony_ci s += abs(pix1[2] - pix2[2]); 314cabdff1aSopenharmony_ci s += abs(pix1[3] - pix2[3]); 315cabdff1aSopenharmony_ci s += abs(pix1[4] - pix2[4]); 316cabdff1aSopenharmony_ci s += abs(pix1[5] - pix2[5]); 317cabdff1aSopenharmony_ci s += abs(pix1[6] - pix2[6]); 318cabdff1aSopenharmony_ci s += abs(pix1[7] - pix2[7]); 319cabdff1aSopenharmony_ci pix1 += stride; 320cabdff1aSopenharmony_ci pix2 += stride; 321cabdff1aSopenharmony_ci } 322cabdff1aSopenharmony_ci return s; 323cabdff1aSopenharmony_ci} 324cabdff1aSopenharmony_ci 325cabdff1aSopenharmony_cistatic inline int pix_median_abs8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 326cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 327cabdff1aSopenharmony_ci{ 328cabdff1aSopenharmony_ci int s = 0, i, j; 329cabdff1aSopenharmony_ci 330cabdff1aSopenharmony_ci#define V(x) (pix1[x] - pix2[x]) 331cabdff1aSopenharmony_ci 332cabdff1aSopenharmony_ci s += abs(V(0)); 333cabdff1aSopenharmony_ci s += abs(V(1) - V(0)); 334cabdff1aSopenharmony_ci s += abs(V(2) - V(1)); 335cabdff1aSopenharmony_ci s += abs(V(3) - V(2)); 336cabdff1aSopenharmony_ci s += abs(V(4) - V(3)); 337cabdff1aSopenharmony_ci s += abs(V(5) - V(4)); 338cabdff1aSopenharmony_ci s += abs(V(6) - V(5)); 339cabdff1aSopenharmony_ci s += abs(V(7) - V(6)); 340cabdff1aSopenharmony_ci 341cabdff1aSopenharmony_ci pix1 += stride; 342cabdff1aSopenharmony_ci pix2 += stride; 343cabdff1aSopenharmony_ci 344cabdff1aSopenharmony_ci for (i = 1; i < h; i++) { 345cabdff1aSopenharmony_ci s += abs(V(0) - V(-stride)); 346cabdff1aSopenharmony_ci for (j = 1; j < 8; j++) 347cabdff1aSopenharmony_ci s += abs(V(j) - mid_pred(V(j-stride), V(j-1), V(j-stride) + V(j-1) - V(j-stride-1))); 348cabdff1aSopenharmony_ci pix1 += stride; 349cabdff1aSopenharmony_ci pix2 += stride; 350cabdff1aSopenharmony_ci 351cabdff1aSopenharmony_ci } 352cabdff1aSopenharmony_ci#undef V 353cabdff1aSopenharmony_ci return s; 354cabdff1aSopenharmony_ci} 355cabdff1aSopenharmony_ci 356cabdff1aSopenharmony_cistatic int pix_abs8_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 357cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 358cabdff1aSopenharmony_ci{ 359cabdff1aSopenharmony_ci int s = 0, i; 360cabdff1aSopenharmony_ci 361cabdff1aSopenharmony_ci for (i = 0; i < h; i++) { 362cabdff1aSopenharmony_ci s += abs(pix1[0] - avg2(pix2[0], pix2[1])); 363cabdff1aSopenharmony_ci s += abs(pix1[1] - avg2(pix2[1], pix2[2])); 364cabdff1aSopenharmony_ci s += abs(pix1[2] - avg2(pix2[2], pix2[3])); 365cabdff1aSopenharmony_ci s += abs(pix1[3] - avg2(pix2[3], pix2[4])); 366cabdff1aSopenharmony_ci s += abs(pix1[4] - avg2(pix2[4], pix2[5])); 367cabdff1aSopenharmony_ci s += abs(pix1[5] - avg2(pix2[5], pix2[6])); 368cabdff1aSopenharmony_ci s += abs(pix1[6] - avg2(pix2[6], pix2[7])); 369cabdff1aSopenharmony_ci s += abs(pix1[7] - avg2(pix2[7], pix2[8])); 370cabdff1aSopenharmony_ci pix1 += stride; 371cabdff1aSopenharmony_ci pix2 += stride; 372cabdff1aSopenharmony_ci } 373cabdff1aSopenharmony_ci return s; 374cabdff1aSopenharmony_ci} 375cabdff1aSopenharmony_ci 376cabdff1aSopenharmony_cistatic int pix_abs8_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 377cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 378cabdff1aSopenharmony_ci{ 379cabdff1aSopenharmony_ci int s = 0, i; 380cabdff1aSopenharmony_ci uint8_t *pix3 = pix2 + stride; 381cabdff1aSopenharmony_ci 382cabdff1aSopenharmony_ci for (i = 0; i < h; i++) { 383cabdff1aSopenharmony_ci s += abs(pix1[0] - avg2(pix2[0], pix3[0])); 384cabdff1aSopenharmony_ci s += abs(pix1[1] - avg2(pix2[1], pix3[1])); 385cabdff1aSopenharmony_ci s += abs(pix1[2] - avg2(pix2[2], pix3[2])); 386cabdff1aSopenharmony_ci s += abs(pix1[3] - avg2(pix2[3], pix3[3])); 387cabdff1aSopenharmony_ci s += abs(pix1[4] - avg2(pix2[4], pix3[4])); 388cabdff1aSopenharmony_ci s += abs(pix1[5] - avg2(pix2[5], pix3[5])); 389cabdff1aSopenharmony_ci s += abs(pix1[6] - avg2(pix2[6], pix3[6])); 390cabdff1aSopenharmony_ci s += abs(pix1[7] - avg2(pix2[7], pix3[7])); 391cabdff1aSopenharmony_ci pix1 += stride; 392cabdff1aSopenharmony_ci pix2 += stride; 393cabdff1aSopenharmony_ci pix3 += stride; 394cabdff1aSopenharmony_ci } 395cabdff1aSopenharmony_ci return s; 396cabdff1aSopenharmony_ci} 397cabdff1aSopenharmony_ci 398cabdff1aSopenharmony_cistatic int pix_abs8_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 399cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 400cabdff1aSopenharmony_ci{ 401cabdff1aSopenharmony_ci int s = 0, i; 402cabdff1aSopenharmony_ci uint8_t *pix3 = pix2 + stride; 403cabdff1aSopenharmony_ci 404cabdff1aSopenharmony_ci for (i = 0; i < h; i++) { 405cabdff1aSopenharmony_ci s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); 406cabdff1aSopenharmony_ci s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); 407cabdff1aSopenharmony_ci s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); 408cabdff1aSopenharmony_ci s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4])); 409cabdff1aSopenharmony_ci s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5])); 410cabdff1aSopenharmony_ci s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6])); 411cabdff1aSopenharmony_ci s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7])); 412cabdff1aSopenharmony_ci s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8])); 413cabdff1aSopenharmony_ci pix1 += stride; 414cabdff1aSopenharmony_ci pix2 += stride; 415cabdff1aSopenharmony_ci pix3 += stride; 416cabdff1aSopenharmony_ci } 417cabdff1aSopenharmony_ci return s; 418cabdff1aSopenharmony_ci} 419cabdff1aSopenharmony_ci 420cabdff1aSopenharmony_cistatic int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, 421cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 422cabdff1aSopenharmony_ci{ 423cabdff1aSopenharmony_ci int score1 = 0, score2 = 0, x, y; 424cabdff1aSopenharmony_ci 425cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 426cabdff1aSopenharmony_ci for (x = 0; x < 16; x++) 427cabdff1aSopenharmony_ci score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]); 428cabdff1aSopenharmony_ci if (y + 1 < h) { 429cabdff1aSopenharmony_ci for (x = 0; x < 15; x++) 430cabdff1aSopenharmony_ci score2 += FFABS(s1[x] - s1[x + stride] - 431cabdff1aSopenharmony_ci s1[x + 1] + s1[x + stride + 1]) - 432cabdff1aSopenharmony_ci FFABS(s2[x] - s2[x + stride] - 433cabdff1aSopenharmony_ci s2[x + 1] + s2[x + stride + 1]); 434cabdff1aSopenharmony_ci } 435cabdff1aSopenharmony_ci s1 += stride; 436cabdff1aSopenharmony_ci s2 += stride; 437cabdff1aSopenharmony_ci } 438cabdff1aSopenharmony_ci 439cabdff1aSopenharmony_ci if (c) 440cabdff1aSopenharmony_ci return score1 + FFABS(score2) * c->avctx->nsse_weight; 441cabdff1aSopenharmony_ci else 442cabdff1aSopenharmony_ci return score1 + FFABS(score2) * 8; 443cabdff1aSopenharmony_ci} 444cabdff1aSopenharmony_ci 445cabdff1aSopenharmony_cistatic int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, 446cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 447cabdff1aSopenharmony_ci{ 448cabdff1aSopenharmony_ci int score1 = 0, score2 = 0, x, y; 449cabdff1aSopenharmony_ci 450cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { 451cabdff1aSopenharmony_ci for (x = 0; x < 8; x++) 452cabdff1aSopenharmony_ci score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]); 453cabdff1aSopenharmony_ci if (y + 1 < h) { 454cabdff1aSopenharmony_ci for (x = 0; x < 7; x++) 455cabdff1aSopenharmony_ci score2 += FFABS(s1[x] - s1[x + stride] - 456cabdff1aSopenharmony_ci s1[x + 1] + s1[x + stride + 1]) - 457cabdff1aSopenharmony_ci FFABS(s2[x] - s2[x + stride] - 458cabdff1aSopenharmony_ci s2[x + 1] + s2[x + stride + 1]); 459cabdff1aSopenharmony_ci } 460cabdff1aSopenharmony_ci s1 += stride; 461cabdff1aSopenharmony_ci s2 += stride; 462cabdff1aSopenharmony_ci } 463cabdff1aSopenharmony_ci 464cabdff1aSopenharmony_ci if (c) 465cabdff1aSopenharmony_ci return score1 + FFABS(score2) * c->avctx->nsse_weight; 466cabdff1aSopenharmony_ci else 467cabdff1aSopenharmony_ci return score1 + FFABS(score2) * 8; 468cabdff1aSopenharmony_ci} 469cabdff1aSopenharmony_ci 470cabdff1aSopenharmony_cistatic int zero_cmp(MpegEncContext *s, uint8_t *a, uint8_t *b, 471cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 472cabdff1aSopenharmony_ci{ 473cabdff1aSopenharmony_ci return 0; 474cabdff1aSopenharmony_ci} 475cabdff1aSopenharmony_ci 476cabdff1aSopenharmony_ciint ff_set_cmp(MECmpContext *c, me_cmp_func *cmp, int type) 477cabdff1aSopenharmony_ci{ 478cabdff1aSopenharmony_ci int ret = 0; 479cabdff1aSopenharmony_ci int i; 480cabdff1aSopenharmony_ci 481cabdff1aSopenharmony_ci memset(cmp, 0, sizeof(void *) * 6); 482cabdff1aSopenharmony_ci 483cabdff1aSopenharmony_ci for (i = 0; i < 6; i++) { 484cabdff1aSopenharmony_ci switch (type & 0xFF) { 485cabdff1aSopenharmony_ci case FF_CMP_SAD: 486cabdff1aSopenharmony_ci cmp[i] = c->sad[i]; 487cabdff1aSopenharmony_ci break; 488cabdff1aSopenharmony_ci case FF_CMP_MEDIAN_SAD: 489cabdff1aSopenharmony_ci cmp[i] = c->median_sad[i]; 490cabdff1aSopenharmony_ci break; 491cabdff1aSopenharmony_ci case FF_CMP_SATD: 492cabdff1aSopenharmony_ci cmp[i] = c->hadamard8_diff[i]; 493cabdff1aSopenharmony_ci break; 494cabdff1aSopenharmony_ci case FF_CMP_SSE: 495cabdff1aSopenharmony_ci cmp[i] = c->sse[i]; 496cabdff1aSopenharmony_ci break; 497cabdff1aSopenharmony_ci case FF_CMP_DCT: 498cabdff1aSopenharmony_ci cmp[i] = c->dct_sad[i]; 499cabdff1aSopenharmony_ci break; 500cabdff1aSopenharmony_ci case FF_CMP_DCT264: 501cabdff1aSopenharmony_ci cmp[i] = c->dct264_sad[i]; 502cabdff1aSopenharmony_ci break; 503cabdff1aSopenharmony_ci case FF_CMP_DCTMAX: 504cabdff1aSopenharmony_ci cmp[i] = c->dct_max[i]; 505cabdff1aSopenharmony_ci break; 506cabdff1aSopenharmony_ci case FF_CMP_PSNR: 507cabdff1aSopenharmony_ci cmp[i] = c->quant_psnr[i]; 508cabdff1aSopenharmony_ci break; 509cabdff1aSopenharmony_ci case FF_CMP_BIT: 510cabdff1aSopenharmony_ci cmp[i] = c->bit[i]; 511cabdff1aSopenharmony_ci break; 512cabdff1aSopenharmony_ci case FF_CMP_RD: 513cabdff1aSopenharmony_ci cmp[i] = c->rd[i]; 514cabdff1aSopenharmony_ci break; 515cabdff1aSopenharmony_ci case FF_CMP_VSAD: 516cabdff1aSopenharmony_ci cmp[i] = c->vsad[i]; 517cabdff1aSopenharmony_ci break; 518cabdff1aSopenharmony_ci case FF_CMP_VSSE: 519cabdff1aSopenharmony_ci cmp[i] = c->vsse[i]; 520cabdff1aSopenharmony_ci break; 521cabdff1aSopenharmony_ci case FF_CMP_ZERO: 522cabdff1aSopenharmony_ci cmp[i] = zero_cmp; 523cabdff1aSopenharmony_ci break; 524cabdff1aSopenharmony_ci case FF_CMP_NSSE: 525cabdff1aSopenharmony_ci cmp[i] = c->nsse[i]; 526cabdff1aSopenharmony_ci break; 527cabdff1aSopenharmony_ci#if CONFIG_DWT 528cabdff1aSopenharmony_ci case FF_CMP_W53: 529cabdff1aSopenharmony_ci cmp[i]= c->w53[i]; 530cabdff1aSopenharmony_ci break; 531cabdff1aSopenharmony_ci case FF_CMP_W97: 532cabdff1aSopenharmony_ci cmp[i]= c->w97[i]; 533cabdff1aSopenharmony_ci break; 534cabdff1aSopenharmony_ci#endif 535cabdff1aSopenharmony_ci default: 536cabdff1aSopenharmony_ci av_log(NULL, AV_LOG_ERROR, 537cabdff1aSopenharmony_ci "invalid cmp function selection\n"); 538cabdff1aSopenharmony_ci ret = -1; 539cabdff1aSopenharmony_ci break; 540cabdff1aSopenharmony_ci } 541cabdff1aSopenharmony_ci } 542cabdff1aSopenharmony_ci 543cabdff1aSopenharmony_ci return ret; 544cabdff1aSopenharmony_ci} 545cabdff1aSopenharmony_ci 546cabdff1aSopenharmony_ci#define BUTTERFLY2(o1, o2, i1, i2) \ 547cabdff1aSopenharmony_ci o1 = (i1) + (i2); \ 548cabdff1aSopenharmony_ci o2 = (i1) - (i2); 549cabdff1aSopenharmony_ci 550cabdff1aSopenharmony_ci#define BUTTERFLY1(x, y) \ 551cabdff1aSopenharmony_ci { \ 552cabdff1aSopenharmony_ci int a, b; \ 553cabdff1aSopenharmony_ci a = x; \ 554cabdff1aSopenharmony_ci b = y; \ 555cabdff1aSopenharmony_ci x = a + b; \ 556cabdff1aSopenharmony_ci y = a - b; \ 557cabdff1aSopenharmony_ci } 558cabdff1aSopenharmony_ci 559cabdff1aSopenharmony_ci#define BUTTERFLYA(x, y) (FFABS((x) + (y)) + FFABS((x) - (y))) 560cabdff1aSopenharmony_ci 561cabdff1aSopenharmony_cistatic int hadamard8_diff8x8_c(MpegEncContext *s, uint8_t *dst, 562cabdff1aSopenharmony_ci uint8_t *src, ptrdiff_t stride, int h) 563cabdff1aSopenharmony_ci{ 564cabdff1aSopenharmony_ci int i, temp[64], sum = 0; 565cabdff1aSopenharmony_ci 566cabdff1aSopenharmony_ci av_assert2(h == 8); 567cabdff1aSopenharmony_ci 568cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) { 569cabdff1aSopenharmony_ci // FIXME: try pointer walks 570cabdff1aSopenharmony_ci BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1], 571cabdff1aSopenharmony_ci src[stride * i + 0] - dst[stride * i + 0], 572cabdff1aSopenharmony_ci src[stride * i + 1] - dst[stride * i + 1]); 573cabdff1aSopenharmony_ci BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3], 574cabdff1aSopenharmony_ci src[stride * i + 2] - dst[stride * i + 2], 575cabdff1aSopenharmony_ci src[stride * i + 3] - dst[stride * i + 3]); 576cabdff1aSopenharmony_ci BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5], 577cabdff1aSopenharmony_ci src[stride * i + 4] - dst[stride * i + 4], 578cabdff1aSopenharmony_ci src[stride * i + 5] - dst[stride * i + 5]); 579cabdff1aSopenharmony_ci BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7], 580cabdff1aSopenharmony_ci src[stride * i + 6] - dst[stride * i + 6], 581cabdff1aSopenharmony_ci src[stride * i + 7] - dst[stride * i + 7]); 582cabdff1aSopenharmony_ci 583cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]); 584cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]); 585cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]); 586cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]); 587cabdff1aSopenharmony_ci 588cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]); 589cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]); 590cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]); 591cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]); 592cabdff1aSopenharmony_ci } 593cabdff1aSopenharmony_ci 594cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) { 595cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]); 596cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]); 597cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]); 598cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]); 599cabdff1aSopenharmony_ci 600cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]); 601cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]); 602cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]); 603cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]); 604cabdff1aSopenharmony_ci 605cabdff1aSopenharmony_ci sum += BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i]) + 606cabdff1aSopenharmony_ci BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i]) + 607cabdff1aSopenharmony_ci BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i]) + 608cabdff1aSopenharmony_ci BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]); 609cabdff1aSopenharmony_ci } 610cabdff1aSopenharmony_ci return sum; 611cabdff1aSopenharmony_ci} 612cabdff1aSopenharmony_ci 613cabdff1aSopenharmony_cistatic int hadamard8_intra8x8_c(MpegEncContext *s, uint8_t *src, 614cabdff1aSopenharmony_ci uint8_t *dummy, ptrdiff_t stride, int h) 615cabdff1aSopenharmony_ci{ 616cabdff1aSopenharmony_ci int i, temp[64], sum = 0; 617cabdff1aSopenharmony_ci 618cabdff1aSopenharmony_ci av_assert2(h == 8); 619cabdff1aSopenharmony_ci 620cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) { 621cabdff1aSopenharmony_ci // FIXME: try pointer walks 622cabdff1aSopenharmony_ci BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1], 623cabdff1aSopenharmony_ci src[stride * i + 0], src[stride * i + 1]); 624cabdff1aSopenharmony_ci BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3], 625cabdff1aSopenharmony_ci src[stride * i + 2], src[stride * i + 3]); 626cabdff1aSopenharmony_ci BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5], 627cabdff1aSopenharmony_ci src[stride * i + 4], src[stride * i + 5]); 628cabdff1aSopenharmony_ci BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7], 629cabdff1aSopenharmony_ci src[stride * i + 6], src[stride * i + 7]); 630cabdff1aSopenharmony_ci 631cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]); 632cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]); 633cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]); 634cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]); 635cabdff1aSopenharmony_ci 636cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]); 637cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]); 638cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]); 639cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]); 640cabdff1aSopenharmony_ci } 641cabdff1aSopenharmony_ci 642cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) { 643cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]); 644cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]); 645cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]); 646cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]); 647cabdff1aSopenharmony_ci 648cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]); 649cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]); 650cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]); 651cabdff1aSopenharmony_ci BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]); 652cabdff1aSopenharmony_ci 653cabdff1aSopenharmony_ci sum += 654cabdff1aSopenharmony_ci BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i]) 655cabdff1aSopenharmony_ci + BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i]) 656cabdff1aSopenharmony_ci + BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i]) 657cabdff1aSopenharmony_ci + BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]); 658cabdff1aSopenharmony_ci } 659cabdff1aSopenharmony_ci 660cabdff1aSopenharmony_ci sum -= FFABS(temp[8 * 0] + temp[8 * 4]); // -mean 661cabdff1aSopenharmony_ci 662cabdff1aSopenharmony_ci return sum; 663cabdff1aSopenharmony_ci} 664cabdff1aSopenharmony_ci 665cabdff1aSopenharmony_cistatic int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1, 666cabdff1aSopenharmony_ci uint8_t *src2, ptrdiff_t stride, int h) 667cabdff1aSopenharmony_ci{ 668cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(int16_t, temp, [64]); 669cabdff1aSopenharmony_ci 670cabdff1aSopenharmony_ci av_assert2(h == 8); 671cabdff1aSopenharmony_ci 672cabdff1aSopenharmony_ci s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride); 673cabdff1aSopenharmony_ci s->fdsp.fdct(temp); 674cabdff1aSopenharmony_ci return s->mecc.sum_abs_dctelem(temp); 675cabdff1aSopenharmony_ci} 676cabdff1aSopenharmony_ci 677cabdff1aSopenharmony_ci#if CONFIG_GPL 678cabdff1aSopenharmony_ci#define DCT8_1D \ 679cabdff1aSopenharmony_ci { \ 680cabdff1aSopenharmony_ci const int s07 = SRC(0) + SRC(7); \ 681cabdff1aSopenharmony_ci const int s16 = SRC(1) + SRC(6); \ 682cabdff1aSopenharmony_ci const int s25 = SRC(2) + SRC(5); \ 683cabdff1aSopenharmony_ci const int s34 = SRC(3) + SRC(4); \ 684cabdff1aSopenharmony_ci const int a0 = s07 + s34; \ 685cabdff1aSopenharmony_ci const int a1 = s16 + s25; \ 686cabdff1aSopenharmony_ci const int a2 = s07 - s34; \ 687cabdff1aSopenharmony_ci const int a3 = s16 - s25; \ 688cabdff1aSopenharmony_ci const int d07 = SRC(0) - SRC(7); \ 689cabdff1aSopenharmony_ci const int d16 = SRC(1) - SRC(6); \ 690cabdff1aSopenharmony_ci const int d25 = SRC(2) - SRC(5); \ 691cabdff1aSopenharmony_ci const int d34 = SRC(3) - SRC(4); \ 692cabdff1aSopenharmony_ci const int a4 = d16 + d25 + (d07 + (d07 >> 1)); \ 693cabdff1aSopenharmony_ci const int a5 = d07 - d34 - (d25 + (d25 >> 1)); \ 694cabdff1aSopenharmony_ci const int a6 = d07 + d34 - (d16 + (d16 >> 1)); \ 695cabdff1aSopenharmony_ci const int a7 = d16 - d25 + (d34 + (d34 >> 1)); \ 696cabdff1aSopenharmony_ci DST(0, a0 + a1); \ 697cabdff1aSopenharmony_ci DST(1, a4 + (a7 >> 2)); \ 698cabdff1aSopenharmony_ci DST(2, a2 + (a3 >> 1)); \ 699cabdff1aSopenharmony_ci DST(3, a5 + (a6 >> 2)); \ 700cabdff1aSopenharmony_ci DST(4, a0 - a1); \ 701cabdff1aSopenharmony_ci DST(5, a6 - (a5 >> 2)); \ 702cabdff1aSopenharmony_ci DST(6, (a2 >> 1) - a3); \ 703cabdff1aSopenharmony_ci DST(7, (a4 >> 2) - a7); \ 704cabdff1aSopenharmony_ci } 705cabdff1aSopenharmony_ci 706cabdff1aSopenharmony_cistatic int dct264_sad8x8_c(MpegEncContext *s, uint8_t *src1, 707cabdff1aSopenharmony_ci uint8_t *src2, ptrdiff_t stride, int h) 708cabdff1aSopenharmony_ci{ 709cabdff1aSopenharmony_ci int16_t dct[8][8]; 710cabdff1aSopenharmony_ci int i, sum = 0; 711cabdff1aSopenharmony_ci 712cabdff1aSopenharmony_ci s->pdsp.diff_pixels_unaligned(dct[0], src1, src2, stride); 713cabdff1aSopenharmony_ci 714cabdff1aSopenharmony_ci#define SRC(x) dct[i][x] 715cabdff1aSopenharmony_ci#define DST(x, v) dct[i][x] = v 716cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) 717cabdff1aSopenharmony_ci DCT8_1D 718cabdff1aSopenharmony_ci#undef SRC 719cabdff1aSopenharmony_ci#undef DST 720cabdff1aSopenharmony_ci 721cabdff1aSopenharmony_ci#define SRC(x) dct[x][i] 722cabdff1aSopenharmony_ci#define DST(x, v) sum += FFABS(v) 723cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) 724cabdff1aSopenharmony_ci DCT8_1D 725cabdff1aSopenharmony_ci#undef SRC 726cabdff1aSopenharmony_ci#undef DST 727cabdff1aSopenharmony_ci return sum; 728cabdff1aSopenharmony_ci} 729cabdff1aSopenharmony_ci#endif 730cabdff1aSopenharmony_ci 731cabdff1aSopenharmony_cistatic int dct_max8x8_c(MpegEncContext *s, uint8_t *src1, 732cabdff1aSopenharmony_ci uint8_t *src2, ptrdiff_t stride, int h) 733cabdff1aSopenharmony_ci{ 734cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(int16_t, temp, [64]); 735cabdff1aSopenharmony_ci int sum = 0, i; 736cabdff1aSopenharmony_ci 737cabdff1aSopenharmony_ci av_assert2(h == 8); 738cabdff1aSopenharmony_ci 739cabdff1aSopenharmony_ci s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride); 740cabdff1aSopenharmony_ci s->fdsp.fdct(temp); 741cabdff1aSopenharmony_ci 742cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) 743cabdff1aSopenharmony_ci sum = FFMAX(sum, FFABS(temp[i])); 744cabdff1aSopenharmony_ci 745cabdff1aSopenharmony_ci return sum; 746cabdff1aSopenharmony_ci} 747cabdff1aSopenharmony_ci 748cabdff1aSopenharmony_cistatic int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1, 749cabdff1aSopenharmony_ci uint8_t *src2, ptrdiff_t stride, int h) 750cabdff1aSopenharmony_ci{ 751cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(int16_t, temp, [64 * 2]); 752cabdff1aSopenharmony_ci int16_t *const bak = temp + 64; 753cabdff1aSopenharmony_ci int sum = 0, i; 754cabdff1aSopenharmony_ci 755cabdff1aSopenharmony_ci av_assert2(h == 8); 756cabdff1aSopenharmony_ci s->mb_intra = 0; 757cabdff1aSopenharmony_ci 758cabdff1aSopenharmony_ci s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride); 759cabdff1aSopenharmony_ci 760cabdff1aSopenharmony_ci memcpy(bak, temp, 64 * sizeof(int16_t)); 761cabdff1aSopenharmony_ci 762cabdff1aSopenharmony_ci s->block_last_index[0 /* FIXME */] = 763cabdff1aSopenharmony_ci s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i); 764cabdff1aSopenharmony_ci s->dct_unquantize_inter(s, temp, 0, s->qscale); 765cabdff1aSopenharmony_ci ff_simple_idct_int16_8bit(temp); // FIXME 766cabdff1aSopenharmony_ci 767cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) 768cabdff1aSopenharmony_ci sum += (temp[i] - bak[i]) * (temp[i] - bak[i]); 769cabdff1aSopenharmony_ci 770cabdff1aSopenharmony_ci return sum; 771cabdff1aSopenharmony_ci} 772cabdff1aSopenharmony_ci 773cabdff1aSopenharmony_cistatic int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2, 774cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 775cabdff1aSopenharmony_ci{ 776cabdff1aSopenharmony_ci const uint8_t *scantable = s->intra_scantable.permutated; 777cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(int16_t, temp, [64]); 778cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]); 779cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]); 780cabdff1aSopenharmony_ci int i, last, run, bits, level, distortion, start_i; 781cabdff1aSopenharmony_ci const int esc_length = s->ac_esc_length; 782cabdff1aSopenharmony_ci uint8_t *length, *last_length; 783cabdff1aSopenharmony_ci 784cabdff1aSopenharmony_ci av_assert2(h == 8); 785cabdff1aSopenharmony_ci 786cabdff1aSopenharmony_ci copy_block8(lsrc1, src1, 8, stride, 8); 787cabdff1aSopenharmony_ci copy_block8(lsrc2, src2, 8, stride, 8); 788cabdff1aSopenharmony_ci 789cabdff1aSopenharmony_ci s->pdsp.diff_pixels(temp, lsrc1, lsrc2, 8); 790cabdff1aSopenharmony_ci 791cabdff1aSopenharmony_ci s->block_last_index[0 /* FIXME */] = 792cabdff1aSopenharmony_ci last = 793cabdff1aSopenharmony_ci s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i); 794cabdff1aSopenharmony_ci 795cabdff1aSopenharmony_ci bits = 0; 796cabdff1aSopenharmony_ci 797cabdff1aSopenharmony_ci if (s->mb_intra) { 798cabdff1aSopenharmony_ci start_i = 1; 799cabdff1aSopenharmony_ci length = s->intra_ac_vlc_length; 800cabdff1aSopenharmony_ci last_length = s->intra_ac_vlc_last_length; 801cabdff1aSopenharmony_ci bits += s->luma_dc_vlc_length[temp[0] + 256]; // FIXME: chroma 802cabdff1aSopenharmony_ci } else { 803cabdff1aSopenharmony_ci start_i = 0; 804cabdff1aSopenharmony_ci length = s->inter_ac_vlc_length; 805cabdff1aSopenharmony_ci last_length = s->inter_ac_vlc_last_length; 806cabdff1aSopenharmony_ci } 807cabdff1aSopenharmony_ci 808cabdff1aSopenharmony_ci if (last >= start_i) { 809cabdff1aSopenharmony_ci run = 0; 810cabdff1aSopenharmony_ci for (i = start_i; i < last; i++) { 811cabdff1aSopenharmony_ci int j = scantable[i]; 812cabdff1aSopenharmony_ci level = temp[j]; 813cabdff1aSopenharmony_ci 814cabdff1aSopenharmony_ci if (level) { 815cabdff1aSopenharmony_ci level += 64; 816cabdff1aSopenharmony_ci if ((level & (~127)) == 0) 817cabdff1aSopenharmony_ci bits += length[UNI_AC_ENC_INDEX(run, level)]; 818cabdff1aSopenharmony_ci else 819cabdff1aSopenharmony_ci bits += esc_length; 820cabdff1aSopenharmony_ci run = 0; 821cabdff1aSopenharmony_ci } else 822cabdff1aSopenharmony_ci run++; 823cabdff1aSopenharmony_ci } 824cabdff1aSopenharmony_ci i = scantable[last]; 825cabdff1aSopenharmony_ci 826cabdff1aSopenharmony_ci level = temp[i] + 64; 827cabdff1aSopenharmony_ci 828cabdff1aSopenharmony_ci av_assert2(level - 64); 829cabdff1aSopenharmony_ci 830cabdff1aSopenharmony_ci if ((level & (~127)) == 0) { 831cabdff1aSopenharmony_ci bits += last_length[UNI_AC_ENC_INDEX(run, level)]; 832cabdff1aSopenharmony_ci } else 833cabdff1aSopenharmony_ci bits += esc_length; 834cabdff1aSopenharmony_ci } 835cabdff1aSopenharmony_ci 836cabdff1aSopenharmony_ci if (last >= 0) { 837cabdff1aSopenharmony_ci if (s->mb_intra) 838cabdff1aSopenharmony_ci s->dct_unquantize_intra(s, temp, 0, s->qscale); 839cabdff1aSopenharmony_ci else 840cabdff1aSopenharmony_ci s->dct_unquantize_inter(s, temp, 0, s->qscale); 841cabdff1aSopenharmony_ci } 842cabdff1aSopenharmony_ci 843cabdff1aSopenharmony_ci s->idsp.idct_add(lsrc2, 8, temp); 844cabdff1aSopenharmony_ci 845cabdff1aSopenharmony_ci distortion = s->mecc.sse[1](NULL, lsrc2, lsrc1, 8, 8); 846cabdff1aSopenharmony_ci 847cabdff1aSopenharmony_ci return distortion + ((bits * s->qscale * s->qscale * 109 + 64) >> 7); 848cabdff1aSopenharmony_ci} 849cabdff1aSopenharmony_ci 850cabdff1aSopenharmony_cistatic int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2, 851cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 852cabdff1aSopenharmony_ci{ 853cabdff1aSopenharmony_ci const uint8_t *scantable = s->intra_scantable.permutated; 854cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(int16_t, temp, [64]); 855cabdff1aSopenharmony_ci int i, last, run, bits, level, start_i; 856cabdff1aSopenharmony_ci const int esc_length = s->ac_esc_length; 857cabdff1aSopenharmony_ci uint8_t *length, *last_length; 858cabdff1aSopenharmony_ci 859cabdff1aSopenharmony_ci av_assert2(h == 8); 860cabdff1aSopenharmony_ci 861cabdff1aSopenharmony_ci s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride); 862cabdff1aSopenharmony_ci 863cabdff1aSopenharmony_ci s->block_last_index[0 /* FIXME */] = 864cabdff1aSopenharmony_ci last = 865cabdff1aSopenharmony_ci s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i); 866cabdff1aSopenharmony_ci 867cabdff1aSopenharmony_ci bits = 0; 868cabdff1aSopenharmony_ci 869cabdff1aSopenharmony_ci if (s->mb_intra) { 870cabdff1aSopenharmony_ci start_i = 1; 871cabdff1aSopenharmony_ci length = s->intra_ac_vlc_length; 872cabdff1aSopenharmony_ci last_length = s->intra_ac_vlc_last_length; 873cabdff1aSopenharmony_ci bits += s->luma_dc_vlc_length[temp[0] + 256]; // FIXME: chroma 874cabdff1aSopenharmony_ci } else { 875cabdff1aSopenharmony_ci start_i = 0; 876cabdff1aSopenharmony_ci length = s->inter_ac_vlc_length; 877cabdff1aSopenharmony_ci last_length = s->inter_ac_vlc_last_length; 878cabdff1aSopenharmony_ci } 879cabdff1aSopenharmony_ci 880cabdff1aSopenharmony_ci if (last >= start_i) { 881cabdff1aSopenharmony_ci run = 0; 882cabdff1aSopenharmony_ci for (i = start_i; i < last; i++) { 883cabdff1aSopenharmony_ci int j = scantable[i]; 884cabdff1aSopenharmony_ci level = temp[j]; 885cabdff1aSopenharmony_ci 886cabdff1aSopenharmony_ci if (level) { 887cabdff1aSopenharmony_ci level += 64; 888cabdff1aSopenharmony_ci if ((level & (~127)) == 0) 889cabdff1aSopenharmony_ci bits += length[UNI_AC_ENC_INDEX(run, level)]; 890cabdff1aSopenharmony_ci else 891cabdff1aSopenharmony_ci bits += esc_length; 892cabdff1aSopenharmony_ci run = 0; 893cabdff1aSopenharmony_ci } else 894cabdff1aSopenharmony_ci run++; 895cabdff1aSopenharmony_ci } 896cabdff1aSopenharmony_ci i = scantable[last]; 897cabdff1aSopenharmony_ci 898cabdff1aSopenharmony_ci level = temp[i] + 64; 899cabdff1aSopenharmony_ci 900cabdff1aSopenharmony_ci av_assert2(level - 64); 901cabdff1aSopenharmony_ci 902cabdff1aSopenharmony_ci if ((level & (~127)) == 0) 903cabdff1aSopenharmony_ci bits += last_length[UNI_AC_ENC_INDEX(run, level)]; 904cabdff1aSopenharmony_ci else 905cabdff1aSopenharmony_ci bits += esc_length; 906cabdff1aSopenharmony_ci } 907cabdff1aSopenharmony_ci 908cabdff1aSopenharmony_ci return bits; 909cabdff1aSopenharmony_ci} 910cabdff1aSopenharmony_ci 911cabdff1aSopenharmony_ci#define VSAD_INTRA(size) \ 912cabdff1aSopenharmony_cistatic int vsad_intra ## size ## _c(MpegEncContext *c, \ 913cabdff1aSopenharmony_ci uint8_t *s, uint8_t *dummy, \ 914cabdff1aSopenharmony_ci ptrdiff_t stride, int h) \ 915cabdff1aSopenharmony_ci{ \ 916cabdff1aSopenharmony_ci int score = 0, x, y; \ 917cabdff1aSopenharmony_ci \ 918cabdff1aSopenharmony_ci for (y = 1; y < h; y++) { \ 919cabdff1aSopenharmony_ci for (x = 0; x < size; x += 4) { \ 920cabdff1aSopenharmony_ci score += FFABS(s[x] - s[x + stride]) + \ 921cabdff1aSopenharmony_ci FFABS(s[x + 1] - s[x + stride + 1]) + \ 922cabdff1aSopenharmony_ci FFABS(s[x + 2] - s[x + 2 + stride]) + \ 923cabdff1aSopenharmony_ci FFABS(s[x + 3] - s[x + 3 + stride]); \ 924cabdff1aSopenharmony_ci } \ 925cabdff1aSopenharmony_ci s += stride; \ 926cabdff1aSopenharmony_ci } \ 927cabdff1aSopenharmony_ci \ 928cabdff1aSopenharmony_ci return score; \ 929cabdff1aSopenharmony_ci} 930cabdff1aSopenharmony_ciVSAD_INTRA(8) 931cabdff1aSopenharmony_ciVSAD_INTRA(16) 932cabdff1aSopenharmony_ci 933cabdff1aSopenharmony_ci#define VSAD(size) \ 934cabdff1aSopenharmony_cistatic int vsad ## size ## _c(MpegEncContext *c, \ 935cabdff1aSopenharmony_ci uint8_t *s1, uint8_t *s2, \ 936cabdff1aSopenharmony_ci ptrdiff_t stride, int h) \ 937cabdff1aSopenharmony_ci{ \ 938cabdff1aSopenharmony_ci int score = 0, x, y; \ 939cabdff1aSopenharmony_ci \ 940cabdff1aSopenharmony_ci for (y = 1; y < h; y++) { \ 941cabdff1aSopenharmony_ci for (x = 0; x < size; x++) \ 942cabdff1aSopenharmony_ci score += FFABS(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]); \ 943cabdff1aSopenharmony_ci s1 += stride; \ 944cabdff1aSopenharmony_ci s2 += stride; \ 945cabdff1aSopenharmony_ci } \ 946cabdff1aSopenharmony_ci \ 947cabdff1aSopenharmony_ci return score; \ 948cabdff1aSopenharmony_ci} 949cabdff1aSopenharmony_ciVSAD(8) 950cabdff1aSopenharmony_ciVSAD(16) 951cabdff1aSopenharmony_ci 952cabdff1aSopenharmony_ci#define SQ(a) ((a) * (a)) 953cabdff1aSopenharmony_ci#define VSSE_INTRA(size) \ 954cabdff1aSopenharmony_cistatic int vsse_intra ## size ## _c(MpegEncContext *c, \ 955cabdff1aSopenharmony_ci uint8_t *s, uint8_t *dummy, \ 956cabdff1aSopenharmony_ci ptrdiff_t stride, int h) \ 957cabdff1aSopenharmony_ci{ \ 958cabdff1aSopenharmony_ci int score = 0, x, y; \ 959cabdff1aSopenharmony_ci \ 960cabdff1aSopenharmony_ci for (y = 1; y < h; y++) { \ 961cabdff1aSopenharmony_ci for (x = 0; x < size; x += 4) { \ 962cabdff1aSopenharmony_ci score += SQ(s[x] - s[x + stride]) + \ 963cabdff1aSopenharmony_ci SQ(s[x + 1] - s[x + stride + 1]) + \ 964cabdff1aSopenharmony_ci SQ(s[x + 2] - s[x + stride + 2]) + \ 965cabdff1aSopenharmony_ci SQ(s[x + 3] - s[x + stride + 3]); \ 966cabdff1aSopenharmony_ci } \ 967cabdff1aSopenharmony_ci s += stride; \ 968cabdff1aSopenharmony_ci } \ 969cabdff1aSopenharmony_ci \ 970cabdff1aSopenharmony_ci return score; \ 971cabdff1aSopenharmony_ci} 972cabdff1aSopenharmony_ciVSSE_INTRA(8) 973cabdff1aSopenharmony_ciVSSE_INTRA(16) 974cabdff1aSopenharmony_ci 975cabdff1aSopenharmony_ci#define VSSE(size) \ 976cabdff1aSopenharmony_cistatic int vsse ## size ## _c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, \ 977cabdff1aSopenharmony_ci ptrdiff_t stride, int h) \ 978cabdff1aSopenharmony_ci{ \ 979cabdff1aSopenharmony_ci int score = 0, x, y; \ 980cabdff1aSopenharmony_ci \ 981cabdff1aSopenharmony_ci for (y = 1; y < h; y++) { \ 982cabdff1aSopenharmony_ci for (x = 0; x < size; x++) \ 983cabdff1aSopenharmony_ci score += SQ(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]); \ 984cabdff1aSopenharmony_ci s1 += stride; \ 985cabdff1aSopenharmony_ci s2 += stride; \ 986cabdff1aSopenharmony_ci } \ 987cabdff1aSopenharmony_ci \ 988cabdff1aSopenharmony_ci return score; \ 989cabdff1aSopenharmony_ci} 990cabdff1aSopenharmony_ciVSSE(8) 991cabdff1aSopenharmony_ciVSSE(16) 992cabdff1aSopenharmony_ci 993cabdff1aSopenharmony_ci#define WRAPPER8_16_SQ(name8, name16) \ 994cabdff1aSopenharmony_cistatic int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src, \ 995cabdff1aSopenharmony_ci ptrdiff_t stride, int h) \ 996cabdff1aSopenharmony_ci{ \ 997cabdff1aSopenharmony_ci int score = 0; \ 998cabdff1aSopenharmony_ci \ 999cabdff1aSopenharmony_ci score += name8(s, dst, src, stride, 8); \ 1000cabdff1aSopenharmony_ci score += name8(s, dst + 8, src + 8, stride, 8); \ 1001cabdff1aSopenharmony_ci if (h == 16) { \ 1002cabdff1aSopenharmony_ci dst += 8 * stride; \ 1003cabdff1aSopenharmony_ci src += 8 * stride; \ 1004cabdff1aSopenharmony_ci score += name8(s, dst, src, stride, 8); \ 1005cabdff1aSopenharmony_ci score += name8(s, dst + 8, src + 8, stride, 8); \ 1006cabdff1aSopenharmony_ci } \ 1007cabdff1aSopenharmony_ci return score; \ 1008cabdff1aSopenharmony_ci} 1009cabdff1aSopenharmony_ci 1010cabdff1aSopenharmony_ciWRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c) 1011cabdff1aSopenharmony_ciWRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c) 1012cabdff1aSopenharmony_ciWRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c) 1013cabdff1aSopenharmony_ci#if CONFIG_GPL 1014cabdff1aSopenharmony_ciWRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c) 1015cabdff1aSopenharmony_ci#endif 1016cabdff1aSopenharmony_ciWRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c) 1017cabdff1aSopenharmony_ciWRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) 1018cabdff1aSopenharmony_ciWRAPPER8_16_SQ(rd8x8_c, rd16_c) 1019cabdff1aSopenharmony_ciWRAPPER8_16_SQ(bit8x8_c, bit16_c) 1020cabdff1aSopenharmony_ci 1021cabdff1aSopenharmony_ciav_cold void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx) 1022cabdff1aSopenharmony_ci{ 1023cabdff1aSopenharmony_ci c->sum_abs_dctelem = sum_abs_dctelem_c; 1024cabdff1aSopenharmony_ci 1025cabdff1aSopenharmony_ci /* TODO [0] 16 [1] 8 */ 1026cabdff1aSopenharmony_ci c->pix_abs[0][0] = pix_abs16_c; 1027cabdff1aSopenharmony_ci c->pix_abs[0][1] = pix_abs16_x2_c; 1028cabdff1aSopenharmony_ci c->pix_abs[0][2] = pix_abs16_y2_c; 1029cabdff1aSopenharmony_ci c->pix_abs[0][3] = pix_abs16_xy2_c; 1030cabdff1aSopenharmony_ci c->pix_abs[1][0] = pix_abs8_c; 1031cabdff1aSopenharmony_ci c->pix_abs[1][1] = pix_abs8_x2_c; 1032cabdff1aSopenharmony_ci c->pix_abs[1][2] = pix_abs8_y2_c; 1033cabdff1aSopenharmony_ci c->pix_abs[1][3] = pix_abs8_xy2_c; 1034cabdff1aSopenharmony_ci 1035cabdff1aSopenharmony_ci#define SET_CMP_FUNC(name) \ 1036cabdff1aSopenharmony_ci c->name[0] = name ## 16_c; \ 1037cabdff1aSopenharmony_ci c->name[1] = name ## 8x8_c; 1038cabdff1aSopenharmony_ci 1039cabdff1aSopenharmony_ci SET_CMP_FUNC(hadamard8_diff) 1040cabdff1aSopenharmony_ci c->hadamard8_diff[4] = hadamard8_intra16_c; 1041cabdff1aSopenharmony_ci c->hadamard8_diff[5] = hadamard8_intra8x8_c; 1042cabdff1aSopenharmony_ci SET_CMP_FUNC(dct_sad) 1043cabdff1aSopenharmony_ci SET_CMP_FUNC(dct_max) 1044cabdff1aSopenharmony_ci#if CONFIG_GPL 1045cabdff1aSopenharmony_ci SET_CMP_FUNC(dct264_sad) 1046cabdff1aSopenharmony_ci#endif 1047cabdff1aSopenharmony_ci c->sad[0] = pix_abs16_c; 1048cabdff1aSopenharmony_ci c->sad[1] = pix_abs8_c; 1049cabdff1aSopenharmony_ci c->sse[0] = sse16_c; 1050cabdff1aSopenharmony_ci c->sse[1] = sse8_c; 1051cabdff1aSopenharmony_ci c->sse[2] = sse4_c; 1052cabdff1aSopenharmony_ci SET_CMP_FUNC(quant_psnr) 1053cabdff1aSopenharmony_ci SET_CMP_FUNC(rd) 1054cabdff1aSopenharmony_ci SET_CMP_FUNC(bit) 1055cabdff1aSopenharmony_ci c->vsad[0] = vsad16_c; 1056cabdff1aSopenharmony_ci c->vsad[1] = vsad8_c; 1057cabdff1aSopenharmony_ci c->vsad[4] = vsad_intra16_c; 1058cabdff1aSopenharmony_ci c->vsad[5] = vsad_intra8_c; 1059cabdff1aSopenharmony_ci c->vsse[0] = vsse16_c; 1060cabdff1aSopenharmony_ci c->vsse[1] = vsse8_c; 1061cabdff1aSopenharmony_ci c->vsse[4] = vsse_intra16_c; 1062cabdff1aSopenharmony_ci c->vsse[5] = vsse_intra8_c; 1063cabdff1aSopenharmony_ci c->nsse[0] = nsse16_c; 1064cabdff1aSopenharmony_ci c->nsse[1] = nsse8_c; 1065cabdff1aSopenharmony_ci#if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER 1066cabdff1aSopenharmony_ci ff_dsputil_init_dwt(c); 1067cabdff1aSopenharmony_ci#endif 1068cabdff1aSopenharmony_ci 1069cabdff1aSopenharmony_ci#if ARCH_AARCH64 1070cabdff1aSopenharmony_ci ff_me_cmp_init_aarch64(c, avctx); 1071cabdff1aSopenharmony_ci#elif ARCH_ALPHA 1072cabdff1aSopenharmony_ci ff_me_cmp_init_alpha(c, avctx); 1073cabdff1aSopenharmony_ci#elif ARCH_ARM 1074cabdff1aSopenharmony_ci ff_me_cmp_init_arm(c, avctx); 1075cabdff1aSopenharmony_ci#elif ARCH_PPC 1076cabdff1aSopenharmony_ci ff_me_cmp_init_ppc(c, avctx); 1077cabdff1aSopenharmony_ci#elif ARCH_X86 1078cabdff1aSopenharmony_ci ff_me_cmp_init_x86(c, avctx); 1079cabdff1aSopenharmony_ci#elif ARCH_MIPS 1080cabdff1aSopenharmony_ci ff_me_cmp_init_mips(c, avctx); 1081cabdff1aSopenharmony_ci#endif 1082cabdff1aSopenharmony_ci 1083cabdff1aSopenharmony_ci c->median_sad[0] = pix_median_abs16_c; 1084cabdff1aSopenharmony_ci c->median_sad[1] = pix_median_abs8_c; 1085cabdff1aSopenharmony_ci} 1086