1/* 2 * Copyright (C) 2016 foo86 3 * 4 * This file is part of FFmpeg. 5 * 6 * FFmpeg is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * FFmpeg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with FFmpeg; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21#include <stdlib.h> 22 23#include "dcadct.h" 24#include "dcamath.h" 25 26static void sum_a(const int *input, int *output, int len) 27{ 28 int i; 29 30 for (i = 0; i < len; i++) 31 output[i] = input[2 * i] + input[2 * i + 1]; 32} 33 34static void sum_b(const int *input, int *output, int len) 35{ 36 int i; 37 38 output[0] = input[0]; 39 for (i = 1; i < len; i++) 40 output[i] = input[2 * i] + input[2 * i - 1]; 41} 42 43static void sum_c(const int *input, int *output, int len) 44{ 45 int i; 46 47 for (i = 0; i < len; i++) 48 output[i] = input[2 * i]; 49} 50 51static void sum_d(const int *input, int *output, int len) 52{ 53 int i; 54 55 output[0] = input[1]; 56 for (i = 1; i < len; i++) 57 output[i] = input[2 * i - 1] + input[2 * i + 1]; 58} 59 60static void dct_a(const int *input, int *output) 61{ 62 static const int cos_mod[8][8] = { 63 { 8348215, 8027397, 7398092, 6484482, 5321677, 3954362, 2435084, 822227 }, 64 { 8027397, 5321677, 822227, -3954362, -7398092, -8348215, -6484482, -2435084 }, 65 { 7398092, 822227, -6484482, -8027397, -2435084, 5321677, 8348215, 3954362 }, 66 { 6484482, -3954362, -8027397, 822227, 8348215, 2435084, -7398092, -5321677 }, 67 { 5321677, -7398092, -2435084, 8348215, -822227, -8027397, 3954362, 6484482 }, 68 { 3954362, -8348215, 5321677, 2435084, -8027397, 6484482, 822227, -7398092 }, 69 { 2435084, -6484482, 8348215, -7398092, 3954362, 822227, -5321677, 8027397 }, 70 { 822227, -2435084, 3954362, -5321677, 6484482, -7398092, 8027397, -8348215 } 71 }; 72 73 int i, j; 74 75 for (i = 0; i < 8; i++) { 76 int64_t res = 0; 77 for (j = 0; j < 8; j++) 78 res += (int64_t)cos_mod[i][j] * input[j]; 79 output[i] = norm23(res); 80 } 81} 82 83static void dct_b(const int *input, int *output) 84{ 85 static const int cos_mod[8][7] = { 86 { 8227423, 7750063, 6974873, 5931642, 4660461, 3210181, 1636536 }, 87 { 6974873, 3210181, -1636536, -5931642, -8227423, -7750063, -4660461 }, 88 { 4660461, -3210181, -8227423, -5931642, 1636536, 7750063, 6974873 }, 89 { 1636536, -7750063, -4660461, 5931642, 6974873, -3210181, -8227423 }, 90 { -1636536, -7750063, 4660461, 5931642, -6974873, -3210181, 8227423 }, 91 { -4660461, -3210181, 8227423, -5931642, -1636536, 7750063, -6974873 }, 92 { -6974873, 3210181, 1636536, -5931642, 8227423, -7750063, 4660461 }, 93 { -8227423, 7750063, -6974873, 5931642, -4660461, 3210181, -1636536 } 94 }; 95 96 int i, j; 97 98 for (i = 0; i < 8; i++) { 99 int64_t res = input[0] * (INT64_C(1) << 23); 100 for (j = 0; j < 7; j++) 101 res += (int64_t)cos_mod[i][j] * input[1 + j]; 102 output[i] = norm23(res); 103 } 104} 105 106static void mod_a(const int *input, int *output) 107{ 108 static const int cos_mod[16] = { 109 4199362, 4240198, 4323885, 4454708, 110 4639772, 4890013, 5221943, 5660703, 111 -6245623, -7040975, -8158494, -9809974, 112 -12450076, -17261920, -28585092, -85479984 113 }; 114 115 int i, k; 116 117 for (i = 0; i < 8; i++) 118 output[i] = mul23(cos_mod[i], input[i] + input[8 + i]); 119 120 for (i = 8, k = 7; i < 16; i++, k--) 121 output[i] = mul23(cos_mod[i], input[k] - input[8 + k]); 122} 123 124static void mod_b(int *input, int *output) 125{ 126 static const int cos_mod[8] = { 127 4214598, 4383036, 4755871, 5425934, 128 6611520, 8897610, 14448934, 42791536 129 }; 130 131 int i, k; 132 133 for (i = 0; i < 8; i++) 134 input[8 + i] = mul23(cos_mod[i], input[8 + i]); 135 136 for (i = 0; i < 8; i++) 137 output[i] = input[i] + input[8 + i]; 138 139 for (i = 8, k = 7; i < 16; i++, k--) 140 output[i] = input[k] - input[8 + k]; 141} 142 143static void mod_c(const int *input, int *output) 144{ 145 static const int cos_mod[32] = { 146 1048892, 1051425, 1056522, 1064244, 147 1074689, 1087987, 1104313, 1123884, 148 1146975, 1173922, 1205139, 1241133, 149 1282529, 1330095, 1384791, 1447815, 150 -1520688, -1605358, -1704360, -1821051, 151 -1959964, -2127368, -2332183, -2587535, 152 -2913561, -3342802, -3931480, -4785806, 153 -6133390, -8566050, -14253820, -42727120 154 }; 155 156 int i, k; 157 158 for (i = 0; i < 16; i++) 159 output[i] = mul23(cos_mod[i], input[i] + input[16 + i]); 160 161 for (i = 16, k = 15; i < 32; i++, k--) 162 output[i] = mul23(cos_mod[i], input[k] - input[16 + k]); 163} 164 165static void clp_v(int *input, int len) 166{ 167 int i; 168 169 for (i = 0; i < len; i++) 170 input[i] = clip23(input[i]); 171} 172 173static void imdct_half_32(int32_t *output, const int32_t *input) 174{ 175 int buf_a[32], buf_b[32]; 176 int i, k, mag, shift, round; 177 178 mag = 0; 179 for (i = 0; i < 32; i++) 180 mag += abs(input[i]); 181 182 shift = mag > 0x400000 ? 2 : 0; 183 round = shift > 0 ? 1 << (shift - 1) : 0; 184 185 for (i = 0; i < 32; i++) 186 buf_a[i] = (input[i] + round) >> shift; 187 188 sum_a(buf_a, buf_b + 0, 16); 189 sum_b(buf_a, buf_b + 16, 16); 190 clp_v(buf_b, 32); 191 192 sum_a(buf_b + 0, buf_a + 0, 8); 193 sum_b(buf_b + 0, buf_a + 8, 8); 194 sum_c(buf_b + 16, buf_a + 16, 8); 195 sum_d(buf_b + 16, buf_a + 24, 8); 196 clp_v(buf_a, 32); 197 198 dct_a(buf_a + 0, buf_b + 0); 199 dct_b(buf_a + 8, buf_b + 8); 200 dct_b(buf_a + 16, buf_b + 16); 201 dct_b(buf_a + 24, buf_b + 24); 202 clp_v(buf_b, 32); 203 204 mod_a(buf_b + 0, buf_a + 0); 205 mod_b(buf_b + 16, buf_a + 16); 206 clp_v(buf_a, 32); 207 208 mod_c(buf_a, buf_b); 209 210 for (i = 0; i < 32; i++) 211 buf_b[i] = clip23(buf_b[i] * (1 << shift)); 212 213 for (i = 0, k = 31; i < 16; i++, k--) { 214 output[ i] = clip23(buf_b[i] - buf_b[k]); 215 output[16 + i] = clip23(buf_b[i] + buf_b[k]); 216 } 217} 218 219static void mod64_a(const int *input, int *output) 220{ 221 static const int cos_mod[32] = { 222 4195568, 4205700, 4226086, 4256977, 223 4298755, 4351949, 4417251, 4495537, 224 4587901, 4695690, 4820557, 4964534, 225 5130115, 5320382, 5539164, 5791261, 226 -6082752, -6421430, -6817439, -7284203, 227 -7839855, -8509474, -9328732, -10350140, 228 -11654242, -13371208, -15725922, -19143224, 229 -24533560, -34264200, -57015280, -170908480 230 }; 231 232 int i, k; 233 234 for (i = 0; i < 16; i++) 235 output[i] = mul23(cos_mod[i], input[i] + input[16 + i]); 236 237 for (i = 16, k = 15; i < 32; i++, k--) 238 output[i] = mul23(cos_mod[i], input[k] - input[16 + k]); 239} 240 241static void mod64_b(int *input, int *output) 242{ 243 static const int cos_mod[16] = { 244 4199362, 4240198, 4323885, 4454708, 245 4639772, 4890013, 5221943, 5660703, 246 6245623, 7040975, 8158494, 9809974, 247 12450076, 17261920, 28585092, 85479984 248 }; 249 250 int i, k; 251 252 for (i = 0; i < 16; i++) 253 input[16 + i] = mul23(cos_mod[i], input[16 + i]); 254 255 for (i = 0; i < 16; i++) 256 output[i] = input[i] + input[16 + i]; 257 258 for (i = 16, k = 15; i < 32; i++, k--) 259 output[i] = input[k] - input[16 + k]; 260} 261 262static void mod64_c(const int *input, int *output) 263{ 264 static const int cos_mod[64] = { 265 741511, 741958, 742853, 744199, 266 746001, 748262, 750992, 754197, 267 757888, 762077, 766777, 772003, 268 777772, 784105, 791021, 798546, 269 806707, 815532, 825054, 835311, 270 846342, 858193, 870912, 884554, 271 899181, 914860, 931667, 949686, 272 969011, 989747, 1012012, 1035941, 273 -1061684, -1089412, -1119320, -1151629, 274 -1186595, -1224511, -1265719, -1310613, 275 -1359657, -1413400, -1472490, -1537703, 276 -1609974, -1690442, -1780506, -1881904, 277 -1996824, -2128058, -2279225, -2455101, 278 -2662128, -2909200, -3208956, -3579983, 279 -4050785, -4667404, -5509372, -6726913, 280 -8641940, -12091426, -20144284, -60420720 281 }; 282 283 int i, k; 284 285 for (i = 0; i < 32; i++) 286 output[i] = mul23(cos_mod[i], input[i] + input[32 + i]); 287 288 for (i = 32, k = 31; i < 64; i++, k--) 289 output[i] = mul23(cos_mod[i], input[k] - input[32 + k]); 290} 291 292static void imdct_half_64(int32_t *output, const int32_t *input) 293{ 294 int buf_a[64], buf_b[64]; 295 int i, k, mag, shift, round; 296 297 mag = 0; 298 for (i = 0; i < 64; i++) 299 mag += abs(input[i]); 300 301 shift = mag > 0x400000 ? 2 : 0; 302 round = shift > 0 ? 1 << (shift - 1) : 0; 303 304 for (i = 0; i < 64; i++) 305 buf_a[i] = (input[i] + round) >> shift; 306 307 sum_a(buf_a, buf_b + 0, 32); 308 sum_b(buf_a, buf_b + 32, 32); 309 clp_v(buf_b, 64); 310 311 sum_a(buf_b + 0, buf_a + 0, 16); 312 sum_b(buf_b + 0, buf_a + 16, 16); 313 sum_c(buf_b + 32, buf_a + 32, 16); 314 sum_d(buf_b + 32, buf_a + 48, 16); 315 clp_v(buf_a, 64); 316 317 sum_a(buf_a + 0, buf_b + 0, 8); 318 sum_b(buf_a + 0, buf_b + 8, 8); 319 sum_c(buf_a + 16, buf_b + 16, 8); 320 sum_d(buf_a + 16, buf_b + 24, 8); 321 sum_c(buf_a + 32, buf_b + 32, 8); 322 sum_d(buf_a + 32, buf_b + 40, 8); 323 sum_c(buf_a + 48, buf_b + 48, 8); 324 sum_d(buf_a + 48, buf_b + 56, 8); 325 clp_v(buf_b, 64); 326 327 dct_a(buf_b + 0, buf_a + 0); 328 dct_b(buf_b + 8, buf_a + 8); 329 dct_b(buf_b + 16, buf_a + 16); 330 dct_b(buf_b + 24, buf_a + 24); 331 dct_b(buf_b + 32, buf_a + 32); 332 dct_b(buf_b + 40, buf_a + 40); 333 dct_b(buf_b + 48, buf_a + 48); 334 dct_b(buf_b + 56, buf_a + 56); 335 clp_v(buf_a, 64); 336 337 mod_a(buf_a + 0, buf_b + 0); 338 mod_b(buf_a + 16, buf_b + 16); 339 mod_b(buf_a + 32, buf_b + 32); 340 mod_b(buf_a + 48, buf_b + 48); 341 clp_v(buf_b, 64); 342 343 mod64_a(buf_b + 0, buf_a + 0); 344 mod64_b(buf_b + 32, buf_a + 32); 345 clp_v(buf_a, 64); 346 347 mod64_c(buf_a, buf_b); 348 349 for (i = 0; i < 64; i++) 350 buf_b[i] = clip23(buf_b[i] * (1 << shift)); 351 352 for (i = 0, k = 63; i < 32; i++, k--) { 353 output[ i] = clip23(buf_b[i] - buf_b[k]); 354 output[32 + i] = clip23(buf_b[i] + buf_b[k]); 355 } 356} 357 358av_cold void ff_dcadct_init(DCADCTContext *c) 359{ 360 c->imdct_half[0] = imdct_half_32; 361 c->imdct_half[1] = imdct_half_64; 362} 363