1/* 2 * Copyright (C) 2004-2010 Michael Niedermayer <michaelni@gmx.at> 3 * Copyright (C) 2008 David Conrad 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22#include "libavutil/attributes.h" 23#include "libavutil/avassert.h" 24#include "libavutil/common.h" 25#include "me_cmp.h" 26#include "snow_dwt.h" 27 28int ff_slice_buffer_init(slice_buffer *buf, int line_count, 29 int max_allocated_lines, int line_width, 30 IDWTELEM *base_buffer) 31{ 32 int i; 33 34 buf->base_buffer = base_buffer; 35 buf->line_count = line_count; 36 buf->line_width = line_width; 37 buf->data_count = max_allocated_lines; 38 buf->line = av_calloc(line_count, sizeof(*buf->line)); 39 if (!buf->line) 40 return AVERROR(ENOMEM); 41 buf->data_stack = av_malloc_array(max_allocated_lines, sizeof(IDWTELEM *)); 42 if (!buf->data_stack) { 43 av_freep(&buf->line); 44 return AVERROR(ENOMEM); 45 } 46 47 for (i = 0; i < max_allocated_lines; i++) { 48 buf->data_stack[i] = av_malloc_array(line_width, sizeof(IDWTELEM)); 49 if (!buf->data_stack[i]) { 50 for (i--; i >=0; i--) 51 av_freep(&buf->data_stack[i]); 52 av_freep(&buf->data_stack); 53 av_freep(&buf->line); 54 return AVERROR(ENOMEM); 55 } 56 } 57 58 buf->data_stack_top = max_allocated_lines - 1; 59 return 0; 60} 61 62IDWTELEM *ff_slice_buffer_load_line(slice_buffer *buf, int line) 63{ 64 IDWTELEM *buffer; 65 66 av_assert0(buf->data_stack_top >= 0); 67// av_assert1(!buf->line[line]); 68 if (buf->line[line]) 69 return buf->line[line]; 70 71 buffer = buf->data_stack[buf->data_stack_top]; 72 buf->data_stack_top--; 73 buf->line[line] = buffer; 74 75 return buffer; 76} 77 78void ff_slice_buffer_release(slice_buffer *buf, int line) 79{ 80 IDWTELEM *buffer; 81 82 av_assert1(line >= 0 && line < buf->line_count); 83 av_assert1(buf->line[line]); 84 85 buffer = buf->line[line]; 86 buf->data_stack_top++; 87 buf->data_stack[buf->data_stack_top] = buffer; 88 buf->line[line] = NULL; 89} 90 91void ff_slice_buffer_flush(slice_buffer *buf) 92{ 93 int i; 94 95 if (!buf->line) 96 return; 97 98 for (i = 0; i < buf->line_count; i++) 99 if (buf->line[i]) 100 ff_slice_buffer_release(buf, i); 101} 102 103void ff_slice_buffer_destroy(slice_buffer *buf) 104{ 105 int i; 106 ff_slice_buffer_flush(buf); 107 108 if (buf->data_stack) 109 for (i = buf->data_count - 1; i >= 0; i--) 110 av_freep(&buf->data_stack[i]); 111 av_freep(&buf->data_stack); 112 av_freep(&buf->line); 113} 114 115static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, 116 int dst_step, int src_step, int ref_step, 117 int width, int mul, int add, int shift, 118 int highpass, int inverse) 119{ 120 const int mirror_left = !highpass; 121 const int mirror_right = (width & 1) ^ highpass; 122 const int w = (width >> 1) - 1 + (highpass & width); 123 int i; 124 125#define LIFT(src, ref, inv) ((src) + ((inv) ? -(ref) : +(ref))) 126 if (mirror_left) { 127 dst[0] = LIFT(src[0], ((mul * 2 * ref[0] + add) >> shift), inverse); 128 dst += dst_step; 129 src += src_step; 130 } 131 132 for (i = 0; i < w; i++) 133 dst[i * dst_step] = LIFT(src[i * src_step], 134 ((mul * (ref[i * ref_step] + 135 ref[(i + 1) * ref_step]) + 136 add) >> shift), 137 inverse); 138 139 if (mirror_right) 140 dst[w * dst_step] = LIFT(src[w * src_step], 141 ((mul * 2 * ref[w * ref_step] + add) >> shift), 142 inverse); 143} 144 145static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, 146 int dst_step, int src_step, int ref_step, 147 int width, int mul, int add, int shift, 148 int highpass, int inverse) 149{ 150 const int mirror_left = !highpass; 151 const int mirror_right = (width & 1) ^ highpass; 152 const int w = (width >> 1) - 1 + (highpass & width); 153 int i; 154 155 av_assert1(shift == 4); 156#define LIFTS(src, ref, inv) \ 157 ((inv) ? (src) + (((ref) + 4 * (src)) >> shift) \ 158 : -((-16 * (src) + (ref) + add / \ 159 4 + 1 + (5 << 25)) / (5 * 4) - (1 << 23))) 160 if (mirror_left) { 161 dst[0] = LIFTS(src[0], mul * 2 * ref[0] + add, inverse); 162 dst += dst_step; 163 src += src_step; 164 } 165 166 for (i = 0; i < w; i++) 167 dst[i * dst_step] = LIFTS(src[i * src_step], 168 mul * (ref[i * ref_step] + 169 ref[(i + 1) * ref_step]) + add, 170 inverse); 171 172 if (mirror_right) 173 dst[w * dst_step] = LIFTS(src[w * src_step], 174 mul * 2 * ref[w * ref_step] + add, 175 inverse); 176} 177 178static void horizontal_decompose53i(DWTELEM *b, DWTELEM *temp, int width) 179{ 180 const int width2 = width >> 1; 181 int x; 182 const int w2 = (width + 1) >> 1; 183 184 for (x = 0; x < width2; x++) { 185 temp[x] = b[2 * x]; 186 temp[x + w2] = b[2 * x + 1]; 187 } 188 if (width & 1) 189 temp[x] = b[2 * x]; 190 lift(b + w2, temp + w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0); 191 lift(b, temp, b + w2, 1, 1, 1, width, 1, 2, 2, 0, 0); 192} 193 194static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, 195 int width) 196{ 197 int i; 198 199 for (i = 0; i < width; i++) 200 b1[i] -= (b0[i] + b2[i]) >> 1; 201} 202 203static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, 204 int width) 205{ 206 int i; 207 208 for (i = 0; i < width; i++) 209 b1[i] += (b0[i] + b2[i] + 2) >> 2; 210} 211 212static void spatial_decompose53i(DWTELEM *buffer, DWTELEM *temp, 213 int width, int height, int stride) 214{ 215 int y; 216 DWTELEM *b0 = buffer + avpriv_mirror(-2 - 1, height - 1) * stride; 217 DWTELEM *b1 = buffer + avpriv_mirror(-2, height - 1) * stride; 218 219 for (y = -2; y < height; y += 2) { 220 DWTELEM *b2 = buffer + avpriv_mirror(y + 1, height - 1) * stride; 221 DWTELEM *b3 = buffer + avpriv_mirror(y + 2, height - 1) * stride; 222 223 if (y + 1 < (unsigned)height) 224 horizontal_decompose53i(b2, temp, width); 225 if (y + 2 < (unsigned)height) 226 horizontal_decompose53i(b3, temp, width); 227 228 if (y + 1 < (unsigned)height) 229 vertical_decompose53iH0(b1, b2, b3, width); 230 if (y + 0 < (unsigned)height) 231 vertical_decompose53iL0(b0, b1, b2, width); 232 233 b0 = b2; 234 b1 = b3; 235 } 236} 237 238static void horizontal_decompose97i(DWTELEM *b, DWTELEM *temp, int width) 239{ 240 const int w2 = (width + 1) >> 1; 241 242 lift(temp + w2, b + 1, b, 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1); 243 liftS(temp, b, temp + w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0); 244 lift(b + w2, temp + w2, temp, 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0); 245 lift(b, temp, b + w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0); 246} 247 248static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, 249 int width) 250{ 251 int i; 252 253 for (i = 0; i < width; i++) 254 b1[i] -= (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS; 255} 256 257static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, 258 int width) 259{ 260 int i; 261 262 for (i = 0; i < width; i++) 263 b1[i] += (W_CM * (b0[i] + b2[i]) + W_CO) >> W_CS; 264} 265 266static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, 267 int width) 268{ 269 int i; 270 271 for (i = 0; i < width; i++) 272 b1[i] = (16 * 4 * b1[i] - 4 * (b0[i] + b2[i]) + W_BO * 5 + (5 << 27)) / 273 (5 * 16) - (1 << 23); 274} 275 276static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, 277 int width) 278{ 279 int i; 280 281 for (i = 0; i < width; i++) 282 b1[i] += (W_DM * (b0[i] + b2[i]) + W_DO) >> W_DS; 283} 284 285static void spatial_decompose97i(DWTELEM *buffer, DWTELEM *temp, 286 int width, int height, int stride) 287{ 288 int y; 289 DWTELEM *b0 = buffer + avpriv_mirror(-4 - 1, height - 1) * stride; 290 DWTELEM *b1 = buffer + avpriv_mirror(-4, height - 1) * stride; 291 DWTELEM *b2 = buffer + avpriv_mirror(-4 + 1, height - 1) * stride; 292 DWTELEM *b3 = buffer + avpriv_mirror(-4 + 2, height - 1) * stride; 293 294 for (y = -4; y < height; y += 2) { 295 DWTELEM *b4 = buffer + avpriv_mirror(y + 3, height - 1) * stride; 296 DWTELEM *b5 = buffer + avpriv_mirror(y + 4, height - 1) * stride; 297 298 if (y + 3 < (unsigned)height) 299 horizontal_decompose97i(b4, temp, width); 300 if (y + 4 < (unsigned)height) 301 horizontal_decompose97i(b5, temp, width); 302 303 if (y + 3 < (unsigned)height) 304 vertical_decompose97iH0(b3, b4, b5, width); 305 if (y + 2 < (unsigned)height) 306 vertical_decompose97iL0(b2, b3, b4, width); 307 if (y + 1 < (unsigned)height) 308 vertical_decompose97iH1(b1, b2, b3, width); 309 if (y + 0 < (unsigned)height) 310 vertical_decompose97iL1(b0, b1, b2, width); 311 312 b0 = b2; 313 b1 = b3; 314 b2 = b4; 315 b3 = b5; 316 } 317} 318 319void ff_spatial_dwt(DWTELEM *buffer, DWTELEM *temp, int width, int height, 320 int stride, int type, int decomposition_count) 321{ 322 int level; 323 324 for (level = 0; level < decomposition_count; level++) { 325 switch (type) { 326 case DWT_97: 327 spatial_decompose97i(buffer, temp, 328 width >> level, height >> level, 329 stride << level); 330 break; 331 case DWT_53: 332 spatial_decompose53i(buffer, temp, 333 width >> level, height >> level, 334 stride << level); 335 break; 336 } 337 } 338} 339 340static void horizontal_compose53i(IDWTELEM *b, IDWTELEM *temp, int width) 341{ 342 const int width2 = width >> 1; 343 const int w2 = (width + 1) >> 1; 344 int x; 345 346 for (x = 0; x < width2; x++) { 347 temp[2 * x] = b[x]; 348 temp[2 * x + 1] = b[x + w2]; 349 } 350 if (width & 1) 351 temp[2 * x] = b[x]; 352 353 b[0] = temp[0] - ((temp[1] + 1) >> 1); 354 for (x = 2; x < width - 1; x += 2) { 355 b[x] = temp[x] - ((temp[x - 1] + temp[x + 1] + 2) >> 2); 356 b[x - 1] = temp[x - 1] + ((b[x - 2] + b[x] + 1) >> 1); 357 } 358 if (width & 1) { 359 b[x] = temp[x] - ((temp[x - 1] + 1) >> 1); 360 b[x - 1] = temp[x - 1] + ((b[x - 2] + b[x] + 1) >> 1); 361 } else 362 b[x - 1] = temp[x - 1] + b[x - 2]; 363} 364 365static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, 366 int width) 367{ 368 int i; 369 370 for (i = 0; i < width; i++) 371 b1[i] += (b0[i] + b2[i]) >> 1; 372} 373 374static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, 375 int width) 376{ 377 int i; 378 379 for (i = 0; i < width; i++) 380 b1[i] -= (b0[i] + b2[i] + 2) >> 2; 381} 382 383static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer *sb, 384 int height, int stride_line) 385{ 386 cs->b0 = slice_buffer_get_line(sb, 387 avpriv_mirror(-1 - 1, height - 1) * stride_line); 388 cs->b1 = slice_buffer_get_line(sb, avpriv_mirror(-1, height - 1) * stride_line); 389 cs->y = -1; 390} 391 392static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, 393 int height, int stride) 394{ 395 cs->b0 = buffer + avpriv_mirror(-1 - 1, height - 1) * stride; 396 cs->b1 = buffer + avpriv_mirror(-1, height - 1) * stride; 397 cs->y = -1; 398} 399 400static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer *sb, 401 IDWTELEM *temp, 402 int width, int height, 403 int stride_line) 404{ 405 int y = cs->y; 406 407 IDWTELEM *b0 = cs->b0; 408 IDWTELEM *b1 = cs->b1; 409 IDWTELEM *b2 = slice_buffer_get_line(sb, 410 avpriv_mirror(y + 1, height - 1) * 411 stride_line); 412 IDWTELEM *b3 = slice_buffer_get_line(sb, 413 avpriv_mirror(y + 2, height - 1) * 414 stride_line); 415 416 if (y + 1 < (unsigned)height && y < (unsigned)height) { 417 int x; 418 419 for (x = 0; x < width; x++) { 420 b2[x] -= (b1[x] + b3[x] + 2) >> 2; 421 b1[x] += (b0[x] + b2[x]) >> 1; 422 } 423 } else { 424 if (y + 1 < (unsigned)height) 425 vertical_compose53iL0(b1, b2, b3, width); 426 if (y + 0 < (unsigned)height) 427 vertical_compose53iH0(b0, b1, b2, width); 428 } 429 430 if (y - 1 < (unsigned)height) 431 horizontal_compose53i(b0, temp, width); 432 if (y + 0 < (unsigned)height) 433 horizontal_compose53i(b1, temp, width); 434 435 cs->b0 = b2; 436 cs->b1 = b3; 437 cs->y += 2; 438} 439 440static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, 441 IDWTELEM *temp, int width, int height, 442 int stride) 443{ 444 int y = cs->y; 445 IDWTELEM *b0 = cs->b0; 446 IDWTELEM *b1 = cs->b1; 447 IDWTELEM *b2 = buffer + avpriv_mirror(y + 1, height - 1) * stride; 448 IDWTELEM *b3 = buffer + avpriv_mirror(y + 2, height - 1) * stride; 449 450 if (y + 1 < (unsigned)height) 451 vertical_compose53iL0(b1, b2, b3, width); 452 if (y + 0 < (unsigned)height) 453 vertical_compose53iH0(b0, b1, b2, width); 454 455 if (y - 1 < (unsigned)height) 456 horizontal_compose53i(b0, temp, width); 457 if (y + 0 < (unsigned)height) 458 horizontal_compose53i(b1, temp, width); 459 460 cs->b0 = b2; 461 cs->b1 = b3; 462 cs->y += 2; 463} 464 465static void snow_horizontal_compose97i(IDWTELEM *b, IDWTELEM *temp, int width) 466{ 467 const int w2 = (width + 1) >> 1; 468 int x; 469 470 temp[0] = b[0] - ((3 * b[w2] + 2) >> 2); 471 for (x = 1; x < (width >> 1); x++) { 472 temp[2 * x] = b[x] - ((3 * (b[x + w2 - 1] + b[x + w2]) + 4) >> 3); 473 temp[2 * x - 1] = b[x + w2 - 1] - temp[2 * x - 2] - temp[2 * x]; 474 } 475 if (width & 1) { 476 temp[2 * x] = b[x] - ((3 * b[x + w2 - 1] + 2) >> 2); 477 temp[2 * x - 1] = b[x + w2 - 1] - temp[2 * x - 2] - temp[2 * x]; 478 } else 479 temp[2 * x - 1] = b[x + w2 - 1] - 2 * temp[2 * x - 2]; 480 481 b[0] = temp[0] + ((2 * temp[0] + temp[1] + 4) >> 3); 482 for (x = 2; x < width - 1; x += 2) { 483 b[x] = temp[x] + ((4 * temp[x] + temp[x - 1] + temp[x + 1] + 8) >> 4); 484 b[x - 1] = temp[x - 1] + ((3 * (b[x - 2] + b[x])) >> 1); 485 } 486 if (width & 1) { 487 b[x] = temp[x] + ((2 * temp[x] + temp[x - 1] + 4) >> 3); 488 b[x - 1] = temp[x - 1] + ((3 * (b[x - 2] + b[x])) >> 1); 489 } else 490 b[x - 1] = temp[x - 1] + 3 * b[x - 2]; 491} 492 493static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, 494 int width) 495{ 496 int i; 497 498 for (i = 0; i < width; i++) 499 b1[i] += (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS; 500} 501 502static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, 503 int width) 504{ 505 int i; 506 507 for (i = 0; i < width; i++) 508 b1[i] -= (W_CM * (b0[i] + b2[i]) + W_CO) >> W_CS; 509} 510 511static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, 512 int width) 513{ 514 int i; 515 516 for (i = 0; i < width; i++) 517 b1[i] += (W_BM * (b0[i] + b2[i]) + 4 * b1[i] + W_BO) >> W_BS; 518} 519 520static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, 521 int width) 522{ 523 int i; 524 525 for (i = 0; i < width; i++) 526 b1[i] -= (W_DM * (b0[i] + b2[i]) + W_DO) >> W_DS; 527} 528 529static void snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, 530 IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, 531 int width) 532{ 533 int i; 534 535 for (i = 0; i < width; i++) { 536 b4[i] -= (W_DM * (b3[i] + b5[i]) + W_DO) >> W_DS; 537 b3[i] -= (W_CM * (b2[i] + b4[i]) + W_CO) >> W_CS; 538 b2[i] += (W_BM * (b1[i] + b3[i]) + 4 * b2[i] + W_BO) >> W_BS; 539 b1[i] += (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS; 540 } 541} 542 543static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer *sb, 544 int height, int stride_line) 545{ 546 cs->b0 = slice_buffer_get_line(sb, avpriv_mirror(-3 - 1, height - 1) * stride_line); 547 cs->b1 = slice_buffer_get_line(sb, avpriv_mirror(-3, height - 1) * stride_line); 548 cs->b2 = slice_buffer_get_line(sb, avpriv_mirror(-3 + 1, height - 1) * stride_line); 549 cs->b3 = slice_buffer_get_line(sb, avpriv_mirror(-3 + 2, height - 1) * stride_line); 550 cs->y = -3; 551} 552 553static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, 554 int stride) 555{ 556 cs->b0 = buffer + avpriv_mirror(-3 - 1, height - 1) * stride; 557 cs->b1 = buffer + avpriv_mirror(-3, height - 1) * stride; 558 cs->b2 = buffer + avpriv_mirror(-3 + 1, height - 1) * stride; 559 cs->b3 = buffer + avpriv_mirror(-3 + 2, height - 1) * stride; 560 cs->y = -3; 561} 562 563static void spatial_compose97i_dy_buffered(SnowDWTContext *dsp, DWTCompose *cs, 564 slice_buffer * sb, IDWTELEM *temp, 565 int width, int height, 566 int stride_line) 567{ 568 int y = cs->y; 569 570 IDWTELEM *b0 = cs->b0; 571 IDWTELEM *b1 = cs->b1; 572 IDWTELEM *b2 = cs->b2; 573 IDWTELEM *b3 = cs->b3; 574 IDWTELEM *b4 = slice_buffer_get_line(sb, 575 avpriv_mirror(y + 3, height - 1) * 576 stride_line); 577 IDWTELEM *b5 = slice_buffer_get_line(sb, 578 avpriv_mirror(y + 4, height - 1) * 579 stride_line); 580 581 if (y > 0 && y + 4 < height) { 582 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width); 583 } else { 584 if (y + 3 < (unsigned)height) 585 vertical_compose97iL1(b3, b4, b5, width); 586 if (y + 2 < (unsigned)height) 587 vertical_compose97iH1(b2, b3, b4, width); 588 if (y + 1 < (unsigned)height) 589 vertical_compose97iL0(b1, b2, b3, width); 590 if (y + 0 < (unsigned)height) 591 vertical_compose97iH0(b0, b1, b2, width); 592 } 593 594 if (y - 1 < (unsigned)height) 595 dsp->horizontal_compose97i(b0, temp, width); 596 if (y + 0 < (unsigned)height) 597 dsp->horizontal_compose97i(b1, temp, width); 598 599 cs->b0 = b2; 600 cs->b1 = b3; 601 cs->b2 = b4; 602 cs->b3 = b5; 603 cs->y += 2; 604} 605 606static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, 607 IDWTELEM *temp, int width, int height, 608 int stride) 609{ 610 int y = cs->y; 611 IDWTELEM *b0 = cs->b0; 612 IDWTELEM *b1 = cs->b1; 613 IDWTELEM *b2 = cs->b2; 614 IDWTELEM *b3 = cs->b3; 615 IDWTELEM *b4 = buffer + avpriv_mirror(y + 3, height - 1) * stride; 616 IDWTELEM *b5 = buffer + avpriv_mirror(y + 4, height - 1) * stride; 617 618 if (y + 3 < (unsigned)height) 619 vertical_compose97iL1(b3, b4, b5, width); 620 if (y + 2 < (unsigned)height) 621 vertical_compose97iH1(b2, b3, b4, width); 622 if (y + 1 < (unsigned)height) 623 vertical_compose97iL0(b1, b2, b3, width); 624 if (y + 0 < (unsigned)height) 625 vertical_compose97iH0(b0, b1, b2, width); 626 627 if (y - 1 < (unsigned)height) 628 snow_horizontal_compose97i(b0, temp, width); 629 if (y + 0 < (unsigned)height) 630 snow_horizontal_compose97i(b1, temp, width); 631 632 cs->b0 = b2; 633 cs->b1 = b3; 634 cs->b2 = b4; 635 cs->b3 = b5; 636 cs->y += 2; 637} 638 639void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer *sb, int width, 640 int height, int stride_line, int type, 641 int decomposition_count) 642{ 643 int level; 644 for (level = decomposition_count - 1; level >= 0; level--) { 645 switch (type) { 646 case DWT_97: 647 spatial_compose97i_buffered_init(cs + level, sb, height >> level, 648 stride_line << level); 649 break; 650 case DWT_53: 651 spatial_compose53i_buffered_init(cs + level, sb, height >> level, 652 stride_line << level); 653 break; 654 } 655 } 656} 657 658void ff_spatial_idwt_buffered_slice(SnowDWTContext *dsp, DWTCompose *cs, 659 slice_buffer *slice_buf, IDWTELEM *temp, 660 int width, int height, int stride_line, 661 int type, int decomposition_count, int y) 662{ 663 const int support = type == 1 ? 3 : 5; 664 int level; 665 if (type == 2) 666 return; 667 668 for (level = decomposition_count - 1; level >= 0; level--) 669 while (cs[level].y <= FFMIN((y >> level) + support, height >> level)) { 670 switch (type) { 671 case DWT_97: 672 spatial_compose97i_dy_buffered(dsp, cs + level, slice_buf, temp, 673 width >> level, 674 height >> level, 675 stride_line << level); 676 break; 677 case DWT_53: 678 spatial_compose53i_dy_buffered(cs + level, slice_buf, temp, 679 width >> level, 680 height >> level, 681 stride_line << level); 682 break; 683 } 684 } 685} 686 687static void spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, 688 int height, int stride, int type, 689 int decomposition_count) 690{ 691 int level; 692 for (level = decomposition_count - 1; level >= 0; level--) { 693 switch (type) { 694 case DWT_97: 695 spatial_compose97i_init(cs + level, buffer, height >> level, 696 stride << level); 697 break; 698 case DWT_53: 699 spatial_compose53i_init(cs + level, buffer, height >> level, 700 stride << level); 701 break; 702 } 703 } 704} 705 706static void spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, 707 IDWTELEM *temp, int width, int height, 708 int stride, int type, 709 int decomposition_count, int y) 710{ 711 const int support = type == 1 ? 3 : 5; 712 int level; 713 if (type == 2) 714 return; 715 716 for (level = decomposition_count - 1; level >= 0; level--) 717 while (cs[level].y <= FFMIN((y >> level) + support, height >> level)) { 718 switch (type) { 719 case DWT_97: 720 spatial_compose97i_dy(cs + level, buffer, temp, width >> level, 721 height >> level, stride << level); 722 break; 723 case DWT_53: 724 spatial_compose53i_dy(cs + level, buffer, temp, width >> level, 725 height >> level, stride << level); 726 break; 727 } 728 } 729} 730 731void ff_spatial_idwt(IDWTELEM *buffer, IDWTELEM *temp, int width, int height, 732 int stride, int type, int decomposition_count) 733{ 734 DWTCompose cs[MAX_DECOMPOSITIONS]; 735 int y; 736 spatial_idwt_init(cs, buffer, width, height, stride, type, 737 decomposition_count); 738 for (y = 0; y < height; y += 4) 739 spatial_idwt_slice(cs, buffer, temp, width, height, stride, type, 740 decomposition_count, y); 741} 742 743static inline int w_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, 744 int w, int h, int type) 745{ 746 int s, i, j; 747 const int dec_count = w == 8 ? 3 : 4; 748 int tmp[32 * 32], tmp2[32]; 749 int level, ori; 750 static const int scale[2][2][4][4] = { 751 { 752 { // 9/7 8x8 dec=3 753 { 268, 239, 239, 213 }, 754 { 0, 224, 224, 152 }, 755 { 0, 135, 135, 110 }, 756 }, 757 { // 9/7 16x16 or 32x32 dec=4 758 { 344, 310, 310, 280 }, 759 { 0, 320, 320, 228 }, 760 { 0, 175, 175, 136 }, 761 { 0, 129, 129, 102 }, 762 } 763 }, 764 { 765 { // 5/3 8x8 dec=3 766 { 275, 245, 245, 218 }, 767 { 0, 230, 230, 156 }, 768 { 0, 138, 138, 113 }, 769 }, 770 { // 5/3 16x16 or 32x32 dec=4 771 { 352, 317, 317, 286 }, 772 { 0, 328, 328, 233 }, 773 { 0, 180, 180, 140 }, 774 { 0, 132, 132, 105 }, 775 } 776 } 777 }; 778 779 for (i = 0; i < h; i++) { 780 for (j = 0; j < w; j += 4) { 781 tmp[32 * i + j + 0] = (pix1[j + 0] - pix2[j + 0]) << 4; 782 tmp[32 * i + j + 1] = (pix1[j + 1] - pix2[j + 1]) << 4; 783 tmp[32 * i + j + 2] = (pix1[j + 2] - pix2[j + 2]) << 4; 784 tmp[32 * i + j + 3] = (pix1[j + 3] - pix2[j + 3]) << 4; 785 } 786 pix1 += line_size; 787 pix2 += line_size; 788 } 789 790 ff_spatial_dwt(tmp, tmp2, w, h, 32, type, dec_count); 791 792 s = 0; 793 av_assert1(w == h); 794 for (level = 0; level < dec_count; level++) 795 for (ori = level ? 1 : 0; ori < 4; ori++) { 796 int size = w >> (dec_count - level); 797 int sx = (ori & 1) ? size : 0; 798 int stride = 32 << (dec_count - level); 799 int sy = (ori & 2) ? stride >> 1 : 0; 800 801 for (i = 0; i < size; i++) 802 for (j = 0; j < size; j++) { 803 int v = tmp[sx + sy + i * stride + j] * 804 scale[type][dec_count - 3][level][ori]; 805 s += FFABS(v); 806 } 807 } 808 av_assert1(s >= 0); 809 return s >> 9; 810} 811 812static int w53_8_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h) 813{ 814 return w_c(v, pix1, pix2, line_size, 8, h, 1); 815} 816 817static int w97_8_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h) 818{ 819 return w_c(v, pix1, pix2, line_size, 8, h, 0); 820} 821 822static int w53_16_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h) 823{ 824 return w_c(v, pix1, pix2, line_size, 16, h, 1); 825} 826 827static int w97_16_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h) 828{ 829 return w_c(v, pix1, pix2, line_size, 16, h, 0); 830} 831 832int ff_w53_32_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h) 833{ 834 return w_c(v, pix1, pix2, line_size, 32, h, 1); 835} 836 837int ff_w97_32_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h) 838{ 839 return w_c(v, pix1, pix2, line_size, 32, h, 0); 840} 841 842av_cold void ff_dsputil_init_dwt(MECmpContext *c) 843{ 844 c->w53[0] = w53_16_c; 845 c->w53[1] = w53_8_c; 846 c->w97[0] = w97_16_c; 847 c->w97[1] = w97_8_c; 848} 849 850av_cold void ff_dwt_init(SnowDWTContext *c) 851{ 852 c->vertical_compose97i = snow_vertical_compose97i; 853 c->horizontal_compose97i = snow_horizontal_compose97i; 854 c->inner_add_yblock = ff_snow_inner_add_yblock; 855 856#if ARCH_X86 && HAVE_MMX 857 ff_dwt_init_x86(c); 858#endif 859} 860 861 862