1/* 2 * x86 optimized discrete wavelet transform 3 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> 4 * Copyright (c) 2010 David Conrad 5 * 6 * This file is part of FFmpeg. 7 * 8 * FFmpeg is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * FFmpeg is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with FFmpeg; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 */ 22 23#include "libavutil/x86/asm.h" 24#include "libavutil/x86/cpu.h" 25#include "libavcodec/dirac_dwt.h" 26 27#define COMPOSE_VERTICAL(ext, align) \ 28void ff_vertical_compose53iL0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int width); \ 29void ff_vertical_compose_dirac53iH0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int width); \ 30void ff_vertical_compose_dd137iL0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int16_t *b3, int16_t *b4, int width); \ 31void ff_vertical_compose_dd97iH0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int16_t *b3, int16_t *b4, int width); \ 32void ff_vertical_compose_haar##ext(int16_t *b0, int16_t *b1, int width); \ 33void ff_horizontal_compose_haar0i##ext(int16_t *b, int16_t *tmp, int w);\ 34void ff_horizontal_compose_haar1i##ext(int16_t *b, int16_t *tmp, int w);\ 35\ 36static void vertical_compose53iL0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, int width) \ 37{ \ 38 int i, width_align = width&~(align-1); \ 39 int16_t *b0 = (int16_t *)_b0; \ 40 int16_t *b1 = (int16_t *)_b1; \ 41 int16_t *b2 = (int16_t *)_b2; \ 42\ 43 for(i=width_align; i<width; i++) \ 44 b1[i] = COMPOSE_53iL0(b0[i], b1[i], b2[i]); \ 45\ 46 ff_vertical_compose53iL0##ext(b0, b1, b2, width_align); \ 47} \ 48\ 49static void vertical_compose_dirac53iH0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, int width) \ 50{ \ 51 int i, width_align = width&~(align-1); \ 52 int16_t *b0 = (int16_t *)_b0; \ 53 int16_t *b1 = (int16_t *)_b1; \ 54 int16_t *b2 = (int16_t *)_b2; \ 55\ 56 for(i=width_align; i<width; i++) \ 57 b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]); \ 58\ 59 ff_vertical_compose_dirac53iH0##ext(b0, b1, b2, width_align); \ 60} \ 61\ 62static void vertical_compose_dd137iL0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, \ 63 uint8_t *_b3, uint8_t *_b4, int width) \ 64{ \ 65 int i, width_align = width&~(align-1); \ 66 int16_t *b0 = (int16_t *)_b0; \ 67 int16_t *b1 = (int16_t *)_b1; \ 68 int16_t *b2 = (int16_t *)_b2; \ 69 int16_t *b3 = (int16_t *)_b3; \ 70 int16_t *b4 = (int16_t *)_b4; \ 71\ 72 for(i=width_align; i<width; i++) \ 73 b2[i] = COMPOSE_DD137iL0(b0[i], b1[i], b2[i], b3[i], b4[i]); \ 74\ 75 ff_vertical_compose_dd137iL0##ext(b0, b1, b2, b3, b4, width_align); \ 76} \ 77\ 78static void vertical_compose_dd97iH0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, \ 79 uint8_t *_b3, uint8_t *_b4, int width) \ 80{ \ 81 int i, width_align = width&~(align-1); \ 82 int16_t *b0 = (int16_t *)_b0; \ 83 int16_t *b1 = (int16_t *)_b1; \ 84 int16_t *b2 = (int16_t *)_b2; \ 85 int16_t *b3 = (int16_t *)_b3; \ 86 int16_t *b4 = (int16_t *)_b4; \ 87\ 88 for(i=width_align; i<width; i++) \ 89 b2[i] = COMPOSE_DD97iH0(b0[i], b1[i], b2[i], b3[i], b4[i]); \ 90\ 91 ff_vertical_compose_dd97iH0##ext(b0, b1, b2, b3, b4, width_align); \ 92} \ 93static void vertical_compose_haar##ext(uint8_t *_b0, uint8_t *_b1, int width) \ 94{ \ 95 int i, width_align = width&~(align-1); \ 96 int16_t *b0 = (int16_t *)_b0; \ 97 int16_t *b1 = (int16_t *)_b1; \ 98\ 99 for(i=width_align; i<width; i++) { \ 100 b0[i] = COMPOSE_HAARiL0(b0[i], b1[i]); \ 101 b1[i] = COMPOSE_HAARiH0(b1[i], b0[i]); \ 102 } \ 103\ 104 ff_vertical_compose_haar##ext(b0, b1, width_align); \ 105} \ 106static void horizontal_compose_haar0i##ext(uint8_t *_b, uint8_t *_tmp, int w)\ 107{\ 108 int w2= w>>1;\ 109 int x= w2 - (w2&(align-1));\ 110 int16_t *b = (int16_t *)_b; \ 111 int16_t *tmp = (int16_t *)_tmp; \ 112\ 113 ff_horizontal_compose_haar0i##ext(b, tmp, w);\ 114\ 115 for (; x < w2; x++) {\ 116 b[2*x ] = tmp[x];\ 117 b[2*x+1] = COMPOSE_HAARiH0(b[x+w2], tmp[x]);\ 118 }\ 119}\ 120static void horizontal_compose_haar1i##ext(uint8_t *_b, uint8_t *_tmp, int w)\ 121{\ 122 int w2= w>>1;\ 123 int x= w2 - (w2&(align-1));\ 124 int16_t *b = (int16_t *)_b; \ 125 int16_t *tmp = (int16_t *)_tmp; \ 126\ 127 ff_horizontal_compose_haar1i##ext(b, tmp, w);\ 128\ 129 for (; x < w2; x++) {\ 130 b[2*x ] = (tmp[x] + 1)>>1;\ 131 b[2*x+1] = (COMPOSE_HAARiH0(b[x+w2], tmp[x]) + 1)>>1;\ 132 }\ 133}\ 134\ 135 136#if HAVE_X86ASM 137COMPOSE_VERTICAL(_sse2, 8) 138 139 140void ff_horizontal_compose_dd97i_ssse3(int16_t *_b, int16_t *_tmp, int w); 141 142static void horizontal_compose_dd97i_ssse3(uint8_t *_b, uint8_t *_tmp, int w) 143{ 144 int w2= w>>1; 145 int x= w2 - (w2&7); 146 int16_t *b = (int16_t *)_b; 147 int16_t *tmp = (int16_t *)_tmp; 148 149 ff_horizontal_compose_dd97i_ssse3(b, tmp, w); 150 151 for (; x < w2; x++) { 152 b[2*x ] = (tmp[x] + 1)>>1; 153 b[2*x+1] = (COMPOSE_DD97iH0(tmp[x-1], tmp[x], b[x+w2], tmp[x+1], tmp[x+2]) + 1)>>1; 154 } 155} 156#endif 157 158void ff_spatial_idwt_init_x86(DWTContext *d, enum dwt_type type) 159{ 160#if HAVE_X86ASM 161 int mm_flags = av_get_cpu_flags(); 162 163 if (!(mm_flags & AV_CPU_FLAG_SSE2)) 164 return; 165 166 switch (type) { 167 case DWT_DIRAC_DD9_7: 168 d->vertical_compose_l0 = (void*)vertical_compose53iL0_sse2; 169 d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_sse2; 170 break; 171 case DWT_DIRAC_LEGALL5_3: 172 d->vertical_compose_l0 = (void*)vertical_compose53iL0_sse2; 173 d->vertical_compose_h0 = (void*)vertical_compose_dirac53iH0_sse2; 174 break; 175 case DWT_DIRAC_DD13_7: 176 d->vertical_compose_l0 = (void*)vertical_compose_dd137iL0_sse2; 177 d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_sse2; 178 break; 179 case DWT_DIRAC_HAAR0: 180 d->vertical_compose = (void*)vertical_compose_haar_sse2; 181 d->horizontal_compose = horizontal_compose_haar0i_sse2; 182 break; 183 case DWT_DIRAC_HAAR1: 184 d->vertical_compose = (void*)vertical_compose_haar_sse2; 185 d->horizontal_compose = horizontal_compose_haar1i_sse2; 186 break; 187 } 188 189 if (!(mm_flags & AV_CPU_FLAG_SSSE3)) 190 return; 191 192 switch (type) { 193 case DWT_DIRAC_DD9_7: 194 d->horizontal_compose = horizontal_compose_dd97i_ssse3; 195 break; 196 } 197#endif // HAVE_X86ASM 198} 199