1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * VC-1 and WMV3 - DSP functions MMX-optimized 3cabdff1aSopenharmony_ci * Copyright (c) 2007 Christophe GISQUET <christophe.gisquet@free.fr> 4cabdff1aSopenharmony_ci * 5cabdff1aSopenharmony_ci * Permission is hereby granted, free of charge, to any person 6cabdff1aSopenharmony_ci * obtaining a copy of this software and associated documentation 7cabdff1aSopenharmony_ci * files (the "Software"), to deal in the Software without 8cabdff1aSopenharmony_ci * restriction, including without limitation the rights to use, 9cabdff1aSopenharmony_ci * copy, modify, merge, publish, distribute, sublicense, and/or sell 10cabdff1aSopenharmony_ci * copies of the Software, and to permit persons to whom the 11cabdff1aSopenharmony_ci * Software is furnished to do so, subject to the following 12cabdff1aSopenharmony_ci * conditions: 13cabdff1aSopenharmony_ci * 14cabdff1aSopenharmony_ci * The above copyright notice and this permission notice shall be 15cabdff1aSopenharmony_ci * included in all copies or substantial portions of the Software. 16cabdff1aSopenharmony_ci * 17cabdff1aSopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18cabdff1aSopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19cabdff1aSopenharmony_ci * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20cabdff1aSopenharmony_ci * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21cabdff1aSopenharmony_ci * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22cabdff1aSopenharmony_ci * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23cabdff1aSopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24cabdff1aSopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE. 25cabdff1aSopenharmony_ci */ 26cabdff1aSopenharmony_ci 27cabdff1aSopenharmony_ci#include "libavutil/attributes.h" 28cabdff1aSopenharmony_ci#include "libavutil/cpu.h" 29cabdff1aSopenharmony_ci#include "libavutil/x86/cpu.h" 30cabdff1aSopenharmony_ci#include "libavutil/x86/asm.h" 31cabdff1aSopenharmony_ci#include "libavcodec/vc1dsp.h" 32cabdff1aSopenharmony_ci#include "fpel.h" 33cabdff1aSopenharmony_ci#include "vc1dsp.h" 34cabdff1aSopenharmony_ci#include "config.h" 35cabdff1aSopenharmony_ci 36cabdff1aSopenharmony_ci#define LOOP_FILTER4(EXT) \ 37cabdff1aSopenharmony_civoid ff_vc1_v_loop_filter4_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq); \ 38cabdff1aSopenharmony_civoid ff_vc1_h_loop_filter4_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq); 39cabdff1aSopenharmony_ci#define LOOP_FILTER816(EXT) \ 40cabdff1aSopenharmony_civoid ff_vc1_v_loop_filter8_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq); \ 41cabdff1aSopenharmony_civoid ff_vc1_h_loop_filter8_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq); \ 42cabdff1aSopenharmony_ci\ 43cabdff1aSopenharmony_cistatic void vc1_v_loop_filter16_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq) \ 44cabdff1aSopenharmony_ci{ \ 45cabdff1aSopenharmony_ci ff_vc1_v_loop_filter8_ ## EXT(src, stride, pq); \ 46cabdff1aSopenharmony_ci ff_vc1_v_loop_filter8_ ## EXT(src+8, stride, pq); \ 47cabdff1aSopenharmony_ci} \ 48cabdff1aSopenharmony_ci\ 49cabdff1aSopenharmony_cistatic void vc1_h_loop_filter16_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq) \ 50cabdff1aSopenharmony_ci{ \ 51cabdff1aSopenharmony_ci ff_vc1_h_loop_filter8_ ## EXT(src, stride, pq); \ 52cabdff1aSopenharmony_ci ff_vc1_h_loop_filter8_ ## EXT(src+8*stride, stride, pq); \ 53cabdff1aSopenharmony_ci} 54cabdff1aSopenharmony_ci 55cabdff1aSopenharmony_ci#if HAVE_X86ASM 56cabdff1aSopenharmony_ciLOOP_FILTER4(mmxext) 57cabdff1aSopenharmony_ciLOOP_FILTER816(sse2) 58cabdff1aSopenharmony_ciLOOP_FILTER4(ssse3) 59cabdff1aSopenharmony_ciLOOP_FILTER816(ssse3) 60cabdff1aSopenharmony_ci 61cabdff1aSopenharmony_civoid ff_vc1_h_loop_filter8_sse4(uint8_t *src, ptrdiff_t stride, int pq); 62cabdff1aSopenharmony_ci 63cabdff1aSopenharmony_cistatic void vc1_h_loop_filter16_sse4(uint8_t *src, ptrdiff_t stride, int pq) 64cabdff1aSopenharmony_ci{ 65cabdff1aSopenharmony_ci ff_vc1_h_loop_filter8_sse4(src, stride, pq); 66cabdff1aSopenharmony_ci ff_vc1_h_loop_filter8_sse4(src+8*stride, stride, pq); 67cabdff1aSopenharmony_ci} 68cabdff1aSopenharmony_ci 69cabdff1aSopenharmony_ci#define DECLARE_FUNCTION(OP, DEPTH, INSN) \ 70cabdff1aSopenharmony_ci static void OP##vc1_mspel_mc00_##DEPTH##INSN(uint8_t *dst, \ 71cabdff1aSopenharmony_ci const uint8_t *src, ptrdiff_t stride, int rnd) \ 72cabdff1aSopenharmony_ci { \ 73cabdff1aSopenharmony_ci ff_ ## OP ## pixels ## DEPTH ## INSN(dst, src, stride, DEPTH); \ 74cabdff1aSopenharmony_ci } 75cabdff1aSopenharmony_ci 76cabdff1aSopenharmony_ciDECLARE_FUNCTION(put_, 8, _mmx) 77cabdff1aSopenharmony_ciDECLARE_FUNCTION(avg_, 8, _mmxext) 78cabdff1aSopenharmony_ciDECLARE_FUNCTION(put_, 16, _sse2) 79cabdff1aSopenharmony_ciDECLARE_FUNCTION(avg_, 16, _sse2) 80cabdff1aSopenharmony_ci 81cabdff1aSopenharmony_ci#endif /* HAVE_X86ASM */ 82cabdff1aSopenharmony_ci 83cabdff1aSopenharmony_civoid ff_put_vc1_chroma_mc8_nornd_mmx (uint8_t *dst, uint8_t *src, 84cabdff1aSopenharmony_ci ptrdiff_t stride, int h, int x, int y); 85cabdff1aSopenharmony_civoid ff_avg_vc1_chroma_mc8_nornd_mmxext(uint8_t *dst, uint8_t *src, 86cabdff1aSopenharmony_ci ptrdiff_t stride, int h, int x, int y); 87cabdff1aSopenharmony_civoid ff_put_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src, 88cabdff1aSopenharmony_ci ptrdiff_t stride, int h, int x, int y); 89cabdff1aSopenharmony_civoid ff_avg_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src, 90cabdff1aSopenharmony_ci ptrdiff_t stride, int h, int x, int y); 91cabdff1aSopenharmony_civoid ff_vc1_inv_trans_4x4_dc_mmxext(uint8_t *dest, ptrdiff_t linesize, 92cabdff1aSopenharmony_ci int16_t *block); 93cabdff1aSopenharmony_civoid ff_vc1_inv_trans_4x8_dc_mmxext(uint8_t *dest, ptrdiff_t linesize, 94cabdff1aSopenharmony_ci int16_t *block); 95cabdff1aSopenharmony_civoid ff_vc1_inv_trans_8x4_dc_mmxext(uint8_t *dest, ptrdiff_t linesize, 96cabdff1aSopenharmony_ci int16_t *block); 97cabdff1aSopenharmony_civoid ff_vc1_inv_trans_8x8_dc_mmxext(uint8_t *dest, ptrdiff_t linesize, 98cabdff1aSopenharmony_ci int16_t *block); 99cabdff1aSopenharmony_ci 100cabdff1aSopenharmony_ci 101cabdff1aSopenharmony_ciav_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp) 102cabdff1aSopenharmony_ci{ 103cabdff1aSopenharmony_ci int cpu_flags = av_get_cpu_flags(); 104cabdff1aSopenharmony_ci 105cabdff1aSopenharmony_ci if (HAVE_6REGS && INLINE_MMX(cpu_flags)) 106cabdff1aSopenharmony_ci if (EXTERNAL_MMX(cpu_flags)) 107cabdff1aSopenharmony_ci ff_vc1dsp_init_mmx(dsp); 108cabdff1aSopenharmony_ci 109cabdff1aSopenharmony_ci if (HAVE_6REGS && INLINE_MMXEXT(cpu_flags)) 110cabdff1aSopenharmony_ci if (EXTERNAL_MMXEXT(cpu_flags)) 111cabdff1aSopenharmony_ci ff_vc1dsp_init_mmxext(dsp); 112cabdff1aSopenharmony_ci 113cabdff1aSopenharmony_ci#define ASSIGN_LF4(EXT) \ 114cabdff1aSopenharmony_ci dsp->vc1_v_loop_filter4 = ff_vc1_v_loop_filter4_ ## EXT; \ 115cabdff1aSopenharmony_ci dsp->vc1_h_loop_filter4 = ff_vc1_h_loop_filter4_ ## EXT 116cabdff1aSopenharmony_ci#define ASSIGN_LF816(EXT) \ 117cabdff1aSopenharmony_ci dsp->vc1_v_loop_filter8 = ff_vc1_v_loop_filter8_ ## EXT; \ 118cabdff1aSopenharmony_ci dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_ ## EXT; \ 119cabdff1aSopenharmony_ci dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_ ## EXT; \ 120cabdff1aSopenharmony_ci dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT 121cabdff1aSopenharmony_ci 122cabdff1aSopenharmony_ci#if HAVE_X86ASM 123cabdff1aSopenharmony_ci if (EXTERNAL_MMX(cpu_flags)) { 124cabdff1aSopenharmony_ci dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_mmx; 125cabdff1aSopenharmony_ci 126cabdff1aSopenharmony_ci dsp->put_vc1_mspel_pixels_tab[1][0] = put_vc1_mspel_mc00_8_mmx; 127cabdff1aSopenharmony_ci } 128cabdff1aSopenharmony_ci if (EXTERNAL_MMXEXT(cpu_flags)) { 129cabdff1aSopenharmony_ci ASSIGN_LF4(mmxext); 130cabdff1aSopenharmony_ci dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext; 131cabdff1aSopenharmony_ci 132cabdff1aSopenharmony_ci dsp->avg_vc1_mspel_pixels_tab[1][0] = avg_vc1_mspel_mc00_8_mmxext; 133cabdff1aSopenharmony_ci 134cabdff1aSopenharmony_ci dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_mmxext; 135cabdff1aSopenharmony_ci dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_mmxext; 136cabdff1aSopenharmony_ci dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_mmxext; 137cabdff1aSopenharmony_ci dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_mmxext; 138cabdff1aSopenharmony_ci } 139cabdff1aSopenharmony_ci if (EXTERNAL_SSE2(cpu_flags)) { 140cabdff1aSopenharmony_ci ASSIGN_LF816(sse2); 141cabdff1aSopenharmony_ci 142cabdff1aSopenharmony_ci dsp->put_vc1_mspel_pixels_tab[0][0] = put_vc1_mspel_mc00_16_sse2; 143cabdff1aSopenharmony_ci dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_vc1_mspel_mc00_16_sse2; 144cabdff1aSopenharmony_ci } 145cabdff1aSopenharmony_ci if (EXTERNAL_SSSE3(cpu_flags)) { 146cabdff1aSopenharmony_ci ASSIGN_LF4(ssse3); 147cabdff1aSopenharmony_ci ASSIGN_LF816(ssse3); 148cabdff1aSopenharmony_ci dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_ssse3; 149cabdff1aSopenharmony_ci dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_ssse3; 150cabdff1aSopenharmony_ci } 151cabdff1aSopenharmony_ci if (EXTERNAL_SSE4(cpu_flags)) { 152cabdff1aSopenharmony_ci dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_sse4; 153cabdff1aSopenharmony_ci dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse4; 154cabdff1aSopenharmony_ci } 155cabdff1aSopenharmony_ci#endif /* HAVE_X86ASM */ 156cabdff1aSopenharmony_ci} 157