1/* 2 * Copyright (c) 2010 Mans Rullgard <mans@mansr.com> 3 * 4 * This file is part of FFmpeg. 5 * 6 * FFmpeg is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * FFmpeg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with FFmpeg; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21#include <stdint.h> 22 23#include "libavutil/attributes.h" 24#include "libavutil/cpu.h" 25#include "libavutil/aarch64/cpu.h" 26#include "libavcodec/h264dsp.h" 27 28void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, ptrdiff_t stride, int alpha, 29 int beta, int8_t *tc0); 30void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, ptrdiff_t stride, int alpha, 31 int beta, int8_t *tc0); 32void ff_h264_v_loop_filter_luma_intra_neon(uint8_t *pix, ptrdiff_t stride, int alpha, 33 int beta); 34void ff_h264_h_loop_filter_luma_intra_neon(uint8_t *pix, ptrdiff_t stride, int alpha, 35 int beta); 36void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha, 37 int beta, int8_t *tc0); 38void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha, 39 int beta, int8_t *tc0); 40void ff_h264_h_loop_filter_chroma422_neon(uint8_t *pix, ptrdiff_t stride, int alpha, 41 int beta, int8_t *tc0); 42void ff_h264_v_loop_filter_chroma_intra_neon(uint8_t *pix, ptrdiff_t stride, 43 int alpha, int beta); 44void ff_h264_h_loop_filter_chroma_intra_neon(uint8_t *pix, ptrdiff_t stride, 45 int alpha, int beta); 46void ff_h264_h_loop_filter_chroma422_intra_neon(uint8_t *pix, ptrdiff_t stride, 47 int alpha, int beta); 48void ff_h264_h_loop_filter_chroma_mbaff_intra_neon(uint8_t *pix, ptrdiff_t stride, 49 int alpha, int beta); 50 51void ff_weight_h264_pixels_16_neon(uint8_t *dst, ptrdiff_t stride, int height, 52 int log2_den, int weight, int offset); 53void ff_weight_h264_pixels_8_neon(uint8_t *dst, ptrdiff_t stride, int height, 54 int log2_den, int weight, int offset); 55void ff_weight_h264_pixels_4_neon(uint8_t *dst, ptrdiff_t stride, int height, 56 int log2_den, int weight, int offset); 57 58void ff_biweight_h264_pixels_16_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, 59 int height, int log2_den, int weightd, 60 int weights, int offset); 61void ff_biweight_h264_pixels_8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, 62 int height, int log2_den, int weightd, 63 int weights, int offset); 64void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, 65 int height, int log2_den, int weightd, 66 int weights, int offset); 67 68void ff_h264_idct_add_neon(uint8_t *dst, int16_t *block, int stride); 69void ff_h264_idct_dc_add_neon(uint8_t *dst, int16_t *block, int stride); 70void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset, 71 int16_t *block, int stride, 72 const uint8_t nnzc[5 * 8]); 73void ff_h264_idct_add16intra_neon(uint8_t *dst, const int *block_offset, 74 int16_t *block, int stride, 75 const uint8_t nnzc[5 * 8]); 76void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset, 77 int16_t *block, int stride, 78 const uint8_t nnzc[15 * 8]); 79 80void ff_h264_idct8_add_neon(uint8_t *dst, int16_t *block, int stride); 81void ff_h264_idct8_dc_add_neon(uint8_t *dst, int16_t *block, int stride); 82void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset, 83 int16_t *block, int stride, 84 const uint8_t nnzc[5 * 8]); 85 86void ff_h264_v_loop_filter_luma_neon_10(uint8_t *pix, ptrdiff_t stride, int alpha, 87 int beta, int8_t *tc0); 88void ff_h264_h_loop_filter_luma_neon_10(uint8_t *pix, ptrdiff_t stride, int alpha, 89 int beta, int8_t *tc0); 90void ff_h264_v_loop_filter_luma_intra_neon_10(uint8_t *pix, ptrdiff_t stride, int alpha, 91 int beta); 92void ff_h264_h_loop_filter_luma_intra_neon_10(uint8_t *pix, ptrdiff_t stride, int alpha, 93 int beta); 94void ff_h264_v_loop_filter_chroma_neon_10(uint8_t *pix, ptrdiff_t stride, int alpha, 95 int beta, int8_t *tc0); 96void ff_h264_h_loop_filter_chroma_neon_10(uint8_t *pix, ptrdiff_t stride, int alpha, 97 int beta, int8_t *tc0); 98void ff_h264_h_loop_filter_chroma422_neon_10(uint8_t *pix, ptrdiff_t stride, int alpha, 99 int beta, int8_t *tc0); 100void ff_h264_v_loop_filter_chroma_intra_neon_10(uint8_t *pix, ptrdiff_t stride, 101 int alpha, int beta); 102void ff_h264_h_loop_filter_chroma_intra_neon_10(uint8_t *pix, ptrdiff_t stride, 103 int alpha, int beta); 104void ff_h264_h_loop_filter_chroma422_intra_neon_10(uint8_t *pix, ptrdiff_t stride, 105 int alpha, int beta); 106void ff_h264_h_loop_filter_chroma_mbaff_intra_neon_10(uint8_t *pix, ptrdiff_t stride, 107 int alpha, int beta); 108 109av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth, 110 const int chroma_format_idc) 111{ 112 int cpu_flags = av_get_cpu_flags(); 113 114 if (have_neon(cpu_flags) && bit_depth == 8) { 115 c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon; 116 c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon; 117 c->h264_v_loop_filter_luma_intra= ff_h264_v_loop_filter_luma_intra_neon; 118 c->h264_h_loop_filter_luma_intra= ff_h264_h_loop_filter_luma_intra_neon; 119 120 c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon; 121 c->h264_v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon; 122 123 if (chroma_format_idc <= 1) { 124 c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; 125 c->h264_h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_neon; 126 c->h264_h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_mbaff_intra_neon; 127 } else { 128 c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma422_neon; 129 c->h264_h_loop_filter_chroma_mbaff = ff_h264_h_loop_filter_chroma_neon; 130 c->h264_h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma422_intra_neon; 131 c->h264_h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_intra_neon; 132 } 133 134 c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon; 135 c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon; 136 c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_4_neon; 137 138 c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16_neon; 139 c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_8_neon; 140 c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_4_neon; 141 142 c->h264_idct_add = ff_h264_idct_add_neon; 143 c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; 144 c->h264_idct_add16 = ff_h264_idct_add16_neon; 145 c->h264_idct_add16intra = ff_h264_idct_add16intra_neon; 146 if (chroma_format_idc <= 1) 147 c->h264_idct_add8 = ff_h264_idct_add8_neon; 148 c->h264_idct8_add = ff_h264_idct8_add_neon; 149 c->h264_idct8_dc_add = ff_h264_idct8_dc_add_neon; 150 c->h264_idct8_add4 = ff_h264_idct8_add4_neon; 151 } else if (have_neon(cpu_flags) && bit_depth == 10) { 152 c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon_10; 153 c->h264_v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon_10; 154 155 if (chroma_format_idc <= 1) { 156 c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon_10; 157 c->h264_h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_neon_10; 158 c->h264_h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_mbaff_intra_neon_10; 159 } else { 160 c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma422_neon_10; 161 c->h264_h_loop_filter_chroma_mbaff = ff_h264_h_loop_filter_chroma_neon_10; 162 c->h264_h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma422_intra_neon_10; 163 c->h264_h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_intra_neon_10; 164 } 165 } 166} 167