/*
 * Optimization of some functions from mpegvideo.c for armv5te
 * Copyright (c) 2007 Siarhei Siamashka <ssvb@users.sourceforge.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"
#include "libavutil/arm/asm.S"

/*
 * Special optimized version of dct_unquantize_h263_helper_c, it
 * requires the block to be at least 8 bytes aligned, and may process
 * more elements than requested. But it is guaranteed to never
 * process more than 64 elements provided that count argument is <= 64,
 * so it is safe. This function is optimized for a common distribution
 * of values for nCoeffs (they are mostly multiple of 8 plus one or
 * two extra elements). So this function processes data as 8 elements
 * per loop iteration and contains optional 2 elements processing in
 * the end.
 *
 * Inner loop should take 6 cycles per element on arm926ej-s (Nokia 770)
 */

/*
 * dequant_t dst, src, mul, add, tmp
 *
 * Dequantize the signed 16-bit coefficient held in the TOP halfword of src:
 *   coeff > 0:  dst = coeff * (bottom halfword of mul) + add
 *   coeff < 0:  dst = coeff * (bottom halfword of mul) - add
 *   coeff == 0: dst is not written; tmp is left holding 0
 *
 * Requires ip == 0 (the rsbs computes "shifted src minus ip" purely to set
 * the flags from the coefficient). Clobbers tmp and the condition flags.
 * Every use in this file passes the same register for dst and tmp, which is
 * what makes dst end up as 0 for a zero coefficient.
 */
.macro  dequant_t       dst, src, mul, add, tmp
        rsbs            \tmp, ip, \src, asr #16
        it              gt
        addgt           \tmp, \add, #0
        it              lt
        rsblt           \tmp, \add, #0
        it              ne
        smlatbne        \dst, \src, \mul, \tmp
.endm

/*
 * dequant_b dst, src, mul, add, tmp
 *
 * Same operation as dequant_t, but for the coefficient held in the BOTTOM
 * halfword of src (shifting left by 16 moves its sign into bit 31 so the
 * flags reflect that coefficient only).
 *
 * Requires ip == 0. Clobbers tmp and the condition flags.
 * For a zero coefficient dst is not written. Every use in this file passes
 * the same register for dst and src and then stores only the low halfword,
 * so the untouched (zero) low halfword is what gets written back.
 */
.macro  dequant_b       dst, src, mul, add, tmp
        rsbs            \tmp, ip, \src, lsl #16
        it              gt
        addgt           \tmp, \add, #0
        it              lt
        rsblt           \tmp, \add, #0
        it              ne
        smlabbne        \dst, \src, \mul, \tmp
.endm

/*
 * ff_dct_unquantize_h263_armv5te
 *
 * Register roles, as used by the code below:
 *   r0     = pointer to the 16-bit coefficients; they are read and rewritten
 *            in place, and r0 is post-incremented by every store
 *   r1     = multiplier (only its bottom halfword takes part in the multiply)
 *   r2     = value added to positive and subtracted from negative coefficients
 *   r3     = number of coefficients requested; used as the loop counter
 *   r4-r7  = eight coefficients, two packed per register (ldrd pairs)
 *   r8     = scratch addend for the bottom-halfword results
 *   r9, lr = top-halfword results / tail coefficients
 *   ip     = constant 0 expected by the dequant macros
 *
 * NOTE(review): presumably declared on the C side as
 *   (int16_t *block, int qmul, int qadd, int count)
 * -- confirm against the caller.
 *
 * r4-r9 and lr are saved on entry and restored on every exit path; the
 * function returns by popping the saved lr into pc.
 */
function ff_dct_unquantize_h263_armv5te, export=1
        push            {r4-r9,lr}
        mov             ip, #0
        subs            r3, r3, #2              /* bias the count by the 2-element tail */
        ble             2f                      /* count <= 2: tail only */
        ldrd            r4, r5, [r0, #0]
1:
        ldrd            r6, r7, [r0, #8]

        /* top halves first: dequant_b overwrites the packed source register */
        dequant_t       r9, r4, r1, r2, r9
        dequant_t       lr, r5, r1, r2, lr
        dequant_b       r4, r4, r1, r2, r8
        dequant_b       r5, r5, r1, r2, r8

        /* write back in memory order: bottom halfword, then top halfword */
        strh            r4, [r0], #2
        strh            r9, [r0], #2
        strh            r5, [r0], #2
        strh            lr, [r0], #2

        dequant_t       r9, r6, r1, r2, r9
        dequant_t       lr, r7, r1, r2, lr
        dequant_b       r6, r6, r1, r2, r8
        dequant_b       r7, r7, r1, r2, r8

        strh            r6, [r0], #2
        strh            r9, [r0], #2
        strh            r7, [r0], #2
        strh            lr, [r0], #2

        subs            r3, r3, #8
        it              gt
        ldrdgt          r4, r5, [r0, #0] /* load data early to avoid load/use pipeline stall */
        bgt             1b

        adds            r3, r3, #2              /* undo the bias: > 0 only if 1 or 2 coefficients remain */
        it              le
        pople           {r4-r9,pc}
2:
        /* tail: always handles two coefficients, loaded sign-extended */
        ldrsh           r9, [r0, #0]
        ldrsh           lr, [r0, #2]
        mov             r8, r2
        cmp             r9, #0
        it              lt
        rsblt           r8, r2, #0              /* negative coefficient: addend = -r2 */
        it              ne
        smlabbne        r9, r9, r1, r8          /* zero coefficient stays zero */
        mov             r8, r2
        cmp             lr, #0
        it              lt
        rsblt           r8, r2, #0
        it              ne
        smlabbne        lr, lr, r1, r8
        strh            r9, [r0], #2
        strh            lr, [r0], #2
        pop             {r4-r9,pc}
endfunc