/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "regdef.h"

/* Some nicer register names.  */
#define ta t10
#define tb t11
#define tc t12
#define td AT
/* Danger: these overlap with the argument list and the return value.
   In pix_abs16x16_mvi_asm below, a4 carries h, a3 carries line_size and
   v0 carries the running sum, so tf, tg and th must not be used as
   scratch there; only te (a5) is.  */
#define te a5
#define tf a4
#define tg a3
#define th v0

        .set noat
        .set noreorder
        .arch pca56
        .text

/*****************************************************************************
 * int pix_abs16x16_mvi_asm(void *ctx, uint8_t *pix1, uint8_t *pix2,
 *                          int line_size, int h)
 *
 * Accumulates, over h rows of 16 bytes each, the perr (MVI pixel error)
 * of pix1 against pix2 and returns the total in v0.
 *
 * Arguments as the code actually consumes them:
 *   a0  never read; overwritten as scratch in the aligned loop.  The
 *       type of this first argument is not visible from this file, so
 *       "void *ctx" above is only a placeholder.
 *   a1  pix1; always read with ldq, so it is expected to be 8-byte
 *       aligned.
 *   a2  pix2; may have any alignment.  Its low three bits select the
 *       aligned or the unaligned loop once, on entry.
 *   a3  line_size, added to both pointers after every row.
 *   a4  h, the number of rows.
 *
 * NOTE(review): both loops leave only when h reaches exactly zero (bne),
 * and they consume 2 (unaligned) or 4 (aligned) rows per iteration, so h
 * is presumably a positive multiple of 4 -- confirm against the callers.
 *
 * NOTE(review): the alignment of pix2 is tested only once and the
 * unaligned loop takes its byte offset from a2 after a2 has been
 * advanced, so line_size is presumably a multiple of 8 -- confirm.
 *
 * This code is written with a pca56 in mind. For ev6, one should
 * really take the increased latency of 3 cycles for MVI instructions
 * into account.
 *
 * It is important to keep the loading and first use of a register as
 * far apart as possible, because if a register is accessed before it
 * has been fetched from memory, the CPU will stall.
 */
        .align 4
        .globl pix_abs16x16_mvi_asm
        .ent pix_abs16x16_mvi_asm
pix_abs16x16_mvi_asm:
        .frame sp, 0, ra, 0
        .prologue 0

        and     a2, 7, t0               # pix2 & 7: byte offset inside a quadword
        clr     v0                      # running sum = 0
        beq     t0, $aligned            # offset 0 -> plain quadword loads suffice
        .align 4
$unaligned:
        /* Two rows per iteration.  Each 16-byte row of pix2 is rebuilt from
           the three quadwords that cover it.

           Registers:
           line 0:
           t0:  left_u -> left lo -> left
           t1:  mid
           t2:  right_u -> right hi -> right
           t3:  ref left
           t4:  ref right
           line 1:
           t5:  left_u -> left lo -> left
           t6:  mid
           t7:  right_u -> right hi -> right
           t8:  ref left
           t9:  ref right
           temp:
           ta:  left hi
           tb:  right lo
           tc:  error left
           td:  error right  */

        /* load line 0 */
        ldq_u   t0, 0(a2)               # left_u
        ldq_u   t1, 8(a2)               # mid
        ldq_u   t2, 16(a2)              # right_u
        ldq     t3, 0(a1)               # ref left
        ldq     t4, 8(a1)               # ref right
        addq    a1, a3, a1              # pix1 += line_size
        addq    a2, a3, a2              # pix2 += line_size
        /* load line 1 */
        ldq_u   t5, 0(a2)               # left_u
        ldq_u   t6, 8(a2)               # mid
        ldq_u   t7, 16(a2)              # right_u
        ldq     t8, 0(a1)               # ref left
        ldq     t9, 8(a1)               # ref right
        addq    a1, a3, a1              # pix1 += line_size
        addq    a2, a3, a2              # pix2 += line_size
        /* calc line 0 */
        /* a2 already points two rows ahead here; the extract instructions
           take only the byte offset from it (see the line_size note in the
           function header).  */
        extql   t0, a2, t0              # left lo
        extqh   t1, a2, ta              # left hi
        extql   t1, a2, tb              # right lo
        or      t0, ta, t0              # left
        extqh   t2, a2, t2              # right hi
        perr    t3, t0, tc              # error left
        or      t2, tb, t2              # right
        perr    t4, t2, td              # error right
        addq    v0, tc, v0              # add error left
        addq    v0, td, v0              # add error right
        /* calc line 1 */
        extql   t5, a2, t5              # left lo
        extqh   t6, a2, ta              # left hi
        extql   t6, a2, tb              # right lo
        or      t5, ta, t5              # left
        extqh   t7, a2, t7              # right hi
        perr    t8, t5, tc              # error left
        or      t7, tb, t7              # right
        perr    t9, t7, td              # error right
        addq    v0, tc, v0              # add error left
        addq    v0, td, v0              # add error right
        /* loop */
        subq    a4, 2, a4               # h -= 2
        bne     a4, $unaligned
        ret

        .align 4
$aligned:
        /* Four rows per iteration.  All sixteen quadwords are loaded before
           the first perr; t0 and t1 are then reused for the per-row error
           terms.  */

        /* load line 0 */
        ldq     t0, 0(a2)               # left
        ldq     t1, 8(a2)               # right
        addq    a2, a3, a2              # pix2 += line_size
        ldq     t2, 0(a1)               # ref left
        ldq     t3, 8(a1)               # ref right
        addq    a1, a3, a1              # pix1 += line_size
        /* load line 1 */
        ldq     t4, 0(a2)               # left
        ldq     t5, 8(a2)               # right
        addq    a2, a3, a2              # pix2 += line_size
        ldq     t6, 0(a1)               # ref left
        ldq     t7, 8(a1)               # ref right
        addq    a1, a3, a1              # pix1 += line_size
        /* load line 2 */
        ldq     t8, 0(a2)               # left
        ldq     t9, 8(a2)               # right
        addq    a2, a3, a2              # pix2 += line_size
        ldq     ta, 0(a1)               # ref left
        ldq     tb, 8(a1)               # ref right
        addq    a1, a3, a1              # pix1 += line_size
        /* load line 3 */
        ldq     tc, 0(a2)               # left
        ldq     td, 8(a2)               # right
        addq    a2, a3, a2              # pix2 += line_size
        ldq     te, 0(a1)               # ref left (te is a5)
        ldq     a0, 8(a1)               # ref right (a0 is free: never read as an argument)
        /* calc line 0 */
        perr    t0, t2, t0              # error left
        addq    a1, a3, a1              # pix1 += line_size (line 3's advance, issued late)
        perr    t1, t3, t1              # error right
        addq    v0, t0, v0              # add error left
        /* calc line 1 */
        perr    t4, t6, t0              # error left
        addq    v0, t1, v0              # add error right
        perr    t5, t7, t1              # error right
        addq    v0, t0, v0              # add error left
        /* calc line 2 */
        perr    t8, ta, t0              # error left
        addq    v0, t1, v0              # add error right
        perr    t9, tb, t1              # error right
        addq    v0, t0, v0              # add error left
        /* calc line 3 */
        perr    tc, te, t0              # error left
        addq    v0, t1, v0              # add error right
        perr    td, a0, t1              # error right
        addq    v0, t0, v0              # add error left
        addq    v0, t1, v0              # add error right
        /* loop */
        subq    a4, 4, a4               # h -= 4
        bne     a4, $aligned
        ret
        .end pix_abs16x16_mvi_asm