/*
 * arch/xtensa/lib/hal/memcopy.S -- Core HAL library functions
 * xthal_memcpy and xthal_bcopy
 *
 * NOTE: the symbols actually defined and exported by this file are
 * __memcpy/memcpy and __memmove/memmove (see the ENTRY/EXPORT_SYMBOL
 * lines below); the xthal_* names above are kept from the original header.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2002 - 2012 Tensilica Inc.
 */

#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

/*
 * SIM_CHECKS_ALIGNMENT: set to 1 when running on ISS (simulator) with the
 * lint or ferret client, or 0 to save a few cycles.
 *
 * When this or XCHAL_UNALIGNED_LOAD_EXCEPTION is non-zero, the
 * "source is unaligned" paths of both memcpy and memmove round the source
 * pointer (a3) down to a word boundary before issuing l32i loads, keep the
 * removed low bits in a11, and add them back before the final byte copies.
 *
 * Defined once here (it is shared by both routines) and guarded so that a
 * build may override it without triggering a macro redefinition.
 */
#ifndef SIM_CHECKS_ALIGNMENT
#define SIM_CHECKS_ALIGNMENT	1
#endif

/*
 * void *memcpy(void *dst, const void *src, size_t len);
 *
 * This function is intended to do the same thing as the standard
 * library function memcpy() for most cases.
 * However, where the source and/or destination references
 * an instruction RAM or ROM or a data RAM or ROM, that
 * source and/or destination will always be accessed with
 * 32-bit load and store instructions (as required for these
 * types of devices).
 *
 * !!!!!!!  XTFIXME:
 * !!!!!!!  Handling of IRAM/IROM has not yet
 * !!!!!!!  been implemented.
 *
 * The (general case) algorithm is as follows:
 *   If destination is unaligned, align it by conditionally
 *   copying 1 and 2 bytes.
 *   If source is aligned,
 *     do 16 bytes with a loop, and then finish up with
 *     8, 4, 2, and 1 byte copies conditional on the length;
 *   else (if source is unaligned),
 *     do the same, but use SRC to align the source data.
 *   This code tries to use fall-through branches for the common
 *     case of aligned source and destination and multiple
 *     of 4 (or 8) length.
 *
 * The tail copies are selected by testing bits 3, 2, 1 and 0 of the
 * remaining length in a4 (a4 is decremented by the bytes used to align
 * the destination, and is not modified by the 16-byte loops).
 *
 * abi_entry_default / abi_ret_default, __ssa8 and __src_b are macros that
 * are not defined in this file (presumably provided by <asm/asmmacro.h> --
 * confirm there before relying on their exact expansion).
 *
 * Register use:
 *	a0/ return address
 *	a1/ stack pointer
 *	a2/ return value
 *	a3/ src
 *	a4/ length
 *	a5/ dst
 *	a6/ tmp
 *	a7/ tmp
 *	a8/ tmp
 *	a9/ tmp
 *	a10/ tmp
 *	a11/ tmp
 */

	.text

/*
 * Byte by byte copy
 *
 * Reached from .Ldst1mod2/.Ldst2mod4 for short copies.
 * On entry: a3 = src, a5 = dst, a4 = bytes left (zero is handled).
 * a2 is not touched, so it still holds the return value.
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbytecopy:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytecopydone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytecopydone
	add	a7, a3, a4	# a7 = end address for source
#endif /* !XCHAL_HAVE_LOOPS */
.Lnextbyte:
	l8ui	a6, a3, 0
	addi	a3, a3, 1
	s8i	a6, a5, 0
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	bne	a3, a7, .Lnextbyte # continue loop if $a3:src != $a7:src_end
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytecopydone:
	abi_ret_default

/*
 * Destination is unaligned
 *
 * Copies 1 and/or 2 leading bytes so that a5 becomes word aligned, then
 * rejoins the main algorithm at .Ldstaligned.  Copies shorter than the
 * thresholds below are handed to .Lbytecopy instead.
 */

	.align	4
.Ldst1mod2:	# dst is only byte aligned
	_bltui	a4, 7, .Lbytecopy	# do short copies byte by byte

	# copy 1 byte
	l8ui	a6, a3,  0
	addi	a3, a3,  1
	addi	a4, a4, -1
	s8i	a6, a5,  0
	addi	a5, a5,  1
	_bbci.l	a5, 1, .Ldstaligned	# if dst is now aligned, then
					# return to main algorithm
.Ldst2mod4:	# dst 16-bit aligned
	# copy 2 bytes
	_bltui	a4, 6, .Lbytecopy	# do short copies byte by byte
	l8ui	a6, a3,  0
	l8ui	a7, a3,  1
	addi	a3, a3,  2
	addi	a4, a4, -2
	s8i	a6, a5,  0
	s8i	a7, a5,  1
	addi	a5, a5,  2
	j	.Ldstaligned	# dst is now aligned, return to main algorithm

ENTRY(__memcpy)
WEAK(memcpy)

	abi_entry_default
	# a2/ dst, a3/ src, a4/ len
	mov	a5, a2		# copy dst so that a2 is return value
	/*
	 * .Lcommon is also the forward-copy entry used by __memmove, which
	 * arrives here with a5 == a2 as well, so testing a2 is equivalent
	 * to testing the working destination pointer.
	 */
.Lcommon:
	_bbsi.l	a2, 0, .Ldst1mod2	# if dst is 1 mod 2
	_bbsi.l	a2, 1, .Ldst2mod4	# if dst is 2 mod 4
.Ldstaligned:	# return here from .Ldst?mod? once dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	movi	a8, 3		# if source is not aligned,
	_bany	a3, a8, .Lsrcunaligned	# then use shifting copy
	/*
	 * Destination and source are word-aligned, use word copy.
	 */
	# copy 16 bytes per iteration for word-aligned dst and word-aligned src
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a8, a7, 4
	add	a8, a8, a3	# a8 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
	l32i	a6, a3,  0
	l32i	a7, a3,  4
	s32i	a6, a5,  0
	l32i	a6, a3,  8
	s32i	a7, a5,  4
	l32i	a7, a3, 12
	s32i	a6, a5,  8
	addi	a3, a3, 16
	s32i	a7, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	bne	a3, a8, .Loop1  # continue loop if a3:src != a8:src_end
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
	bbci.l	a4, 3, .L2
	# copy 8 bytes
	l32i	a6, a3,  0
	l32i	a7, a3,  4
	addi	a3, a3,  8
	s32i	a6, a5,  0
	s32i	a7, a5,  4
	addi	a5, a5,  8
.L2:
	bbsi.l	a4, 2, .L3
	bbsi.l	a4, 1, .L4
	bbsi.l	a4, 0, .L5
	abi_ret_default
.L3:
	# copy 4 bytes
	l32i	a6, a3,  0
	addi	a3, a3,  4
	s32i	a6, a5,  0
	addi	a5, a5,  4
	bbsi.l	a4, 1, .L4
	bbsi.l	a4, 0, .L5
	abi_ret_default
.L4:
	# copy 2 bytes
	l16ui	a6, a3,  0
	addi	a3, a3,  2
	s16i	a6, a5,  0
	addi	a5, a5,  2
	bbsi.l	a4, 0, .L5
	abi_ret_default
.L5:
	# copy 1 byte
	l8ui	a6, a3,  0
	s8i	a6, a5,  0
	abi_ret_default

/*
 * Destination is aligned, Source is unaligned
 *
 * On entry a7 = len / 16 and a8 = 3 (set at .Ldstaligned).
 * Throughout the word-sized steps a6 holds the most recently loaded
 * source word, which is combined with the next one by __src_b.
 */

	.align	4
.Lsrcunaligned:
	_beqz	a4, .Ldone	# avoid loading anything for zero-length copies
	# copy 16 bytes per iteration for word-aligned dst and unaligned src
	__ssa8	a3		# set shift amount from byte offset

#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
	and	a11, a3, a8	# save unalignment offset for below
	sub	a3, a3, a11	# align a3
#endif
	l32i	a6, a3, 0	# load first word
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop2done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop2done
	slli	a10, a7, 4
	add	a10, a10, a3	# a10 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2:
	l32i	a7, a3,  4
	l32i	a8, a3,  8
	__src_b	a6, a6, a7
	s32i	a6, a5,  0
	l32i	a9, a3, 12
	__src_b	a7, a7, a8
	s32i	a7, a5,  4
	l32i	a6, a3, 16
	__src_b	a8, a8, a9
	s32i	a8, a5,  8
	addi	a3, a3, 16
	__src_b	a9, a9, a6
	s32i	a9, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	bne	a3, a10, .Loop2 # continue loop if a3:src != a10:src_end
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2done:
	bbci.l	a4, 3, .L12
	# copy 8 bytes
	l32i	a7, a3,  4
	l32i	a8, a3,  8
	__src_b	a6, a6, a7
	s32i	a6, a5,  0
	addi	a3, a3,  8
	__src_b	a7, a7, a8
	s32i	a7, a5,  4
	addi	a5, a5,  8
	mov	a6, a8		# carry last loaded word into the next step
.L12:
	bbci.l	a4, 2, .L13
	# copy 4 bytes
	l32i	a7, a3,  4
	addi	a3, a3,  4
	__src_b	a6, a6, a7
	s32i	a6, a5,  0
	addi	a5, a5,  4
	mov	a6, a7		# carry last loaded word into the next step
.L13:
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
	add	a3, a3, a11	# readjust a3 with correct misalignment
#endif
	bbsi.l	a4, 1, .L14
	bbsi.l	a4, 0, .L15
.Ldone:	abi_ret_default
.L14:
	# copy 2 bytes
	l8ui	a6, a3,  0
	l8ui	a7, a3,  1
	addi	a3, a3,  2
	s8i	a6, a5,  0
	s8i	a7, a5,  1
	addi	a5, a5,  2
	bbsi.l	a4, 0, .L15
	abi_ret_default
.L15:
	# copy 1 byte
	l8ui	a6, a3,  0
	s8i	a6, a5,  0
	abi_ret_default

ENDPROC(__memcpy)
EXPORT_SYMBOL(__memcpy)
EXPORT_SYMBOL(memcpy)

/*
 * void *memmove(void *dst, const void *src, size_t len);
 *
 * This function is intended to do the same thing as the standard
 * library function memmove() for most cases.
 * However, where the source and/or destination references
 * an instruction RAM or ROM or a data RAM or ROM, that
 * source and/or destination will always be accessed with
 * 32-bit load and store instructions (as required for these
 * types of devices).
 *
 * !!!!!!!  XTFIXME:
 * !!!!!!!  Handling of IRAM/IROM has not yet
 * !!!!!!!  been implemented.
 *
 * The (general case) algorithm is as follows:
 *   If end of source doesn't overlap destination then use memcpy.
 *   Otherwise do memcpy backwards.
 *
 * Concretely: the unsigned difference (dst - src) is compared with len.
 * If it is >= len (dst is below src, or at/after src + len) the forward
 * copy at .Lcommon in __memcpy is used.  Otherwise both pointers are
 * advanced to one-past-the-end and the copy runs from high addresses to
 * low addresses, mirroring the memcpy structure step for step.
 *
 * Register use:
 *	a0/ return address
 *	a1/ stack pointer
 *	a2/ return value
 *	a3/ src
 *	a4/ length
 *	a5/ dst
 *	a6/ tmp
 *	a7/ tmp
 *	a8/ tmp
 *	a9/ tmp
 *	a10/ tmp
 *	a11/ tmp
 */

/*
 * Byte by byte copy
 *
 * On entry: a3 / a5 point one past the next source / destination byte,
 * a4 = bytes left (zero is handled).
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbackbytecopy:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbackbytecopydone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbackbytecopydone
	sub	a7, a3, a4	# a7 = start address for source
#endif /* !XCHAL_HAVE_LOOPS */
.Lbacknextbyte:
	addi	a3, a3, -1
	l8ui	a6, a3, 0
	addi	a5, a5, -1
	s8i	a6, a5, 0
#if !XCHAL_HAVE_LOOPS
	bne	a3, a7, .Lbacknextbyte # continue loop if
				       # $a3:src != $a7:src_start
#endif /* !XCHAL_HAVE_LOOPS */
.Lbackbytecopydone:
	abi_ret_default

/*
 * Destination is unaligned
 *
 * Here "destination" is the end pointer (a5 = dst + len).  Copies 1 and/or
 * 2 trailing bytes so that a5 becomes word aligned, then rejoins the main
 * algorithm at .Lbackdstaligned.
 */

	.align	4
.Lbackdst1mod2:	# dst is only byte aligned
	_bltui	a4, 7, .Lbackbytecopy	# do short copies byte by byte

	# copy 1 byte
	addi	a3, a3, -1
	l8ui	a6, a3,  0
	addi	a5, a5, -1
	s8i	a6, a5,  0
	addi	a4, a4, -1
	_bbci.l	a5, 1, .Lbackdstaligned	# if dst is now aligned, then
					# return to main algorithm
.Lbackdst2mod4:	# dst 16-bit aligned
	# copy 2 bytes
	_bltui	a4, 6, .Lbackbytecopy	# do short copies byte by byte
	addi	a3, a3, -2
	l8ui	a6, a3,  0
	l8ui	a7, a3,  1
	addi	a5, a5, -2
	s8i	a6, a5,  0
	s8i	a7, a5,  1
	addi	a4, a4, -2
	j	.Lbackdstaligned	# dst is now aligned,
					# return to main algorithm

ENTRY(__memmove)
WEAK(memmove)

	abi_entry_default
	# a2/ dst, a3/ src, a4/ len
	mov	a5, a2		# copy dst so that a2 is return value
.Lmovecommon:
	sub	a6, a5, a3	# a6 = dst - src (unsigned, may wrap)
	bgeu	a6, a4, .Lcommon	# forward copy is safe: use memcpy path

	add	a5, a5, a4	# a5 = one past end of dst
	add	a3, a3, a4	# a3 = one past end of src

	_bbsi.l	a5, 0, .Lbackdst1mod2	# if dst is 1 mod 2
	_bbsi.l	a5, 1, .Lbackdst2mod4	# if dst is 2 mod 4
.Lbackdstaligned:	# return here from .Lbackdst?mod? once dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	movi	a8, 3		# if source is not aligned,
	_bany	a3, a8, .Lbacksrcunaligned	# then use shifting copy
	/*
	 * Destination and source are word-aligned, use word copy.
	 */
	# copy 16 bytes per iteration for word-aligned dst and word-aligned src
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .LbackLoop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .LbackLoop1done
	slli	a8, a7, 4
	sub	a8, a3, a8	# a8 = start of first 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.LbackLoop1:
	addi	a3, a3, -16
	l32i	a7, a3, 12
	l32i	a6, a3,  8
	addi	a5, a5, -16
	s32i	a7, a5, 12
	l32i	a7, a3,  4
	s32i	a6, a5,  8
	l32i	a6, a3,  0
	s32i	a7, a5,  4
	s32i	a6, a5,  0
#if !XCHAL_HAVE_LOOPS
	bne	a3, a8, .LbackLoop1  # continue loop if a3:src != a8:src_start
#endif /* !XCHAL_HAVE_LOOPS */
.LbackLoop1done:
	bbci.l	a4, 3, .Lback2
	# copy 8 bytes
	addi	a3, a3, -8
	l32i	a6, a3,  0
	l32i	a7, a3,  4
	addi	a5, a5, -8
	s32i	a6, a5,  0
	s32i	a7, a5,  4
.Lback2:
	bbsi.l	a4, 2, .Lback3
	bbsi.l	a4, 1, .Lback4
	bbsi.l	a4, 0, .Lback5
	abi_ret_default
.Lback3:
	# copy 4 bytes
	addi	a3, a3, -4
	l32i	a6, a3,  0
	addi	a5, a5, -4
	s32i	a6, a5,  0
	bbsi.l	a4, 1, .Lback4
	bbsi.l	a4, 0, .Lback5
	abi_ret_default
.Lback4:
	# copy 2 bytes
	addi	a3, a3, -2
	l16ui	a6, a3,  0
	addi	a5, a5, -2
	s16i	a6, a5,  0
	bbsi.l	a4, 0, .Lback5
	abi_ret_default
.Lback5:
	# copy 1 byte
	addi	a3, a3, -1
	l8ui	a6, a3,  0
	addi	a5, a5, -1
	s8i	a6, a5,  0
	abi_ret_default

/*
 * Destination is aligned, Source is unaligned
 *
 * On entry a7 = len / 16 and a8 = 3 (set at .Lbackdstaligned).
 * Throughout the word-sized steps a6 holds the most recently loaded
 * (higher-addressed) source word, which __src_b combines with the next
 * lower one.
 */

	.align	4
.Lbacksrcunaligned:
	_beqz	a4, .Lbackdone	# avoid loading anything for zero-length copies
	# copy 16 bytes per iteration for word-aligned dst and unaligned src
	__ssa8	a3		# set shift amount from byte offset
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
	and	a11, a3, a8	# save unalignment offset for below
	sub	a3, a3, a11	# align a3
#endif
	l32i	a6, a3, 0	# load first word
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .LbackLoop2done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .LbackLoop2done
	slli	a10, a7, 4
	sub	a10, a3, a10	# a10 = start of first 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.LbackLoop2:
	addi	a3, a3, -16
	l32i	a7, a3, 12
	l32i	a8, a3,  8
	addi	a5, a5, -16
	__src_b	a6, a7, a6
	s32i	a6, a5, 12
	l32i	a9, a3,  4
	__src_b	a7, a8, a7
	s32i	a7, a5,  8
	l32i	a6, a3,  0
	__src_b	a8, a9, a8
	s32i	a8, a5,  4
	__src_b	a9, a6, a9
	s32i	a9, a5,  0
#if !XCHAL_HAVE_LOOPS
	bne	a3, a10, .LbackLoop2 # continue loop if a3:src != a10:src_start
#endif /* !XCHAL_HAVE_LOOPS */
.LbackLoop2done:
	bbci.l	a4, 3, .Lback12
	# copy 8 bytes
	addi	a3, a3, -8
	l32i	a7, a3,  4
	l32i	a8, a3,  0
	addi	a5, a5, -8
	__src_b	a6, a7, a6
	s32i	a6, a5,  4
	__src_b	a7, a8, a7
	s32i	a7, a5,  0
	mov	a6, a8		# carry last loaded word into the next step
.Lback12:
	bbci.l	a4, 2, .Lback13
	# copy 4 bytes
	addi	a3, a3, -4
	l32i	a7, a3,  0
	addi	a5, a5, -4
	__src_b	a6, a7, a6
	s32i	a6, a5,  0
	mov	a6, a7		# carry last loaded word into the next step
.Lback13:
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
	add	a3, a3, a11	# readjust a3 with correct misalignment
#endif
	bbsi.l	a4, 1, .Lback14
	bbsi.l	a4, 0, .Lback15
.Lbackdone:
	abi_ret_default
.Lback14:
	# copy 2 bytes
	addi	a3, a3, -2
	l8ui	a6, a3,  0
	l8ui	a7, a3,  1
	addi	a5, a5, -2
	s8i	a6, a5,  0
	s8i	a7, a5,  1
	bbsi.l	a4, 0, .Lback15
	abi_ret_default
.Lback15:
	# copy 1 byte
	addi	a3, a3, -1
	addi	a5, a5, -1
	l8ui	a6, a3,  0
	s8i	a6, a5,  0
	abi_ret_default

ENDPROC(__memmove)
EXPORT_SYMBOL(__memmove)
EXPORT_SYMBOL(memmove)