/*
 *  arch/xtensa/lib/memset.S
 *
 *  ANSI C standard library function memset
 *  (Well, almost.  .fixup code might return zero.)
 *
 *  This file is subject to the terms and conditions of the GNU General
 *  Public License.  See the file "COPYING" in the main directory of
 *  this archive for more details.
 *
 *  Copyright (C) 2002 Tensilica Inc.
 */

#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

/*
 * void *memset(void *dst, int c, size_t length)
 *
 * The algorithm is as follows:
 *   Create a word with c in all byte positions
 *   If the destination is aligned,
 *     do 16B chunks with a loop, and then finish up with
 *     8B, 4B, 2B, and 1B stores conditional on the length.
 *   If destination is unaligned, align it by conditionally
 *     setting 1B and 2B and then go to aligned case.
 *   This code tries to use fall-through branches for the common
 *     case of an aligned destination (except for the branches to
 *     the alignment labels).
 *
 * Register usage:
 *   a2 - dst on entry; never advanced, so it is the return value
 *        (the .fixup handler overwrites it with zero)
 *   a3 - c on entry, then the fill byte replicated into all four bytes
 *   a4 - number of bytes still to be set
 *   a5 - current store address
 *   a6 - alignment mask (3); reused as the loop end address when
 *        !XCHAL_HAVE_LOOPS
 *   a7 - scratch while building the fill word, then the number of
 *        16B loop iterations
 *
 * Every store is wrapped in EX(10f); the handler at label 10 in the
 * .fixup section returns zero instead of dst.
 */

.text
ENTRY(__memset)
WEAK(memset)

	abi_entry_default
	# a2/ dst, a3/ c, a4/ length
	extui	a3, a3, 0, 8	# mask to just 8 bits
	slli	a7, a3, 8	# duplicate character in all bytes of word
	or	a3, a3, a7	# ...
	slli	a7, a3, 16	# ...
	or	a3, a3, a7	# ...
	mov	a5, a2		# copy dst so that a2 is return value
	movi	a6, 3		# for alignment tests
	bany	a2, a6, .Ldstunaligned # if dst is unaligned
.L0:	# return here from .Ldstunaligned when dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	bnez	a4, .Laligned
	abi_ret_default

/*
 * Destination is word-aligned.
 */
	# set 16 bytes per iteration for word-aligned dst
	.align	4		# 1 mod 4 alignment for LOOPNEZ
	.byte	0		# (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a6, a7, 4
	add	a6, a6, a5	# a6 = end of last 16B chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
EX(10f) s32i	a3, a5,  0
EX(10f) s32i	a3, a5,  4
EX(10f) s32i	a3, a5,  8
EX(10f) s32i	a3, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	/*
	 * a6 - a5 is an exact multiple of 16 here, so test for equality.
	 * A signed blt would leave the loop early whenever the region
	 * crosses the 0x7fffffff/0x80000000 address boundary.
	 */
	bne	a5, a6, .Loop1
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
	/* Bits 3..0 of the length select the 8B, 4B, 2B and 1B tail stores. */
	bbci.l	a4, 3, .L2
	# set 8 bytes
EX(10f) s32i	a3, a5,  0
EX(10f) s32i	a3, a5,  4
	addi	a5, a5,  8
.L2:
	bbci.l	a4, 2, .L3
	# set 4 bytes
EX(10f) s32i	a3, a5,  0
	addi	a5, a5,  4
.L3:
	bbci.l	a4, 1, .L4
	# set 2 bytes
EX(10f) s16i	a3, a5,  0
	addi	a5, a5,  2
.L4:
	bbci.l	a4, 0, .L5
	# set 1 byte
EX(10f) s8i	a3, a5,  0
.L5:
.Lret1:
	abi_ret_default

/*
 * Destination is unaligned
 */

.Ldstunaligned:
	bltui	a4, 8, .Lbyteset	# do short sets byte by byte
	bbci.l	a5, 0, .L20		# branch if dst alignment half-aligned
	# dst is only byte aligned
	# set 1 byte
EX(10f) s8i	a3, a5,  0
	addi	a5, a5,  1
	addi	a4, a4, -1
	# now retest if dst aligned
	bbci.l	a5, 1, .L0	# if now aligned, return to main algorithm
.L20:
	# dst half-aligned
	# set 2 bytes
EX(10f) s16i	a3, a5,  0
	addi	a5, a5,  2
	addi	a4, a4, -2
	j	.L0		# dst is now aligned, return to main algorithm

/*
 * Byte by byte set
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbyteset:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytesetdone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytesetdone
	add	a6, a5, a4	# a6 = ending address
#endif /* !XCHAL_HAVE_LOOPS */
.Lbyteloop:
EX(10f) s8i	a3, a5, 0
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	/*
	 * a5 advances one byte at a time towards a6, so an equality test
	 * terminates exactly and, unlike a signed blt, does not depend on
	 * which half of the address space the buffer lies in.
	 */
	bne	a5, a6, .Lbyteloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytesetdone:
	abi_ret_default

ENDPROC(__memset)

	.section .fixup, "ax"
	.align	4

/* We return zero if a failure occurred. */

10:
	movi	a2, 0
	abi_ret_default