/*
 *  arch/xtensa/lib/memset.S
 *
 *  ANSI C standard library function memset
 *  (Well, almost.  .fixup code might return zero.)
 *
 *  This file is subject to the terms and conditions of the GNU General
 *  Public License.  See the file "COPYING" in the main directory of
 *  this archive for more details.
 *
 *  Copyright (C) 2002 Tensilica Inc.
 */

#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

/*
 * void *memset(void *dst, int c, size_t length)
 *
 * The algorithm is as follows:
 *   Create a word with c in all byte positions
 *   If the destination is aligned,
 *     do 16B chunks with a loop, and then finish up with
 *     8B, 4B, 2B, and 1B stores conditional on the length.
 *   If destination is unaligned, align it by conditionally
 *     setting 1B and 2B and then go to aligned case.
 *   This code tries to use fall-through branches for the common
 *     case of an aligned destination (except for the branches to
 *     the alignment labels).
 *
 * Register usage:
 *   a2  dst on entry; never modified on the normal path, so it is
 *       the return value (the .fixup handler replaces it with 0)
 *   a3  c on entry; afterwards the fill byte replicated into all
 *       four bytes of the word
 *   a4  length; reduced by the bytes stored while aligning dst
 *   a5  current store address
 *   a6  alignment mask (3) on entry; loop end address when
 *       !XCHAL_HAVE_LOOPS
 *   a7  scratch while building the fill word; then length / 16
 *
 * Every store is wrapped in EX(10f), so a faulting store continues at
 * label 10 in the .fixup section below.
 */

.text
ENTRY(__memset)
WEAK(memset)

	abi_entry_default
	# a2/ dst, a3/ c, a4/ length
	extui	a3, a3, 0, 8	# mask to just 8 bits
	slli	a7, a3, 8	# duplicate character in all bytes of word
	or	a3, a3, a7	# ...
	slli	a7, a3, 16	# ...
	or	a3, a3, a7	# ...
	mov	a5, a2		# copy dst so that a2 is return value
	movi	a6, 3		# for alignment tests
	bany	a2, a6, .Ldstunaligned # if dst is unaligned
.L0:	# return here from .Ldstunaligned when dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	bnez	a4, .Laligned
	abi_ret_default		# nothing (left) to set

/*
 * Destination is word-aligned.
 */
	# set 16 bytes per iteration for word-aligned dst
	.align	4		# 1 mod 4 alignment for LOOPNEZ
	.byte	0		# (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a6, a7, 4
	add	a6, a6, a5	# a6 = end of last 16B chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
EX(10f) s32i	a3, a5,  0
EX(10f) s32i	a3, a5,  4
EX(10f) s32i	a3, a5,  8
EX(10f) s32i	a3, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	# a6 - a5 started as a non-zero multiple of 16, so a5 lands
	# exactly on a6.  Compare for equality rather than with a signed
	# blt: addresses are unsigned, and a signed compare leaves the
	# loop early when the region crosses 0x80000000.
	bne	a5, a6, .Loop1
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
	# the low four bits of the length select the tail stores
	bbci.l	a4, 3, .L2
	# set 8 bytes
EX(10f) s32i	a3, a5,  0
EX(10f) s32i	a3, a5,  4
	addi	a5, a5,  8
.L2:
	bbci.l	a4, 2, .L3
	# set 4 bytes
EX(10f) s32i	a3, a5,  0
	addi	a5, a5,  4
.L3:
	bbci.l	a4, 1, .L4
	# set 2 bytes
EX(10f) s16i	a3, a5,  0
	addi	a5, a5,  2
.L4:
	bbci.l	a4, 0, .L5
	# set 1 byte
EX(10f) s8i	a3, a5,  0
.L5:
.Lret1:
	abi_ret_default

/*
 * Destination is unaligned
 */

.Ldstunaligned:
	bltui	a4, 8, .Lbyteset	# do short sets byte by byte
	bbci.l	a5, 0, .L20		# branch if dst alignment half-aligned
	# dst is only byte aligned
	# set 1 byte
EX(10f) s8i	a3, a5,  0
	addi	a5, a5,  1
	addi	a4, a4, -1
	# now retest if dst aligned
	bbci.l	a5, 1, .L0	# if now aligned, return to main algorithm
.L20:
	# dst half-aligned
	# set 2 bytes
EX(10f) s16i	a3, a5,  0
	addi	a5, a5,  2
	addi	a4, a4, -2
	j	.L0		# dst is now aligned, return to main algorithm

/*
 * Byte by byte set
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbyteset:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytesetdone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytesetdone
	add	a6, a5, a4	# a6 = ending address
#endif /* !XCHAL_HAVE_LOOPS */
.Lbyteloop:
EX(10f) s8i	a3, a5, 0
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	# a4 is non-zero here, so a5 reaches a6 exactly; equality avoids
	# a signed compare of addresses (see .Loop1).
	bne	a5, a6, .Lbyteloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytesetdone:
	abi_ret_default

ENDPROC(__memset)
EXPORT_SYMBOL(__memset)
EXPORT_SYMBOL(memset)

	.section .fixup, "ax"
	.align	4

/* We return zero if a failure occurred. */

10:
	movi	a2, 0
	abi_ret_default