/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/memset.S
 *
 *  Copyright (C) 1995-2000 Russell King
 *
 *  ASM optimised string functions
 */
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

	.text
	.align	5

/*
 * __memset / mmioset / memset
 *
 * Fill a region of memory with a byte value.  All three symbols label the
 * same entry point.
 *
 * In:
 *   r0 = destination pointer (any alignment)
 *   r1 = fill value; only the low 8 bits are used
 *   r2 = number of bytes to set
 * Out:
 *   r0 = destination pointer (r0 is never written by this code)
 *
 * Register use:
 *   ip      running store pointer (copy of r0, post-incremented)
 *   r1, r3  fill pattern words; on the unaligned entry path r3 first
 *           holds (r0 & 3)
 *   r8, lr  (simple variant) or r4-r8, lr (cache-line variant): extra
 *           copies of the pattern, pushed on entry to the bulk loop and
 *           popped before leaving it
 *
 * Local labels:
 *   1  pointer is word aligned: replicate the byte across r1/r3
 *   7  bulk entry with r1/r3 already holding the pattern (also the
 *      target of the __memset64 stub below)
 *   2  64-bytes-per-iteration loop (simple variant)
 *   3  64-bytes-per-iteration loop (cache-line variant)
 *   4  fewer than 16 bytes left, pointer still word aligned
 *   5  fewer than 4 bytes left, pointer possibly unaligned
 *   6  unaligned start: store single bytes up to a word boundary
 *
 * NOTE(review): ENTRY(), WEAK(), ENDPROC(), UNWIND(), CALGN() and the
 * "ret" mnemonic are not defined in this file; presumably they come from
 * the headers included above - confirm there.
 */
ENTRY(__memset)
ENTRY(mmioset)
WEAK(memset)
UNWIND( .fnstart         )
	and	r1, r1, #255		@ cast to unsigned char
	ands	r3, r0, #3		@ 1 unaligned?
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ 1
/*
 * we know that the pointer in ip is aligned to a word boundary.
 * Replicate the fill byte into all four byte lanes of r1, and keep a
 * second copy in r3 so that pairs of words can be stored at once.
 */
1:	orr	r1, r1, r1, lsl #8
	orr	r1, r1, r1, lsl #16
	mov	r3, r1
7:	cmp	r2, #16			@ fewer than 16 bytes: skip bulk loop
	blt	4f
UNWIND( .fnend              )

/*
 * Select the bulk-store variant.  If CALGN() expands to nothing the test
 * below reads "! +0" (true) and the simple loop is built; if it expands to
 * its argument the test reads "! 1+0" (false) and the cache-line aligning
 * loop is built instead.
 */
#if ! CALGN(1)+0

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
UNWIND( .fnstart            )
UNWIND( .save {r8, lr}      )
	stmfd	sp!, {r8, lr}
	mov	r8, r1
	mov	lr, r3

	/*
	 * Each stmia below writes 4 registers = 16 bytes.  After the subs:
	 *   GE - at least 64 bytes were left, so all four stores execute
	 *   GT - bytes remain, so loop again
	 *   EQ - exactly finished: pop the saved lr straight into pc
	 */
2:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmfdeq	sp!, {r8, pc}		@ Now <64 bytes to go.
/*
 * No need to correct the count; we're only testing bits from now on
 * (r2 is now negative, but subtracting 64 left its low six bits equal
 * to those of the number of bytes still to be written).
 */
	tst	r2, #32
	stmiane	ip!, {r1, r3, r8, lr}	@ 2 x 16 bytes
	stmiane	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmiane	ip!, {r1, r3, r8, lr}	@ 1 x 16 bytes
	ldmfd	sp!, {r8, lr}		@ restore, then fall into the tail at 4
UNWIND( .fnend              )

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

UNWIND( .fnstart               )
UNWIND( .save {r4-r8, lr}      )
	stmfd	sp!, {r4-r8, lr}
	mov	r4, r1
	mov	r5, r3
	mov	r6, r1
	mov	r7, r3
	mov	r8, r1
	mov	lr, r3

	/*
	 * Only align when more than 96 bytes remain and ip is not already
	 * on a 32-byte boundary.  The tst executes only if the cmp gave GT,
	 * so the ble is taken for "count <= 96" or "ip & 31 == 0".
	 */
	cmp	r2, #96
	tstgt	ip, #31
	ble	3f

	/*
	 * r8 = bytes needed to reach the next 32-byte boundary; take them
	 * off the count.  Shifting r8 left by 28 moves bit 4 into the carry
	 * flag and bit 3 into the sign flag, so CS stores 16 bytes and MI
	 * stores 8 bytes.  Bit 2 ends up at bit 30 and selects one more word.
	 */
	and	r8, ip, #31
	rsb	r8, r8, #32
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)
	stmiacs	ip!, {r4, r5, r6, r7}
	stmiami	ip!, {r4, r5}
	tst	r8, #(1 << 30)
	mov	r8, r1			@ r8 is a pattern copy again; flags kept
	strne	r1, [ip], #4

	/*
	 * Each stmia below writes 8 registers = 32 bytes.  The flag usage
	 * after the subs is the same as in the simple variant: GE stores,
	 * GT loops, EQ returns by popping the saved lr into pc.
	 */
3:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3-r8, lr}
	stmiage	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmfdeq	sp!, {r4-r8, pc}

	/* Fewer than 64 bytes left: only the low bits of r2 matter now. */
	tst	r2, #32
	stmiane	ip!, {r1, r3-r8, lr}	@ 32 bytes
	tst	r2, #16
	stmiane	ip!, {r4-r7}		@ 16 bytes
	ldmfd	sp!, {r4-r8, lr}	@ restore, then fall into the tail at 4
UNWIND( .fnend                 )

#endif

UNWIND( .fnstart            )
	/* Fewer than 16 bytes left; ip is still word aligned here. */
4:	tst	r2, #8
	stmiane	ip!, {r1, r3}		@ 8 bytes
	tst	r2, #4
	strne	r1, [ip], #4		@ 4 bytes
/*
 * When we get here, we've got less than 4 bytes to set.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strbne	r1, [ip], #1		@ 2 bytes, stored one at a time
	strbne	r1, [ip], #1
	tst	r2, #1
	strbne	r1, [ip], #1		@ final odd byte
	ret	lr

	/*
	 * Unaligned start; r3 = r0 & 3 (1, 2 or 3).  If fewer than 4 bytes
	 * were requested, go straight to the byte tail at 5: subtracting 4
	 * did not change the two low bits of r2 that the tail tests.
	 * Otherwise store 4 - r3 bytes to reach a word boundary:
	 *   r3 == 1: three bytes,  r3 == 2: two bytes,  r3 == 3: one byte.
	 */
6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strblt	r1, [ip], #1		@ 1
	strble	r1, [ip], #1		@ 1
	strb	r1, [ip], #1		@ 1
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
	b	1b
UNWIND( .fnend   )
ENDPROC(memset)
ENDPROC(mmioset)
ENDPROC(__memset)

/*
 * __memset32 / __memset64
 *
 * Entry stubs that reuse the bulk path of memset at local label 7, which
 * stores r1 and r3 alternately as the pattern words.
 *
 * In:
 *   r0 = destination pointer
 *   r1 = pattern word (for __memset32 it is copied into r3 as well)
 *   r2 = count, consumed as a byte count by the shared code at 7
 *   r3 = second pattern word (__memset64 only)
 * Out:
 *   r0 = destination pointer (unchanged)
 *
 * NOTE(review): the code at 7 does no alignment fix-up, so these entries
 * presumably require a word-aligned r0 - confirm against the callers.
 */
ENTRY(__memset32)
UNWIND( .fnstart         )
	mov	r3, r1			@ copy r1 to r3 and fall into memset64
UNWIND( .fnend   )
ENDPROC(__memset32)
ENTRY(__memset64)
UNWIND( .fnstart         )
	mov	ip, r0			@ preserve r0 as return value
	b	7b			@ jump into the middle of memset
UNWIND( .fnend   )
ENDPROC(__memset64)