/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * OpenRISC memset.S
 *
 * Hand-optimized assembler version of memset for OpenRISC.
 * Algorithm inspired by several other arch-specific memset routines
 * in the kernel tree.
 *
 * Copyright (C) 2015 Olof Kindgren <olof.kindgren@gmail.com>
 */

/*
 * memset - fill a memory region with a single byte value.
 *
 * Equivalent C signature: void *memset(void *s, int c, size_t n)
 *
 * In:
 *	r3  = s, start of the region to fill
 *	r4  = c, fill value (only the low 8 bits are used)
 *	r5  = n, number of bytes to fill
 *	r9  = address to return to
 * Out:
 *	r11 = s (the pointer that was passed in)
 * Clobbers:
 *	r13 (fill pattern), r15 (scratch), r17 (bytes remaining),
 *	r19 (current destination), compare flag
 *
 * Strategy:
 *	- n == 0: return immediately.
 *	- n <= 7: store byte by byte.
 *	- n >= 8: store up to three single bytes to reach a 4-byte aligned
 *	  address, then store whole words, then finish the 0..3 leftover
 *	  bytes one at a time.
 *
 * This code relies on branch delay slots: the instruction that follows
 * a branch or jump executes whether or not the branch is taken. Such
 * instructions are indented by one extra space, except where they carry
 * a label.
 */
	.global memset
	.type	memset, @function
memset:
	/* Exit if n == 0 */
	l.sfeqi		r5, 0
	l.bf		4f

	/*
	 * Truncate c to char.
	 * Delay slot of the branch above: also runs on the early-exit
	 * path, which is harmless because r13 is only a scratch register.
	 */
	 l.andi		r13, r4, 0xff

	/* Skip word extension if c is 0 (r13 is already the word 0000) */
	l.sfeqi		r13, 0
	l.bf		1f
	/*
	 * Delay slot: flag = (n <= 7), i.e. fewer than two whole words.
	 * Nothing between here and the "l.bf 3f" below writes the flag, so
	 * that branch consumes this result on both paths into label 1.
	 */
	 l.sfleui	r5, 7

	/* Extend char c to 32-bit word cccc in r13 */
	l.slli		r15, r13, 16  // r13 = 000c, r15 = 0c00
	l.or		r13, r13, r15 // r13 = 0c0c, r15 = 0c00
	l.slli		r15, r13, 8   // r13 = 0c0c, r15 = c0c0
	l.or		r13, r13, r15 // r13 = cccc, r15 = c0c0

1:	l.addi		r19, r3, 0 // Set r19 = dst
	/* Jump to the byte fill loop if less than two words (n <= 7) */
	l.bf		3f
	 l.or		r17, r5, r0 // Set r17 = n (delay slot, always runs)

	/* From here on n >= 8. Mask out two LSBs to check alignment */
	l.andi		r15, r3, 0x3

	/* lsb == 00, already aligned: jump to the word fill loop */
	l.sfeqi		r15, 0
	l.bf		2f
	 l.addi		r19, r3, 0 // r19 already equals dst; fills the delay slot

	/* lsb == 01, 10 or 11: at least one byte needed to reach alignment */
	l.sb		0(r3), r13   // *dst = c
	l.addi		r17, r17, -1 // Decrease n

	l.sfeqi		r15, 3
	l.bf		2f
	 l.addi		r19, r3, 1  // dst = s + 1

	/* lsb == 01 or 10: a second byte is needed */
	l.sb		1(r3), r13   // *(dst+1) = c
	l.addi		r17, r17, -1 // Decrease n

	l.sfeqi		r15, 2
	l.bf		2f
	 l.addi		r19, r3, 2  // dst = s + 2

	/* lsb == 01: a third byte is needed */
	l.sb		2(r3), r13   // *(dst+2) = c
	l.addi		r17, r17, -1 // Decrease n
	l.addi		r19, r3, 3   // dst = s + 3

	/*
	 * Word fill loop.
	 * On entry r19 is 4-byte aligned and r17 >= 5 (n >= 8 minus at most
	 * three alignment bytes), so the first word store is always in range.
	 */
2:	l.sw		0(r19), r13  // *dst = cccc
	l.addi		r17, r17, -4 // Decrease n
	l.sfgeui	r17, 4       // Another whole word left?
	l.bf		2b
	 l.addi		r19, r19, 4  // Increase dst (delay slot, always runs)

	/* When n > 0, fill the remaining 1..3 bytes, otherwise jump to exit */
	l.sfeqi		r17, 0
	l.bf		4f

	/*
	 * Byte fill loop.
	 * The first instruction doubles as the delay slot of the branch
	 * above; on the exit path the extra decrement is harmless because
	 * r17 is not used again.
	 */
3:	l.addi		r17, r17, -1 // Decrease n
	l.sb		0(r19), r13  // *dst = c (l.sb stores the low byte of r13)
	l.sfnei		r17, 0
	l.bf		3b
	 l.addi		r19, r19, 1  // Increase dst (delay slot, always runs)

	/* Return the original pointer s in r11 */
4:	l.jr		r9
	 l.ori		r11, r3, 0   // Delay slot: r11 = s
	.size	memset, . - memset
95