xref: /kernel/linux/linux-6.6/arch/alpha/lib/memset.S (revision 62306a36)
162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * linux/arch/alpha/lib/memset.S
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * This is an efficient (and small) implementation of the C library "memset()"
662306a36Sopenharmony_ci * function for the alpha.
762306a36Sopenharmony_ci *
862306a36Sopenharmony_ci *	(C) Copyright 1996 Linus Torvalds
962306a36Sopenharmony_ci *
1062306a36Sopenharmony_ci * This routine is "moral-ware": you are free to use it any way you wish, and
1162306a36Sopenharmony_ci * the only obligation I put on you is a moral one: if you make any improvements
1262306a36Sopenharmony_ci * to the routine, please send me your improvements for me to use similarly.
1362306a36Sopenharmony_ci *
1462306a36Sopenharmony_ci * The scheduling comments are according to the EV5 documentation (and done by
1562306a36Sopenharmony_ci * hand, so they might well be incorrect, please do tell me about it..)
1662306a36Sopenharmony_ci */
1762306a36Sopenharmony_ci#include <linux/export.h>
/*
 * Assembler setup and symbol visibility.
 *
 * "noat" asks the assembler not to use the $at temporary on its own and
 * "noreorder" asks it not to reorder instructions; the routines below are
 * scheduled by hand.  All five entry-point names defined in this file are
 * declared global here.
 */
1862306a36Sopenharmony_ci	.set noat
1962306a36Sopenharmony_ci	.set noreorder
2062306a36Sopenharmony_ci.text
2162306a36Sopenharmony_ci	.globl memset
2262306a36Sopenharmony_ci	.globl __memset
2362306a36Sopenharmony_ci	.globl ___memset
2462306a36Sopenharmony_ci	.globl __memset16
2562306a36Sopenharmony_ci	.globl __constant_c_memset
2662306a36Sopenharmony_ci
/*
 * ___memset / __constant_c_memset: fill $18 bytes starting at address $16.
 *
 * Register usage, as read from the code below:
 *   $16    destination address (advanced to the next 8-byte boundary on the
 *          unaligned path)
 *   $17    fill value.  ___memset uses only its low byte and replicates it
 *          into all eight bytes; __constant_c_memset expects $17 to hold
 *          the full 8-byte pattern already.
 *   $18    byte count; a count <= 0 stores nothing
 *   $0     set to the original destination before $16 is modified
 *   $26    return address
 *   $1-$7  scratch ($5 = store pointer, $6 = dest + count)
 *
 * Three paths:
 *   - within_one_quad: the destination and dest+count fall in the same aligned
 *     quadword; a single read-modify-write does the whole job.
 *   - unaligned head:  one read-modify-write fills up to the next 8-byte
 *     boundary, then control falls into "aligned".
 *   - aligned:         whole quadwords are stored in "loop", and a final
 *     read-modify-write in "no_quad" handles the 1..7 trailing bytes.
 *
 * Partial quadwords are always merged with the bytes already in memory, so
 * nothing outside [dest, dest+count) is changed.
 *
 * "E0"/"E1" in the comments are the original author's EV5 issue-pipe notes.
 * Instructions whose destination is $31 are no-ops used as padding.
 */
2762306a36Sopenharmony_ci	.ent ___memset
2862306a36Sopenharmony_ci.align 5
2962306a36Sopenharmony_ci___memset:
3062306a36Sopenharmony_ci	.frame $30,0,$26,0
3162306a36Sopenharmony_ci	.prologue 0
3262306a36Sopenharmony_ci
/* Replicate the low byte of $17 into all eight bytes of $17. */
3362306a36Sopenharmony_ci	and $17,255,$1		/* E1: $1 = fill & 0xff */
3462306a36Sopenharmony_ci	insbl $17,1,$17		/* .. E0: $17 = low byte placed in byte 1 */
3562306a36Sopenharmony_ci	bis $17,$1,$17		/* E0 (p-c latency, next cycle): bytes 0-1 filled */
3662306a36Sopenharmony_ci	sll $17,16,$1		/* E1 (p-c latency, next cycle) */
3762306a36Sopenharmony_ci
3862306a36Sopenharmony_ci	bis $17,$1,$17		/* E0 (p-c latency, next cycle): bytes 0-3 filled */
3962306a36Sopenharmony_ci	sll $17,32,$1		/* E1 (p-c latency, next cycle) */
4062306a36Sopenharmony_ci	bis $17,$1,$17		/* E0 (p-c latency, next cycle): bytes 0-7 filled */
4162306a36Sopenharmony_ci	ldq_u $31,0($30)	/* .. E1: no-op (load result discarded into $31) */
4262306a36Sopenharmony_ci
/* Second entry point: ___memset falls through to here with $17 replicated. */
4362306a36Sopenharmony_ci.align 5
4462306a36Sopenharmony_ci__constant_c_memset:
4562306a36Sopenharmony_ci	addq $18,$16,$6		/* E0: $6 = dest + count (end address) */
4662306a36Sopenharmony_ci	bis $16,$16,$0		/* .. E1: $0 = original dest */
4762306a36Sopenharmony_ci	xor $16,$6,$1		/* E0: $1 = dest ^ end */
4862306a36Sopenharmony_ci	ble $18,end		/* .. E1: count <= 0 (signed), nothing to do */
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_ci	bic $1,7,$1		/* E0: drop the byte-offset bits of dest ^ end */
5162306a36Sopenharmony_ci	beq $1,within_one_quad	/* .. E1 (note EV5 zero-latency forwarding): dest and end share a quadword */
5262306a36Sopenharmony_ci	and $16,7,$3		/* E0: $3 = dest & 7 */
5362306a36Sopenharmony_ci	beq $3,aligned		/* .. E1 (note EV5 zero-latency forwarding): dest already 8-byte aligned */
5462306a36Sopenharmony_ci
/* Unaligned head: merge the pattern into the quadword that contains dest. */
5562306a36Sopenharmony_ci	ldq_u $4,0($16)		/* E0: $4 = existing quadword containing dest */
5662306a36Sopenharmony_ci	bis $16,$16,$5		/* .. E1: $5 = dest, address for the stq_u below */
5762306a36Sopenharmony_ci	insql $17,$16,$2	/* E0: $2 = pattern shifted up to byte offset dest & 7 */
5862306a36Sopenharmony_ci	subq $3,8,$3		/* .. E1: $3 = (dest & 7) - 8, minus the head length */
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci	addq $18,$3,$18		/* E0	$18 is new count ($3 is negative) */
6162306a36Sopenharmony_ci	mskql $4,$16,$4		/* .. E1 (and possible load stall): keep old bytes below dest & 7 */
6262306a36Sopenharmony_ci	subq $16,$3,$16		/* E0 	$16 is new aligned destination */
6362306a36Sopenharmony_ci	bis $2,$4,$1		/* .. E1: $1 = old low bytes | pattern */
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_ci	bis $31,$31,$31		/* E0: no-op */
6662306a36Sopenharmony_ci	ldq_u $31,0($30)	/* .. E1: no-op */
6762306a36Sopenharmony_ci	stq_u $1,0($5)		/* E0: write the merged head quadword back */
6862306a36Sopenharmony_ci	bis $31,$31,$31		/* .. E1: no-op */
6962306a36Sopenharmony_ci
/* $16 is 8-byte aligned here; split the count into quadwords and a tail. */
7062306a36Sopenharmony_ci.align 4
7162306a36Sopenharmony_cialigned:
7262306a36Sopenharmony_ci	sra $18,3,$3		/* E0: $3 = number of whole quadwords */
7362306a36Sopenharmony_ci	and $18,7,$18		/* .. E1: $18 = trailing bytes (0..7) */
7462306a36Sopenharmony_ci	bis $16,$16,$5		/* E0: $5 = store pointer */
7562306a36Sopenharmony_ci	beq $3,no_quad		/* .. E1: no whole quadword to store */
7662306a36Sopenharmony_ci
/* Main loop: one quadword per iteration, $3 counts down to zero. */
7762306a36Sopenharmony_ci.align 3
7862306a36Sopenharmony_ciloop:
7962306a36Sopenharmony_ci	stq $17,0($5)		/* E0: store 8 pattern bytes */
8062306a36Sopenharmony_ci	subq $3,1,$3		/* .. E1: one quadword fewer to go */
8162306a36Sopenharmony_ci	addq $5,8,$5		/* E0: advance the store pointer */
8262306a36Sopenharmony_ci	bne $3,loop		/* .. E1 */
8362306a36Sopenharmony_ci
/* Tail: merge the pattern into the low (end & 7) bytes of the last quadword. */
8462306a36Sopenharmony_cino_quad:
8562306a36Sopenharmony_ci	bis $31,$31,$31		/* E0: no-op */
8662306a36Sopenharmony_ci	beq $18,end		/* .. E1: no trailing bytes */
8762306a36Sopenharmony_ci	ldq $7,0($5)		/* E0: $7 = existing last quadword */
8862306a36Sopenharmony_ci	mskqh $7,$6,$2		/* .. E1 (and load stall): keep old bytes from end & 7 upward */
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci	insqh $17,$6,$4		/* E0: $4 = pattern in the low end & 7 bytes */
9162306a36Sopenharmony_ci	bis $2,$4,$1		/* .. E1: merge */
9262306a36Sopenharmony_ci	stq $1,0($5)		/* E0: write the merged tail quadword back */
9362306a36Sopenharmony_ci	ret $31,($26),1		/* .. E1: return through $26 */
9462306a36Sopenharmony_ci
/* Whole fill lies inside one quadword: bytes dest & 7 up to (end & 7) - 1. */
9562306a36Sopenharmony_ci.align 3
9662306a36Sopenharmony_ciwithin_one_quad:
9762306a36Sopenharmony_ci	ldq_u $1,0($16)		/* E0: $1 = existing quadword */
9862306a36Sopenharmony_ci	insql $17,$16,$2	/* E1: $2 = pattern shifted up to byte offset dest & 7 */
9962306a36Sopenharmony_ci	mskql $1,$16,$4		/* E0 (after load stall): old bytes below dest & 7 */
10062306a36Sopenharmony_ci	bis $2,$4,$2		/* E0: $2 = old head | pattern from dest upward */
10162306a36Sopenharmony_ci
10262306a36Sopenharmony_ci	mskql $2,$6,$4		/* E0: keep that only below end & 7 */
10362306a36Sopenharmony_ci	mskqh $1,$6,$2		/* .. E1: old bytes from end & 7 upward */
10462306a36Sopenharmony_ci	bis $2,$4,$1		/* E0: $1 = old head | pattern | old tail */
10562306a36Sopenharmony_ci	stq_u $1,0($16)		/* E0: write the merged quadword back */
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_ciend:
10862306a36Sopenharmony_ci	ret $31,($26),1		/* E1: return through $26 */
10962306a36Sopenharmony_ci	.end ___memset
/* Export both entry points defined above (EXPORT_SYMBOL is from <linux/export.h>). */
11062306a36Sopenharmony_ciEXPORT_SYMBOL(___memset)
11162306a36Sopenharmony_ciEXPORT_SYMBOL(__constant_c_memset)
11262306a36Sopenharmony_ci
/*
 * __memset16: fill memory with a repeated 16-bit value.
 *
 * The low 16 bits of $17 are copied into each of the four 16-bit lanes of a
 * quadword, then control branches to __constant_c_memset, which does the
 * actual stores.  $16 (destination) and $18 are passed through untouched, so
 * $18 is interpreted there as a byte count.  $1-$4 are used as scratch.
 */
11362306a36Sopenharmony_ci	.align 5
11462306a36Sopenharmony_ci	.ent __memset16
11562306a36Sopenharmony_ci__memset16:
11662306a36Sopenharmony_ci	.prologue 0
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_ci	inswl $17,0,$1		/* E0: $1 = low word of $17 at byte offset 0 */
11962306a36Sopenharmony_ci	inswl $17,2,$2		/* E0: $2 = same word at byte offset 2 */
12062306a36Sopenharmony_ci	inswl $17,4,$3		/* E0: $3 = same word at byte offset 4 */
12162306a36Sopenharmony_ci	or $1,$2,$1		/* .. E1: lanes 0-1 combined */
12262306a36Sopenharmony_ci	inswl $17,6,$4		/* E0: $4 = same word at byte offset 6 */
12362306a36Sopenharmony_ci	or $1,$3,$1		/* .. E1: lanes 0-2 combined */
12462306a36Sopenharmony_ci	or $1,$4,$17		/* E0: $17 = word replicated in all four lanes */
12562306a36Sopenharmony_ci	br __constant_c_memset	/* .. E1: continue in the common fill code */
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_ci	.end __memset16
12862306a36Sopenharmony_ciEXPORT_SYMBOL(__memset16)
12962306a36Sopenharmony_ci
/*
 * memset and __memset are symbol assignments to ___memset, so all three
 * names refer to the same code.  Both aliases are exported as well.
 */
13062306a36Sopenharmony_cimemset = ___memset
13162306a36Sopenharmony_ci__memset = ___memset
13262306a36Sopenharmony_ci	EXPORT_SYMBOL(memset)
13362306a36Sopenharmony_ci	EXPORT_SYMBOL(__memset)
134