/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2021 Loongson Technology Corporation Limited
 */

#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/export.h>
#include <asm/regdef.h>

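/*
 * Broadcast the low byte of \r0 into all eight bytes of the register
 * (e.g. 0x2a becomes 0x2a2a2a2a2a2a2a2a): each bstrins.d doubles the
 * replicated width, 8 -> 16 -> 32 -> 64 bits.
 */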
.macro fill_to_64 r0
	bstrins.d \r0, \r0, 15, 8
	bstrins.d \r0, \r0, 31, 16
	bstrins.d \r0, \r0, 63, 32
.endm
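/*
 * memset() itself is only a dispatcher: ALTERNATIVE patches in a branch
 * to __memset_fast on CPUs that have the UAL (hardware unaligned
 * access) feature, and to __memset_generic otherwise.
 */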
SYM_FUNC_START_WEAK(memset)
SYM_FUNC_START_ALIAS(__memset)
	/*
	 * Some CPUs support hardware unaligned access
	 */
	ALTERNATIVE	"b __memset_generic", \
			"b __memset_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memset)
SYM_FUNC_END_ALIAS(__memset)

EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)

/*
 * void *__memset_generic(void *s, int c, size_t n)
 *
 * a0: s
 * a1: c
 * a2: n
 */
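/*
 * Byte-at-a-time fallback for CPUs without hardware unaligned access;
 * it makes no alignment assumptions at all.
 */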
SYM_FUNC_START(__memset_generic)
	move	a3, a0		/* keep s: memset() returns it */
	beqz	a2, 2f

1:	st.b	a1, a0, 0
	addi.d	a0, a0, 1
	addi.d	a2, a2, -1
	bgt	a2, zero, 1b

2:	move	a0, a3
	jr	ra
SYM_FUNC_END(__memset_generic)

/*
 * void *__memset_fast(void *s, int c, size_t n)
 *
 * a0: s
 * a1: c
 * a2: n
 */
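/*
 * Flow, as a rough C-style sketch (illustrative only, not an exact
 * translation of the instructions below):
 *
 *	if (n < 9)
 *		goto small;		// jump table: one stub per length
 *	end = s + n;
 *	*(u64 *)s = c64;		// unaligned head store
 *	p = (s + 8) & ~7UL;		// first aligned slot after s
 *	... 64-byte unrolled loop, then 32/16/8-byte tail steps ...
 *	*(u64 *)(end - 8) = c64;	// final store, may overlap
 *
 * Overlapping stores are harmless for memset, which is what lets the
 * tail avoid any byte-granular loop.
 */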
SYM_FUNC_START(__memset_fast)
	/* replicate the fill byte across all 64 bits of a1 */
	fill_to_64 a1

	sltui	t0, a2, 9
	bnez	t0, .Lsmall

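	/*
	 * n >= 9: turn a2 into the end address and store one (possibly
	 * unaligned) head doubleword; the aligned stores below are
	 * allowed to overlap it.
	 */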
	add.d	a2, a0, a2
	st.d	a1, a0, 0

	/* a3 = first 8-byte-aligned address after a0 */
	addi.d	a3, a0, 8
	bstrins.d	a3, zero, 2, 0

	addi.d	a4, a2, -64
	bgeu	a3, a4, .Llt64

	/* set 64 bytes at a time */
.Lloop64:
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	st.d	a1, a3, 16
	st.d	a1, a3, 24
	st.d	a1, a3, 32
	st.d	a1, a3, 40
	st.d	a1, a3, 48
	st.d	a1, a3, 56
	addi.d	a3, a3, 64
	bltu	a3, a4, .Lloop64

	/* set the remaining bytes */
.Llt64:
	addi.d	a4, a2, -32
	bgeu	a3, a4, .Llt32
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	st.d	a1, a3, 16
	st.d	a1, a3, 24
	addi.d	a3, a3, 32

.Llt32:
	addi.d	a4, a2, -16
	bgeu	a3, a4, .Llt16
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	addi.d	a3, a3, 16

.Llt16:
	addi.d	a4, a2, -8
	bgeu	a3, a4, .Llt8
	st.d	a1, a3, 0

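	/*
	 * At most 8 bytes remain: one doubleword ending exactly at the
	 * end address covers them, overlapping earlier stores if needed.
	 */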
.Llt8:
	st.d	a1, a2, -8

	/* return */
	jr	ra

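	/*
	 * n <= 8: computed jump. pcaddi makes t0 point at label 0 below
	 * (this pc + 4 instructions), every stub is padded to 16 bytes by
	 * .align 4, and n << 4 selects the stub storing exactly n bytes.
	 */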
	.align	4
.Lsmall:
	pcaddi	t0, 4
	slli.d	a2, a2, 4
	add.d	t0, t0, a2
	jr	t0

	.align	4
0:	jr	ra

	.align	4
1:	st.b	a1, a0, 0
	jr	ra

	.align	4
2:	st.h	a1, a0, 0
	jr	ra

	.align	4
3:	st.h	a1, a0, 0
	st.b	a1, a0, 2
	jr	ra

	.align	4
4:	st.w	a1, a0, 0
	jr	ra

	.align	4
5:	st.w	a1, a0, 0
	st.b	a1, a0, 4
	jr	ra

	.align	4
6:	st.w	a1, a0, 0
	st.h	a1, a0, 4
	jr	ra

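	/* 7 bytes: two word stores that overlap at byte 3 (0-3 and 3-6) */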
	.align	4
7:	st.w	a1, a0, 0
	st.w	a1, a0, 3
	jr	ra

	.align	4
8:	st.d	a1, a0, 0
	jr	ra
SYM_FUNC_END(__memset_fast)