162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-only */
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
462306a36Sopenharmony_ci */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci/* Numerology:
762306a36Sopenharmony_ci * WXYZ
862306a36Sopenharmony_ci * W: width in bytes
962306a36Sopenharmony_ci * X: Load=0, Store=1
1062306a36Sopenharmony_ci * Y: Location 0=preamble,8=loop,9=epilog
1162306a36Sopenharmony_ci * Z: Location=0,handler=9
1262306a36Sopenharmony_ci */
1362306a36Sopenharmony_ci	.text
1462306a36Sopenharmony_ci	.global FUNCNAME
1562306a36Sopenharmony_ci	.type FUNCNAME, @function
1662306a36Sopenharmony_ci	.p2align 5
/*
 * FUNCNAME: copy bytes bytes from src to dst, using the widest access
 * that the alignment of both pointers allows.
 *
 * FUNCNAME, src, dst, bytes, loopcount, d_dbuf, w_dbuf and src_dst_sav
 * are not defined in this file. NOTE(review): presumably the including
 * file defines them as register aliases before pulling this template
 * in -- confirm against the includer.
 *
 * Strategy, as visible below:
 *   - both pointers 8-byte aligned and more than 15 bytes: doubleword loop
 *   - src and dst agree in their low three address bits and more than
 *     15 bytes: .Lalign copies 1, 2 and 4 bytes as needed to reach
 *     8-byte alignment, then restarts at FUNCNAME
 *   - both 4-byte aligned and more than 7 bytes: word loop
 *   - both 2-byte aligned and more than 3 bytes: halfword loop
 *   - anything else, and every leftover tail: byte loop
 *
 * Every return path visible here sets r0 to 0 and returns through r31.
 * r3 and r4 are used as scratch.
 *
 * The numeric labels (8080, 8180, 8190, 1000, ...) follow the WXYZ
 * numerology described at the top of this file and tag the individual
 * load and store sites. NOTE(review): the matching handler labels (Z=9)
 * are not in this file, presumably the includer provides them together
 * with an exception table -- confirm.
 */
1762306a36Sopenharmony_ciFUNCNAME:
	/*
	 * Nothing to do for a zero length. r3 = dst OR src has a low bit set
	 * if either pointer is misaligned at that bit, r4 = dst XOR src has a
	 * low bit set where the two pointers disagree.
	 */
1862306a36Sopenharmony_ci	{
1962306a36Sopenharmony_ci		p0 = cmp.gtu(bytes,#0)
2062306a36Sopenharmony_ci		if (!p0.new) jump:nt .Ldone
2162306a36Sopenharmony_ci		r3 = or(dst,src)
2262306a36Sopenharmony_ci		r4 = xor(dst,src)
2362306a36Sopenharmony_ci	}
	/*
	 * p1 = more than 15 bytes to copy. Stay on the doubleword path only
	 * when the low three bits of both pointers are clear. src_dst_sav
	 * records the starting src and dst. NOTE(review): it is not read
	 * anywhere in this file, presumably the fault handlers use it.
	 */
2462306a36Sopenharmony_ci	{
2562306a36Sopenharmony_ci		p1 = cmp.gtu(bytes,#15)
2662306a36Sopenharmony_ci		p0 = bitsclr(r3,#7)
2762306a36Sopenharmony_ci		if (!p0.new) jump:nt .Loop_not_aligned_8
2862306a36Sopenharmony_ci		src_dst_sav = combine(src,dst)
2962306a36Sopenharmony_ci	}
3062306a36Sopenharmony_ci
	/* Doubleword count. 15 bytes or fewer go straight to the byte loop. */
3162306a36Sopenharmony_ci	{
3262306a36Sopenharmony_ci		loopcount = lsr(bytes,#3)
3362306a36Sopenharmony_ci		if (!p1) jump .Lsmall
3462306a36Sopenharmony_ci	}
	/*
	 * Software-pipelined loop: each pass loads the next doubleword and
	 * stores the one loaded on the previous pass. The store is predicated
	 * on p3, which sp1loop0 arranges to be false on the first pass only
	 * (per the Hexagon ISA -- confirm against the reference manual).
	 */
3562306a36Sopenharmony_ci	p3=sp1loop0(.Loop8,loopcount)
3662306a36Sopenharmony_ci.Loop8:
3762306a36Sopenharmony_ci8080:
3862306a36Sopenharmony_ci8180:
3962306a36Sopenharmony_ci	{
4062306a36Sopenharmony_ci		if (p3) memd(dst++#8) = d_dbuf
4162306a36Sopenharmony_ci		d_dbuf = memd(src++#8)
4262306a36Sopenharmony_ci	}:endloop0
	/*
	 * Epilog: store the last doubleword loaded, reduce bytes by 8 times
	 * loopcount, then let .Lsmall deal with whatever is left.
	 */
4362306a36Sopenharmony_ci8190:
4462306a36Sopenharmony_ci	{
4562306a36Sopenharmony_ci		memd(dst++#8) = d_dbuf
4662306a36Sopenharmony_ci		bytes -= asl(loopcount,#3)
4762306a36Sopenharmony_ci		jump .Lsmall
4862306a36Sopenharmony_ci	}
4962306a36Sopenharmony_ci
/* Reached when at least one of src and dst is not 8-byte aligned. */
5062306a36Sopenharmony_ci.Loop_not_aligned_8:
	/*
	 * If src and dst agree in their low three bits they can be brought to
	 * 8-byte alignment together, which is what .Lalign does.
	 */
5162306a36Sopenharmony_ci	{
5262306a36Sopenharmony_ci		p0 = bitsclr(r4,#7)
5362306a36Sopenharmony_ci		if (p0.new) jump:nt .Lalign
5462306a36Sopenharmony_ci	}
	/*
	 * Otherwise try words: both pointers need their low two bits clear.
	 * p1 is rewritten here and now means more than 7 bytes.
	 */
5562306a36Sopenharmony_ci	{
5662306a36Sopenharmony_ci		p0 = bitsclr(r3,#3)
5762306a36Sopenharmony_ci		if (!p0.new) jump:nt .Loop_not_aligned_4
5862306a36Sopenharmony_ci		p1 = cmp.gtu(bytes,#7)
5962306a36Sopenharmony_ci	}
6062306a36Sopenharmony_ci
	/* Word count. 7 bytes or fewer go to the byte loop. */
6162306a36Sopenharmony_ci	{
6262306a36Sopenharmony_ci		if (!p1) jump .Lsmall
6362306a36Sopenharmony_ci		loopcount = lsr(bytes,#2)
6462306a36Sopenharmony_ci	}
	/* Same pipelined shape as the doubleword loop, one word per pass. */
6562306a36Sopenharmony_ci	p3=sp1loop0(.Loop4,loopcount)
6662306a36Sopenharmony_ci.Loop4:
6762306a36Sopenharmony_ci4080:
6862306a36Sopenharmony_ci4180:
6962306a36Sopenharmony_ci	{
7062306a36Sopenharmony_ci		if (p3) memw(dst++#4) = w_dbuf
7162306a36Sopenharmony_ci		w_dbuf = memw(src++#4)
7262306a36Sopenharmony_ci	}:endloop0
	/*
	 * Epilog: store the last word loaded, reduce bytes by 4 times
	 * loopcount, then let .Lsmall deal with whatever is left.
	 */
7362306a36Sopenharmony_ci4190:
7462306a36Sopenharmony_ci	{
7562306a36Sopenharmony_ci		memw(dst++#4) = w_dbuf
7662306a36Sopenharmony_ci		bytes -= asl(loopcount,#2)
7762306a36Sopenharmony_ci		jump .Lsmall
7862306a36Sopenharmony_ci	}
7962306a36Sopenharmony_ci
/* Reached when at least one of src and dst is not 4-byte aligned. */
8062306a36Sopenharmony_ci.Loop_not_aligned_4:
	/*
	 * Try halfwords: both pointers need bit 0 clear. p1 is rewritten
	 * here and now means more than 3 bytes.
	 */
8162306a36Sopenharmony_ci	{
8262306a36Sopenharmony_ci		p0 = bitsclr(r3,#1)
8362306a36Sopenharmony_ci		if (!p0.new) jump:nt .Loop_not_aligned
8462306a36Sopenharmony_ci		p1 = cmp.gtu(bytes,#3)
8562306a36Sopenharmony_ci	}
8662306a36Sopenharmony_ci
	/* Halfword count. 3 bytes or fewer go to the byte loop. */
8762306a36Sopenharmony_ci	{
8862306a36Sopenharmony_ci		if (!p1) jump .Lsmall
8962306a36Sopenharmony_ci		loopcount = lsr(bytes,#1)
9062306a36Sopenharmony_ci	}
	/* Same pipelined shape again, one halfword per pass. */
9162306a36Sopenharmony_ci	p3=sp1loop0(.Loop2,loopcount)
9262306a36Sopenharmony_ci.Loop2:
9362306a36Sopenharmony_ci2080:
9462306a36Sopenharmony_ci2180:
9562306a36Sopenharmony_ci	{
9662306a36Sopenharmony_ci		if (p3) memh(dst++#2) = w_dbuf
9762306a36Sopenharmony_ci		w_dbuf = memuh(src++#2)
9862306a36Sopenharmony_ci	}:endloop0
	/*
	 * Epilog: store the last halfword loaded, reduce bytes by 2 times
	 * loopcount, then let .Lsmall deal with whatever is left.
	 */
9962306a36Sopenharmony_ci2190:
10062306a36Sopenharmony_ci	{
10162306a36Sopenharmony_ci		memh(dst++#2) = w_dbuf
10262306a36Sopenharmony_ci		bytes -= asl(loopcount,#1)
10362306a36Sopenharmony_ci		jump .Lsmall
10462306a36Sopenharmony_ci	}
10562306a36Sopenharmony_ci
/*
 * Byte loop over everything that is left. Entered directly when the
 * pointers allow nothing wider, and from .Lsmall for the tails of the
 * wider loops. The loop count is bytes itself.
 */
10662306a36Sopenharmony_ci.Loop_not_aligned: /* Works for as small as one byte */
10762306a36Sopenharmony_ci	p3=sp1loop0(.Loop1,bytes)
10862306a36Sopenharmony_ci.Loop1:
10962306a36Sopenharmony_ci1080:
11062306a36Sopenharmony_ci1180:
11162306a36Sopenharmony_ci	{
11262306a36Sopenharmony_ci		if (p3) memb(dst++#1) = w_dbuf
11362306a36Sopenharmony_ci		w_dbuf = memub(src++#1)
11462306a36Sopenharmony_ci	}:endloop0
11562306a36Sopenharmony_ci	/* Done */
	/*
	 * Epilog: store the final byte and return 0 in r0. This store does
	 * not advance dst, and bytes is not updated on this path.
	 */
11662306a36Sopenharmony_ci1190:
11762306a36Sopenharmony_ci	{
11862306a36Sopenharmony_ci		memb(dst) = w_dbuf
11962306a36Sopenharmony_ci		jumpr r31
12062306a36Sopenharmony_ci		r0 = #0
12162306a36Sopenharmony_ci	}
12262306a36Sopenharmony_ci
/* Tail handling: if any bytes remain, finish them in the byte loop. */
12362306a36Sopenharmony_ci.Lsmall:
12462306a36Sopenharmony_ci	{
12562306a36Sopenharmony_ci		p0 = cmp.gtu(bytes,#0)
12662306a36Sopenharmony_ci		if (p0.new) jump:nt .Loop_not_aligned
12762306a36Sopenharmony_ci	}
/* Nothing left to copy: return 0 in r0. */
12862306a36Sopenharmony_ci.Ldone:
12962306a36Sopenharmony_ci	{
13062306a36Sopenharmony_ci		r0 = #0
13162306a36Sopenharmony_ci		jumpr r31
13262306a36Sopenharmony_ci	}
/*
 * Mutual alignment. src and dst share their low three bits here, so
 * advancing both by 1, 2 and 4 bytes as dictated by bits 0, 1 and 2 of
 * src leaves both 8-byte aligned. Each step is a pair of packets: the
 * first tests one bit of src and, using the new predicate value, loads
 * that many bytes. The second stores them and adjusts bytes and src.
 * dst is advanced by the post-incrementing store.
 *
 * p1 is not rewritten on the way here, so it still holds the entry test
 * (more than 15 bytes). Shorter copies leave for .Lsmall in the first
 * packet, before anything has been stored.
 */
13362306a36Sopenharmony_ci	.falign
13462306a36Sopenharmony_ci.Lalign:
13562306a36Sopenharmony_ci1000:
13662306a36Sopenharmony_ci	{
13762306a36Sopenharmony_ci		if (p0.new) w_dbuf = memub(src)
13862306a36Sopenharmony_ci		p0 = tstbit(src,#0)
13962306a36Sopenharmony_ci		if (!p1) jump .Lsmall
14062306a36Sopenharmony_ci	}
	/* src was odd: emit the byte so that src becomes 2-byte aligned. */
14162306a36Sopenharmony_ci1100:
14262306a36Sopenharmony_ci	{
14362306a36Sopenharmony_ci		if (p0) memb(dst++#1) = w_dbuf
14462306a36Sopenharmony_ci		if (p0) bytes = add(bytes,#-1)
14562306a36Sopenharmony_ci		if (p0) src = add(src,#1)
14662306a36Sopenharmony_ci	}
	/* Bit 1 of src decides whether a halfword is needed next. */
14762306a36Sopenharmony_ci2000:
14862306a36Sopenharmony_ci	{
14962306a36Sopenharmony_ci		if (p0.new) w_dbuf = memuh(src)
15062306a36Sopenharmony_ci		p0 = tstbit(src,#1)
15162306a36Sopenharmony_ci		if (!p1) jump .Lsmall
15262306a36Sopenharmony_ci	}
15362306a36Sopenharmony_ci2100:
15462306a36Sopenharmony_ci	{
15562306a36Sopenharmony_ci		if (p0) memh(dst++#2) = w_dbuf
15662306a36Sopenharmony_ci		if (p0) bytes = add(bytes,#-2)
15762306a36Sopenharmony_ci		if (p0) src = add(src,#2)
15862306a36Sopenharmony_ci	}
	/* Bit 2 of src decides whether a word is needed to finish. */
15962306a36Sopenharmony_ci4000:
16062306a36Sopenharmony_ci	{
16162306a36Sopenharmony_ci		if (p0.new) w_dbuf = memw(src)
16262306a36Sopenharmony_ci		p0 = tstbit(src,#2)
16362306a36Sopenharmony_ci		if (!p1) jump .Lsmall
16462306a36Sopenharmony_ci	}
	/*
	 * Both pointers are now 8-byte aligned. Restart from the top so the
	 * entry checks run again on the adjusted src, dst and bytes.
	 */
16562306a36Sopenharmony_ci4100:
16662306a36Sopenharmony_ci	{
16762306a36Sopenharmony_ci		if (p0) memw(dst++#4) = w_dbuf
16862306a36Sopenharmony_ci		if (p0) bytes = add(bytes,#-4)
16962306a36Sopenharmony_ci		if (p0) src = add(src,#4)
17062306a36Sopenharmony_ci		jump FUNCNAME
17162306a36Sopenharmony_ci	}
17262306a36Sopenharmony_ci	.size FUNCNAME,.-FUNCNAME
173