/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
 */

/* Numerology:
 * WXYZ
 * W: width in bytes
 * X: Load=0, Store=1
 * Y: Location 0=preamble,8=loop,9=epilog
 * Z: Location=0,handler=9
 *
 * Example: 8190 labels the 8-byte (W=8) store (X=1) in the loop
 * epilog (Y=9), at the instruction's own location (Z=0).
 */
/*
 * FUNCNAME: copy `bytes` bytes from `src` to `dst`, using the widest
 * access size (8, 4, 2 or 1 bytes) that the pointer alignment permits.
 *
 * This is a template.  FUNCNAME and the operand names bytes, dst, src,
 * loopcount, src_dst_sav, d_dbuf and w_dbuf are not defined here; the
 * including file must define them before this text is assembled.
 *
 * Returns: r0 = 0 on both exit paths (1190 and .Ldone), via jumpr r31.
 * Writes:  r3, r4, p0, p1, p3, hardware loop 0, and whatever registers
 *          the operand names above stand for.
 *
 * The numeric labels (1000 ... 8190) tag individual load/store packets
 * using the WXYZ scheme; nothing in this template refers to them.
 * NOTE(review): presumably the including file pairs each of them with a
 * fixup label (Z=9) in an exception table - confirm there.
 *
 * NOTE(review): src_dst_sav is written in the second entry packet and
 * never read here, so it is presumably consumed by that fixup code.  The
 * loop epilogs (8190/4190/2190) reduce `bytes` without refreshing
 * src_dst_sav before the tail byte loop runs; if the fixups derive the
 * uncopied count from the saved pointers, verify that a fault in the
 * tail byte loop is still accounted for correctly.
 */
	.text
	.global FUNCNAME
	.type FUNCNAME, @function
	.p2align 5
FUNCNAME:
	/*
	 * Zero length goes straight to .Ldone.  r3 and r4 feed the
	 * alignment tests below: the low bits of r3 (dst|src) are clear
	 * only when both pointers are aligned, the low bits of r4 (dst^src)
	 * are clear when both pointers have the same offset.
	 */
	{
		p0 = cmp.gtu(bytes,#0)
		if (!p0.new) jump:nt .Ldone
		r3 = or(dst,src)
		r4 = xor(dst,src)
	}
	/*
	 * p1 = more than 15 bytes to copy; p0 = both pointers 8-byte
	 * aligned.  The save of src/dst is in the same packet as the
	 * branch, so it is performed on both outcomes.
	 */
	{
		p1 = cmp.gtu(bytes,#15)
		p0 = bitsclr(r3,#7)
		if (!p0.new) jump:nt .Loop_not_aligned_8
		src_dst_sav = combine(src,dst)
	}

	/*
	 * Both pointers are 8-byte aligned.  With fewer than 16 bytes the
	 * byte loop (via .Lsmall) does the whole copy; otherwise loopcount
	 * is the number of whole doublewords.
	 */
	{
		loopcount = lsr(bytes,#3)
		if (!p1) jump .Lsmall
	}
	/*
	 * Software-pipelined hardware loop: every pass loads a doubleword,
	 * and the store guarded by p3 writes the doubleword loaded by the
	 * previous pass (p3 suppresses the store on the first pass).
	 */
	p3=sp1loop0(.Loop8,loopcount)
.Loop8:
8080:
8180:
	{
		if (p3) memd(dst++#8) = d_dbuf
		d_dbuf = memd(src++#8)
	}:endloop0
	/*
	 * Epilog: store the last doubleword loaded, subtract the bytes the
	 * loop handled, and let .Lsmall copy any remainder.
	 */
8190:
	{
		memd(dst++#8) = d_dbuf
		bytes -= asl(loopcount,#3)
		jump .Lsmall
	}

	/*
	 * At least one pointer is not 8-byte aligned.  If both have the
	 * same offset within a doubleword, .Lalign can bring them to 8-byte
	 * alignment.  p1 (bytes > 15) is not written here and is still
	 * valid when .Lalign tests it.
	 */
.Loop_not_aligned_8:
	{
		p0 = bitsclr(r4,#7)
		if (p0.new) jump:nt .Lalign
	}
	/*
	 * Offsets differ.  Use word copies if both pointers are 4-byte
	 * aligned; p1 now means more than 7 bytes to copy.
	 */
	{
		p0 = bitsclr(r3,#3)
		if (!p0.new) jump:nt .Loop_not_aligned_4
		p1 = cmp.gtu(bytes,#7)
	}

	/* 7 bytes or fewer: byte loop.  Otherwise count whole words. */
	{
		if (!p1) jump .Lsmall
		loopcount = lsr(bytes,#2)
	}
	/* Same pipelined load/store scheme as .Loop8, one word per pass. */
	p3=sp1loop0(.Loop4,loopcount)
.Loop4:
4080:
4180:
	{
		if (p3) memw(dst++#4) = w_dbuf
		w_dbuf = memw(src++#4)
	}:endloop0
	/* Epilog: store the last word, account for the bytes copied. */
4190:
	{
		memw(dst++#4) = w_dbuf
		bytes -= asl(loopcount,#2)
		jump .Lsmall
	}

	/*
	 * Not both 4-byte aligned.  Use halfword copies if both pointers
	 * are 2-byte aligned, otherwise fall back to the byte loop; p1 now
	 * means more than 3 bytes to copy.
	 */
.Loop_not_aligned_4:
	{
		p0 = bitsclr(r3,#1)
		if (!p0.new) jump:nt .Loop_not_aligned
		p1 = cmp.gtu(bytes,#3)
	}

	/* 3 bytes or fewer: byte loop.  Otherwise count whole halfwords. */
	{
		if (!p1) jump .Lsmall
		loopcount = lsr(bytes,#1)
	}
	/* Same pipelined load/store scheme, one halfword per pass. */
	p3=sp1loop0(.Loop2,loopcount)
.Loop2:
2080:
2180:
	{
		if (p3) memh(dst++#2) = w_dbuf
		w_dbuf = memuh(src++#2)
	}:endloop0
	/* Epilog: store the last halfword, account for the bytes copied. */
2190:
	{
		memh(dst++#2) = w_dbuf
		bytes -= asl(loopcount,#1)
		jump .Lsmall
	}

	/*
	 * Byte loop, `bytes` passes.  Every path that reaches this label
	 * has already checked that bytes is greater than zero.
	 */
.Loop_not_aligned: /* Works for as small as one byte */
	p3=sp1loop0(.Loop1,bytes)
.Loop1:
1080:
1180:
	{
		if (p3) memb(dst++#1) = w_dbuf
		w_dbuf = memub(src++#1)
	}:endloop0
	/* Done */
	/* Epilog: store the last byte (dst is not advanced) and return 0. */
1190:
	{
		memb(dst) = w_dbuf
		jumpr r31
		r0 = #0
	}

	/* Copy whatever is left, if anything, one byte at a time. */
.Lsmall:
	{
		p0 = cmp.gtu(bytes,#0)
		if (p0.new) jump:nt .Loop_not_aligned
	}
	/* Nothing left to copy: return 0. */
.Ldone:
	{
		r0 = #0
		jumpr r31
	}
	.falign
	/*
	 * src and dst have the same offset within a doubleword.  Peel off
	 * one byte, one halfword and one word, as selected by bits 0, 1
	 * and 2 of src, then restart at FUNCNAME with the advanced
	 * pointers and the reduced byte count.
	 *
	 * Each step is a pair of packets: the first tests a bit of src and
	 * loads only if it is set (p0.new); the second, under the same p0,
	 * stores, advances dst and src, and shrinks bytes.
	 *
	 * p1 still holds the `bytes > 15` result from the entry sequence
	 * and is not recomputed in this section, so a copy of 15 bytes or
	 * fewer leaves for .Lsmall at the first test and the later tests
	 * see the same value.
	 */
.Lalign:
1000:
	{
		if (p0.new) w_dbuf = memub(src)
		p0 = tstbit(src,#0)
		if (!p1) jump .Lsmall
	}
1100:
	{
		if (p0) memb(dst++#1) = w_dbuf
		if (p0) bytes = add(bytes,#-1)
		if (p0) src = add(src,#1)
	}
2000:
	{
		if (p0.new) w_dbuf = memuh(src)
		p0 = tstbit(src,#1)
		if (!p1) jump .Lsmall
	}
2100:
	{
		if (p0) memh(dst++#2) = w_dbuf
		if (p0) bytes = add(bytes,#-2)
		if (p0) src = add(src,#2)
	}
4000:
	{
		if (p0.new) w_dbuf = memw(src)
		p0 = tstbit(src,#2)
		if (!p1) jump .Lsmall
	}
4100:
	{
		if (p0) memw(dst++#4) = w_dbuf
		if (p0) bytes = add(bytes,#-4)
		if (p0) src = add(src,#4)
		jump FUNCNAME
	}
	.size FUNCNAME,.-FUNCNAME