/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
 */

/*
 * Template for a memory copy routine (Hexagon assembly).
 *
 * The following names are used but not defined in this file; presumably
 * the including file supplies them as preprocessor macros (confirm against
 * the includer):
 *   FUNCNAME     - name of the generated global function
 *   dst, src     - destination / source pointer registers
 *   bytes        - register holding the number of bytes still to copy
 *   loopcount    - scratch register holding the hardware loop trip count
 *   d_dbuf       - 64-bit register pair used as the doubleword copy buffer
 *   w_dbuf       - 32-bit register used as the word/half/byte copy buffer
 *   src_dst_sav  - register pair that receives the entry values of src/dst
 *
 * Every exit path in this file returns with r0 = 0.
 * Scratch registers written here besides the names above: r3, r4, p0, p1, p3.
 *
 * Strategy:
 *   - src and dst both 8-byte aligned, more than 15 bytes: doubleword loop
 *   - src and dst share the same low three address bits:   align, restart
 *   - both 4-byte aligned, more than 7 bytes:               word loop
 *   - both 2-byte aligned, more than 3 bytes:               halfword loop
 *   - anything else, and every leftover tail:               byte loop
 */

/* Numerology:
 * WXYZ
 * W: width in bytes
 * X: Load=0, Store=1
 * Y: Location 0=preamble,8=loop,9=epilog
 * Z: Location=0,handler=9
 *
 * Example: 8180 marks the 8-byte store inside the loop, 8080 the 8-byte
 * load in that same packet.  Labels ending in 9 are not defined in this
 * file; presumably they are the matching handlers in the including file.
 */
	.text
	.global FUNCNAME
	.type FUNCNAME, @function
	.p2align 5
FUNCNAME:
	/*
	 * Nothing to do for a zero length.  In the same packet compute
	 *   r3 = dst | src : low bits clear only if BOTH pointers are aligned
	 *   r4 = dst ^ src : low bits clear if both pointers share the same
	 *                    misalignment
	 */
	{
		p0 = cmp.gtu(bytes,#0)
		if (!p0.new) jump:nt .Ldone
		r3 = or(dst,src)
		r4 = xor(dst,src)
	}
	/*
	 * p1 = (bytes > 15).  Leave for the narrower paths unless both
	 * pointers are 8-byte aligned.  src_dst_sav captures the current
	 * src/dst; it is not read anywhere in this file.
	 */
	{
		p1 = cmp.gtu(bytes,#15)
		p0 = bitsclr(r3,#7)
		if (!p0.new) jump:nt .Loop_not_aligned_8
		src_dst_sav = combine(src,dst)
	}

	/* 8-byte aligned: 15 bytes or fewer go straight to the byte loop. */
	{
		loopcount = lsr(bytes,#3)
		if (!p1) jump .Lsmall
	}
	/*
	 * Pipelined hardware loop: each packet stores the doubleword loaded
	 * by the previous iteration while loading the next one.  The store
	 * is predicated on p3 so that the first pass, which has nothing
	 * loaded yet, skips it (see sp1loop0 in the Hexagon reference manual).
	 */
	p3=sp1loop0(.Loop8,loopcount)
.Loop8:
8080:
8180:
	{
		if (p3) memd(dst++#8) = d_dbuf
		d_dbuf = memd(src++#8)
	}:endloop0
8190:
	/* Epilog: store the last doubleword loaded, then copy the tail. */
	{
		memd(dst++#8) = d_dbuf
		bytes -= asl(loopcount,#3)
		jump .Lsmall
	}

.Loop_not_aligned_8:
	/*
	 * (dst ^ src) & 7 == 0: both pointers have the same offset within a
	 * doubleword, so they can be brought to 8-byte alignment together.
	 */
	{
		p0 = bitsclr(r4,#7)
		if (p0.new) jump:nt .Lalign
	}
	/* Otherwise try words: needs both pointers 4-byte aligned. */
	{
		p0 = bitsclr(r3,#3)
		if (!p0.new) jump:nt .Loop_not_aligned_4
		p1 = cmp.gtu(bytes,#7)
	}

	/* 7 bytes or fewer: use the byte loop instead. */
	{
		if (!p1) jump .Lsmall
		loopcount = lsr(bytes,#2)
	}
	/* Same pipelined store-previous/load-next shape as .Loop8. */
	p3=sp1loop0(.Loop4,loopcount)
.Loop4:
4080:
4180:
	{
		if (p3) memw(dst++#4) = w_dbuf
		w_dbuf = memw(src++#4)
	}:endloop0
4190:
	/* Epilog: store the last word loaded, then copy the tail. */
	{
		memw(dst++#4) = w_dbuf
		bytes -= asl(loopcount,#2)
		jump .Lsmall
	}

.Loop_not_aligned_4:
	/* Try halfwords: needs both pointers 2-byte aligned. */
	{
		p0 = bitsclr(r3,#1)
		if (!p0.new) jump:nt .Loop_not_aligned
		p1 = cmp.gtu(bytes,#3)
	}

	/* 3 bytes or fewer: use the byte loop instead. */
	{
		if (!p1) jump .Lsmall
		loopcount = lsr(bytes,#1)
	}
	/* Same pipelined store-previous/load-next shape as .Loop8. */
	p3=sp1loop0(.Loop2,loopcount)
.Loop2:
2080:
2180:
	{
		if (p3) memh(dst++#2) = w_dbuf
		w_dbuf = memuh(src++#2)
	}:endloop0
2190:
	/* Epilog: store the last halfword loaded, then copy the tail. */
	{
		memh(dst++#2) = w_dbuf
		bytes -= asl(loopcount,#1)
		jump .Lsmall
	}

.Loop_not_aligned: /* Works for as small as one byte */
	/* Byte loop over everything that is left; also the tail handler. */
	p3=sp1loop0(.Loop1,bytes)
.Loop1:
1080:
1180:
	{
		if (p3) memb(dst++#1) = w_dbuf
		w_dbuf = memub(src++#1)
	}:endloop0
	/* Done */
1190:
	/* Epilog: store the final byte and return 0. */
	{
		memb(dst) = w_dbuf
		jumpr r31
		r0 = #0
	}

.Lsmall:
	/* Copy whatever remains bytewise; fall through when nothing is left. */
	{
		p0 = cmp.gtu(bytes,#0)
		if (p0.new) jump:nt .Loop_not_aligned
	}
.Ldone:
	/* Return 0. */
	{
		r0 = #0
		jumpr r31
	}
	.falign
.Lalign:
	/*
	 * Alignment preamble.  Reached only from .Loop_not_aligned_8, so p1
	 * still holds (bytes > 15) from the entry sequence; shorter copies
	 * divert to the byte loop at the first check below.
	 *
	 * Each step tests one low bit of src.  If the bit is set, one unit of
	 * that width is loaded (predicated on the freshly computed p0) and
	 * the following packet stores it and advances dst, src and bytes.
	 */
1000:
	{
		if (p0.new) w_dbuf = memub(src)
		p0 = tstbit(src,#0)
		if (!p1) jump .Lsmall
	}
1100:
	/* Odd source address: move one byte. */
	{
		if (p0) memb(dst++#1) = w_dbuf
		if (p0) bytes = add(bytes,#-1)
		if (p0) src = add(src,#1)
	}
2000:
	{
		if (p0.new) w_dbuf = memuh(src)
		p0 = tstbit(src,#1)
		if (!p1) jump .Lsmall
	}
2100:
	/* Bit 1 of the source address set: move one halfword. */
	{
		if (p0) memh(dst++#2) = w_dbuf
		if (p0) bytes = add(bytes,#-2)
		if (p0) src = add(src,#2)
	}
4000:
	{
		if (p0.new) w_dbuf = memw(src)
		p0 = tstbit(src,#2)
		if (!p1) jump .Lsmall
	}
4100:
	/*
	 * Bit 2 of the source address set: move one word.  Then restart from
	 * the top with the updated src, dst and bytes.
	 */
	{
		if (p0) memw(dst++#4) = w_dbuf
		if (p0) bytes = add(bytes,#-4)
		if (p0) src = add(src,#4)
		jump FUNCNAME
	}
	.size FUNCNAME,.-FUNCNAME