/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 */

#include <linux/linkage.h>

/*
 * Endian-dependent helpers used by the unaligned-source paths.
 *
 * Those paths keep the 1-3 source bytes that have been read but not yet
 * stored in a "carry" register (r5) and stitch them together with each
 * freshly loaded aligned word:
 *
 *   SHIFT_1   - move the part of the new word that completes the current
 *               output word into position (it is then OR-ed with the carry)
 *   SHIFT_2   - keep the rest of the new word as the next carry
 *   MERGE_1/2 - build the initial 3-byte carry from a byte and a halfword
 *               (off-by-1 case only); MERGE_2 is empty on little-endian
 *   EXTRACT_1 - pull the halfword to be stored out of the final carry
 *   EXTRACT_2 - pull the last byte to be stored out of the final carry
 *
 * Note: little-endian EXTRACT_1 and big-endian EXTRACT_2 ignore IMM and use
 * a fixed mask/shift instead.
 */
#ifdef __LITTLE_ENDIAN__
# define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM
# define MERGE_2(RX,RY,IMM)
# define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM
#else
# define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08
#endif

/*
 * Bulk-copy primitives for the fully aligned path.
 *
 * LOADX/STOREX move one unit and post-increment the pointer (.ab) by the
 * unit size: 8 bytes with CONFIG_ARC_HAS_LL64 (ldd/std), 4 bytes otherwise.
 * The aligned loop is unrolled x4, so one iteration moves 32 or 16 bytes;
 * ZOLSHFT converts a byte count into iterations and ZOLAND masks out the
 * bytes left over for the byte-wise tail loop.
 */
#ifdef CONFIG_ARC_HAS_LL64
# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
# define ZOLSHFT		5
# define ZOLAND			0x1F
#else
# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
# define ZOLSHFT		4
# define ZOLAND			0xF
#endif

/*
 * memcpy
 *
 * In:    r0 = destination, r1 = source, r2 = number of bytes
 * Out:   r0 = destination (r0 is never written; r3 is the working cursor)
 * Uses:  r3-r10, lp_count and the status flags.  With 64-bit LOADX/STOREX
 *        the partner registers of each pair (r5, r7, r9, r11) are
 *        presumably written as well - confirm against the ISA manual.
 *
 * Strategy:
 *   - size == 0           : return immediately
 *   - size <= 8           : plain byte loop
 *   - otherwise           : byte-copy until the destination is 32-bit
 *                           aligned, then dispatch on (source & 3):
 *       0 -> CASE 0, unrolled word/double-word copy
 *       1 -> CASE 1, 2 -> CASE 2, 3 -> CASE 3: aligned word loads from the
 *            source, shifted and merged with a carry register before being
 *            stored as aligned words
 *     Every path finishes with a byte loop for the remaining tail.
 *
 * All loops are zero-overhead loops (lpnz): the loop is skipped when the
 * Z flag is set, and otherwise the body up to the named label runs
 * lp_count times.  Instructions following a ".d" branch/jump sit in its
 * delay slot and execute whether or not the branch is taken.
 */
ENTRY_CFI(memcpy)
	mov.f	0, r2			; only sets flags from the size
;;; if size is zero
	jz.d	[blink]
	mov	r3, r0			; (delay slot) do not clobber ret val

;;; if size <= 8
	cmp	r2, 8
	bls.d	@.Lsmallchunk
	mov.f	lp_count, r2		; (delay slot) byte count for .Lsmallchunk

	;; Copy single bytes until the destination is 32bit aligned.
	;; Z is set by the and.f when it already is, which skips the loop.
	and.f	r4, r0, 0x03
	rsub	lp_count, r4, 4		; lp_count = 4 - (dst & 3)
	lpnz	@.Laligndestination
	;; LOOP BEGIN
	ldb.ab	r5, [r1, 1]
	sub	r2, r2, 1
	stb.ab	r5, [r3, 1]
.Laligndestination:

;;; Check the alignment of the source
	and.f	r4, r1, 0x03
	bnz.d	@.Lsourceunaligned

;;; CASE 0: Both source and destination are 32bit aligned
;;; Convert len to Dwords, unfold x4
	;; The lsr.f below is the delay slot of the bnz.d above, so it also
	;; runs when the branch is taken; the unaligned paths recompute both
	;; lp_count and the flags before they use them.
	lsr.f	lp_count, r2, ZOLSHFT
	lpnz	@.Lcopy32_64bytes
	;; LOOP START
	LOADX (r6, r1)
	LOADX (r8, r1)
	LOADX (r10, r1)
	LOADX (r4, r1)
	STOREX (r6, r3)
	STOREX (r8, r3)
	STOREX (r10, r3)
	STOREX (r4, r3)
.Lcopy32_64bytes:

	and.f	lp_count, r2, ZOLAND	; bytes left after the unrolled loop
.Lsmallchunk:
	lpnz	@.Lcopyremainingbytes
	;; LOOP START
	ldb.ab	r5, [r1, 1]
	stb.ab	r5, [r3, 1]
.Lcopyremainingbytes:

	j	[blink]
;;; END CASE 0

.Lsourceunaligned:
	;; r4 = source & 3 (1, 2 or 3).  Both delay slots below are shared
	;; set-up: every unaligned case consumes at least one byte.
	cmp	r4, 2
	beq.d	@.LunalignedOffby2
	sub	r2, r2, 1		; (delay slot) runs for all three cases

	bhi.d	@.LunalignedOffby3
	ldb.ab	r5, [r1, 1]		; (delay slot) runs for cases 1 and 3

;;; CASE 1: The source is unaligned, off by 1
	;; Hence I need to read 1 byte for a 16bit alignment
	;; and 2bytes to reach 32bit alignment
	ldh.ab	r6, [r1, 2]
	sub	r2, r2, 2
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
	;; r5 = carry holding the three bytes read so far
	MERGE_1 (r6, r6, 8)
	MERGE_2 (r5, r5, 24)
	or	r5, r5, r6

	;; Both src and dst are aligned
	lpnz	@.Lcopy8bytes_1
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1, 4]

	SHIFT_1 (r7, r6, 24)
	or	r7, r7, r5
	SHIFT_2 (r5, r6, 8)

	SHIFT_1 (r9, r8, 24)
	or	r9, r9, r5
	SHIFT_2 (r5, r8, 8)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_1:

	;; Write back the remaining 16bits
	EXTRACT_1 (r6, r5, 16)
	sth.ab	r6, [r3, 2]
	;; Write back the remaining 8bits
	EXTRACT_2 (r5, r5, 16)
	stb.ab	r5, [r3, 1]

	and.f	lp_count, r2, 0x07	; Last 8bytes
	lpnz	@.Lcopybytewise_1
	;; LOOP START
	ldb.ab	r6, [r1, 1]
	stb.ab	r6, [r3, 1]
.Lcopybytewise_1:
	j	[blink]

.LunalignedOffby2:
;;; CASE 2: The source is unaligned, off by 2
	;; r5 = carry holding the two bytes read here
	ldh.ab	r5, [r1, 2]
	sub	r2, r2, 1		; second byte; the first was counted above

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	;; Only when the loop will run: place the carry in the upper half.
	;; The loop body sets no flags, so the matching lsr.nz after the
	;; loop sees the same condition.
	asl.nz	r5, r5, 16
#endif
	lpnz	@.Lcopy8bytes_2
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1, 4]

	SHIFT_1 (r7, r6, 16)
	or	r7, r7, r5
	SHIFT_2 (r5, r6, 16)

	SHIFT_1 (r9, r8, 16)
	or	r9, r9, r5
	SHIFT_2 (r5, r8, 16)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_2:

#ifdef __BIG_ENDIAN__
	lsr.nz	r5, r5, 16
#endif
	sth.ab	r5, [r3, 2]		; flush the two carried bytes

	and.f	lp_count, r2, 0x07	; Last 8bytes
	lpnz	@.Lcopybytewise_2
	;; LOOP START
	ldb.ab	r6, [r1, 1]
	stb.ab	r6, [r3, 1]
.Lcopybytewise_2:
	j	[blink]

.LunalignedOffby3:
;;; CASE 3: The source is unaligned, off by 3
;;; Hence, I need to read 1 byte to achieve the 32bit alignment
	;; r5 = carry holding the single byte read in the delay slot above

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	;; Only when the loop will run: place the carry in the top byte.
	;; (.nz is the same condition as .ne; spelled like CASE 2.)
	asl.nz	r5, r5, 24
#endif
	lpnz	@.Lcopy8bytes_3
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1, 4]

	SHIFT_1 (r7, r6, 8)
	or	r7, r7, r5
	SHIFT_2 (r5, r6, 24)

	SHIFT_1 (r9, r8, 8)
	or	r9, r9, r5
	SHIFT_2 (r5, r8, 24)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_3:

#ifdef __BIG_ENDIAN__
	lsr.nz	r5, r5, 24
#endif
	stb.ab	r5, [r3, 1]		; flush the carried byte

	and.f	lp_count, r2, 0x07	; Last 8bytes
	lpnz	@.Lcopybytewise_3
	;; LOOP START
	ldb.ab	r6, [r1, 1]
	stb.ab	r6, [r3, 1]
.Lcopybytewise_3:
	j	[blink]

END_CFI(memcpy)