/*
 * memset - fill memory with a constant byte
 *
 * Copyright (c) 2012-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
 *
 */

/*
 * void *memset(void *dstin, int val, size_t count)
 *
 * In:     x0 = dstin  start of the region to fill
 *         w1 = val    fill value; only the low byte is used
 *         x2 = count  number of bytes to fill
 * Out:    x0 = dstin  (x0 is never written by this routine)
 * Clobb:  x1, x2, x3, x4, x5, v0, condition flags (depending on the path)
 * Stack:  not used
 *
 * Every size class is handled with stores anchored at the start of the
 * region and stores anchored at its end; the two groups are allowed to
 * overlap, which removes the need for a byte-exact tail loop.
 */

#define dstin	x0
#define val	x1
#define valw	w1
#define count	x2
#define dst	x3
#define dstend	x4
#define zva_val	x5

.global memset
.type memset,%function
memset:

	dup	v0.16B, valw		/* v0 = fill byte replicated 16 times.  */
	add	dstend, dstin, count	/* dstend = one past the last byte.  */

	cmp	count, 96
	b.hi	.Lset_long
	cmp	count, 16
	b.hs	.Lset_medium
	mov	val, v0.D[0]		/* val = fill byte replicated 8 times.  */

	/* Set 0..15 bytes.  */
	tbz	count, 3, 1f
	/* 8..15 bytes: two overlapping 8-byte stores.  */
	str	val, [dstin]
	str	val, [dstend, -8]
	ret
	nop
1:	tbz	count, 2, 2f
	/* 4..7 bytes: two overlapping 4-byte stores.  */
	str	valw, [dstin]
	str	valw, [dstend, -4]
	ret
2:	cbz	count, 3f
	/* 1..3 bytes: first byte, then the last two bytes if count >= 2.  */
	strb	valw, [dstin]
	tbz	count, 1, 3f
	strh	valw, [dstend, -2]
3:	ret

	/* Set 16..96 bytes.  */
.Lset_medium:
	str	q0, [dstin]
	tbnz	count, 6, .Lset96	/* Bit 6 set: count is 64..96.  */
	str	q0, [dstend, -16]	/* 16..63: first and last 16 bytes.  */
	tbz	count, 5, 1f		/* 16..31: already fully covered.  */
	str	q0, [dstin, 16]		/* 32..63: first and last 32 bytes.  */
	str	q0, [dstend, -32]
1:	ret

	.p2align 4
	/* Set 64..96 bytes.  Write 64 bytes from the start and
	   32 bytes from the end.  */
.Lset96:
	str	q0, [dstin, 16]
	stp	q0, q0, [dstin, 32]
	stp	q0, q0, [dstend, -32]
	ret

	/* Set more than 96 bytes.  */
	.p2align 4
.Lset_long:
	and	valw, valw, 255
	bic	dst, dstin, 15		/* dst = dstin rounded down to 16.  */
	str	q0, [dstin]		/* Unaligned head: first 16 bytes.  */
	/* Take the DC ZVA path only if count >= 160 and the fill byte is 0;
	   when count < 160 the ccmp forces the flags to "ne".  */
	cmp	count, 160
	ccmp	valw, 0, 0, hs
	b.ne	.Lno_zva

#ifndef SKIP_ZVA_CHECK
	/* Low five bits of DCZID_EL0 must equal 4; any other value
	   (including one with bit 4 set) falls back to plain stores.  */
	mrs	zva_val, dczid_el0
	and	zva_val, zva_val, 31
	cmp	zva_val, 4		/* ZVA size is 64 bytes.  */
	b.ne	.Lno_zva
#endif
	/* Fill up to dst + 80 with stores so that everything below the
	   first 64-byte block handed to DC ZVA is already written.  */
	str	q0, [dst, 16]
	stp	q0, q0, [dst, 32]
	bic	dst, dst, 63		/* dst = rounded down to 64.  */
	sub	count, dstend, dst	/* Count is now 64 too large.  */
	sub	count, count, 128	/* Adjust count and bias for loop.  */

	.p2align 4
.Lzva_loop:
	add	dst, dst, 64
	dc	zva, dst		/* Zero one 64-byte block at dst.  */
	subs	count, count, 64
	b.hi	.Lzva_loop
	/* Tail: the last 64 bytes, anchored at the end of the region.  */
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

.Lno_zva:
	sub	count, dstend, dst	/* Count is 16 too large.  */
	sub	dst, dst, 16		/* Dst is biased by -32.  */
	sub	count, count, 64 + 16	/* Adjust count and bias for loop.  */
.Lno_zva_loop:
	/* 64 bytes per iteration; the second stp also advances dst by 64.  */
	stp	q0, q0, [dst, 32]
	stp	q0, q0, [dst, 64]!
	subs	count, count, 64
	b.hi	.Lno_zva_loop
	/* Tail: the last 64 bytes, anchored at the end of the region.  */
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

.size memset,.-memset