#!/usr/bin/env ruby

# Copyright (c) 2023-2024 Huawei Device Co., Ltd.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Wrapper over the library memmove function: keeping the call in a separate
# function avoids the prologue bloating in the caller.
# inputs:
#   dst  - destination address
#   src  - source address
#   size - number of bytes to move
# The pointer returned by memmove is discarded; the wrapper itself returns void.
function(:MemmoveInterposer,
          params: {dst: 'ptr', src: 'ptr', size: 'u32'},
          mode: [:FastPath]) {
    Call(dst, src, size).Method("memmove").ptr
    ReturnVoid().void
}
24
# Register set handed to the register allocator of TryBigCopy (regalloc_set).
# The architecture-specific selection below is currently disabled (kept as a
# comment), so $panda_mask is used unconditionally.
#if Options.arch_64_bits?
#    $regmask = RegMask.new($full_regmap, :tmp0, :tmp1, :arg0, :arg1, :arg2, :arg3, :arg4)
#else
    $regmask = $panda_mask
#end
30# inputs:
31# src_start is len in bytes
32function(:TryBigCopy,
33          params: {dst_data: 'ptr', src_data: 'ptr', len: 'u32'},
34          regalloc_set: $regmask,
35          regmap: $full_regmap,
36          mode: [:FastPath]) {
37
38    # Arm32 is not supported
39    if Options.arch == :arm32
40        Intrinsic(:UNREACHABLE).void.Terminator
41        next
42    end
43
44    len_bytes := len
45    len_word := Cast(len_bytes).word
46
47    # if bytes size > 4096 bytes, copy via system routine
48    compare := Compare(len_bytes, 4096).GT.b
49    IfImm(compare).Imm(0).NE.Unlikely.b {
50        LiveOut(dst_data).DstReg(regmap[:arg0]).ptr
51        LiveOut(src_data).DstReg(regmap[:arg1]).ptr
52        LiveOut(len_bytes).DstReg(regmap[:arg2]).u32
53        ep_offset = get_entrypoint_offset("MEMMOVE_INTERPOSER")
54        Intrinsic(:TAIL_CALL).AddImm(ep_offset).MethodAsImm("MemmoveInterposer").Terminator.void
55        Intrinsic(:UNREACHABLE).Terminator.void if defines.DEBUG
56    }
57
58    If(src_data, dst_data).CC(:CC_A).b {
59        Goto(:ForwardCopy)
60    }
61
62    gap := Sub(dst_data, src_data).word
63    If(gap, len_word).AE.b {
64        Goto(:ForwardCopy)
65    }
66
67    # BackwardCopy
68    compare := Compare(len_bytes, 64).GE.b
69    IfImm(compare).Imm(0).NE.b {
70        Goto(:BackwardCopy64)
71    }
72
73    idx := Sub(len_bytes, 8).i32
74    buf := Load(src_data, idx).u64
75
76Label(:BackwardCopyLoop)
77    idx := Phi(idx, idx_prev).i32
78    buf := Phi(buf, buf_prev).u64
79    compare := Compare(idx, 8).LE.b
80    IfImm(compare).Imm(0).NE.Unlikely.b {
81        Goto(:BackwardCopyTail)
82    }
83    Store(dst_data, idx, buf).u64
84    idx_prev := Sub(idx, 8).i32
85    buf_prev := Load(src_data, idx_prev).u64
86    Goto(:BackwardCopyLoop)
87
88Label(:BackwardCopyTail)
89    buf1 := LoadI(src_data).Imm(0).u64
90    Store(dst_data, idx, buf).u64
91    StoreI(dst_data, buf1).Imm(0).u64
92    Goto(:End)
93
94Label(:BackwardCopy64)
95    # This is naive x6 loop unroll for 64+ bytes
96    # TODO (asidorov): remove when LoopUnroll pass added in Irtoc
97    first_buf0 := LoadI(src_data).Imm(0).u64
98    first_buf1 := LoadI(src_data).Imm(8).u64
99    first_buf2 := LoadI(src_data).Imm(16).u64
100    first_buf3 := LoadI(src_data).Imm(24).u64
101    first_buf4 := LoadI(src_data).Imm(32).u64
102    first_buf5 := LoadI(src_data).Imm(40).u64
103    len_tail := SubI(len_word).Imm(48).word
104    src_ptr_init := Add(src_data, len_tail).ptr
105    dst_ptr_init := Add(dst_data, len_tail).ptr
106
107Label(:BackwardCopyLoop64)
108    src_ptr := Phi(src_ptr_init, prev_src_ptr).ptr
109    dst_ptr := Phi(dst_ptr_init, prev_dst_ptr).ptr
110
111    buf5 := LoadI(src_ptr).Imm(40).u64
112    buf4 := LoadI(src_ptr).Imm(32).u64
113    buf3 := LoadI(src_ptr).Imm(24).u64
114    buf2 := LoadI(src_ptr).Imm(16).u64
115    buf1 := LoadI(src_ptr).Imm(8).u64
116    buf0 := LoadI(src_ptr).Imm(0).u64
117
118    StoreI(dst_ptr, buf5).Imm(40).u64
119    StoreI(dst_ptr, buf4).Imm(32).u64
120    StoreI(dst_ptr, buf3).Imm(24).u64
121    StoreI(dst_ptr, buf2).Imm(16).u64
122    StoreI(dst_ptr, buf1).Imm(8).u64
123    StoreI(dst_ptr, buf0).Imm(0).u64
124
125    prev_src_ptr := SubI(src_ptr).Imm(48).ptr
126    prev_dst_ptr := SubI(dst_ptr).Imm(48).ptr
127    If(prev_src_ptr, src_data).CC(:CC_B).Unlikely.b {
128        Goto(:BackwardCopyTail64)
129    } Else {
130        Goto(:BackwardCopyLoop64)
131    }
132
133Label(:BackwardCopyTail64)
134    StoreI(dst_data, first_buf5).Imm(40).u64
135    StoreI(dst_data, first_buf4).Imm(32).u64
136    StoreI(dst_data, first_buf3).Imm(24).u64
137    StoreI(dst_data, first_buf2).Imm(16).u64
138    StoreI(dst_data, first_buf1).Imm(8).u64
139    StoreI(dst_data, first_buf0).Imm(0).u64
140    Goto(:End)
141
142Label(:ForwardCopy)
143    compare := Compare(len_bytes, 64).GE.b
144    IfImm(compare).Imm(0).NE.b {
145        Goto(:ForwardCopy64)
146    }
147
148    # ForwardCopy
149    len_bytes := Sub(len_bytes, 16).i32
150    idx := 0
151    buf := Load(src_data, idx).u64
152
153Label(:ForwardCopyLoop)
154    idx := Phi(idx, idx_next).i32
155    buf := Phi(buf, buf_next).u64
156    If(idx, len_bytes).GE.Unlikely.b {
157        Goto(:ForwardCopyTail)
158    }
159    Store(dst_data, idx, buf).u64
160    idx_next := Add(idx, 8).i32
161    buf_next := Load(src_data, idx_next).u64
162    Goto(:ForwardCopyLoop)
163
164Label(:ForwardCopyTail)
165    len_bytes := Add(len_bytes, 8).i32
166    buf1 := Load(src_data, len_bytes).u64
167    Store(dst_data, idx, buf).u64
168    Store(dst_data, len_bytes, buf1).u64
169
170    Goto(:End)
171
172Label(:ForwardCopy64)
173    # This is naive x6 loop unroll for 64+ bytes
174    # TODO (asidorov): remove when LoopUnroll pass added in Irtoc
175    len_tail := SubI(len_word).Imm(48).word
176    src_end_data := Add(src_data, len_tail).ptr
177    last_buf0 := LoadI(src_end_data).Imm(0).u64
178    last_buf1 := LoadI(src_end_data).Imm(8).u64
179    last_buf2 := LoadI(src_end_data).Imm(16).u64
180    last_buf3 := LoadI(src_end_data).Imm(24).u64
181    last_buf4 := LoadI(src_end_data).Imm(32).u64
182    last_buf5 := LoadI(src_end_data).Imm(40).u64
183
184Label(:ForwardCopyLoop64)
185    src_ptr := Phi(src_data, next_src_ptr).ptr
186    dst_ptr := Phi(dst_data, next_dst_ptr).ptr
187
188    buf0 := LoadI(src_ptr).Imm(0).u64
189    buf1 := LoadI(src_ptr).Imm(8).u64
190    buf2 := LoadI(src_ptr).Imm(16).u64
191    buf3 := LoadI(src_ptr).Imm(24).u64
192    buf4 := LoadI(src_ptr).Imm(32).u64
193    buf5 := LoadI(src_ptr).Imm(40).u64
194
195    StoreI(dst_ptr, buf0).Imm(0).u64
196    StoreI(dst_ptr, buf1).Imm(8).u64
197    StoreI(dst_ptr, buf2).Imm(16).u64
198    StoreI(dst_ptr, buf3).Imm(24).u64
199    StoreI(dst_ptr, buf4).Imm(32).u64
200    StoreI(dst_ptr, buf5).Imm(40).u64
201
202    next_src_ptr := AddI(src_ptr).Imm(48).ptr
203    next_dst_ptr := AddI(dst_ptr).Imm(48).ptr
204    If(next_src_ptr, src_end_data).AE.Unlikely.b {
205        Goto(:ForwardCopyTail64)
206    } Else {
207        Goto(:ForwardCopyLoop64)
208    }
209
210Label(:ForwardCopyTail64)
211    dst_end_data := Add(dst_data, len_tail).ptr
212
213    StoreI(dst_end_data, last_buf0).Imm(0).u64
214    StoreI(dst_end_data, last_buf1).Imm(8).u64
215    StoreI(dst_end_data, last_buf2).Imm(16).u64
216    StoreI(dst_end_data, last_buf3).Imm(24).u64
217    StoreI(dst_end_data, last_buf4).Imm(32).u64
218    StoreI(dst_end_data, last_buf5).Imm(40).u64
219    ### End copy
220
221Label(:End)
222    ReturnVoid().void
223}
224
225