1/*
2 * Copyright (c) 2020-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#ifndef GRAPHIC_LITE_GRAPHIC_NEON_PIPELINE_H
17#define GRAPHIC_LITE_GRAPHIC_NEON_PIPELINE_H
18
19#include "graphic_config.h"
20#ifdef ARM_NEON_OPT
21#include <arm_neon.h>
22
23#include "gfx_utils/color.h"
24#include "graphic_neon_utils.h"
25
26namespace OHOS {
27using LoadBuf = void (*)(uint8_t* buf, uint8x8_t& r, uint8x8_t& g, uint8x8_t& b, uint8x8_t& a);
28using LoadBufA = void (*)(uint8_t* buf, uint8x8_t& r, uint8x8_t& g, uint8x8_t& b, uint8x8_t& a, uint8_t opa);
29using NeonBlend = void (*)(uint8x8_t& r1, uint8x8_t& g1, uint8x8_t& b1, uint8x8_t& a1,
30                           uint8x8_t r2, uint8x8_t g2, uint8x8_t b2, uint8x8_t a2);
31using StoreBuf = void (*)(uint8_t* buf, uint8x8_t& r, uint8x8_t& g, uint8x8_t& b, uint8x8_t& a);
32
33struct {
34    ColorMode dm;
35    LoadBuf loadDstFunc;
36    NeonBlend blendFunc;
37    StoreBuf storeDstFunc;
38}
39g_dstFunc[] = {
40    {ARGB8888, LoadBuf_ARGB8888, NeonBlendRGBA, StoreBuf_ARGB8888},
41    {XRGB8888, LoadBuf_XRGB8888, NeonBlendXRGB, StoreBuf_XRGB8888},
42    {RGB888, LoadBuf_RGB888, NeonBlendRGB, StoreBuf_RGB888},
43    {RGB565, LoadBuf_RGB565, NeonBlendRGB, StoreBuf_RGB565}
44};
45
46struct {
47    ColorMode sm;
48    LoadBufA loadSrcFunc;
49}
50g_srcFunc[] = {
51    {ARGB8888, LoadBufA_ARGB8888},
52    {XRGB8888, LoadBufA_XRGB8888},
53    {RGB888, LoadBufA_RGB888},
54    {RGB565, LoadBufA_RGB565}
55};
56
57class NeonBlendPipeLine {
58public:
59    NeonBlendPipeLine() {}
60    ~NeonBlendPipeLine() {}
61
62    void Construct(ColorMode dm, ColorMode sm, void* srcColor = nullptr, uint8_t opa = OPA_OPAQUE)
63    {
64        int16_t dstNum = sizeof(g_dstFunc) / sizeof(g_dstFunc[0]);
65        for (int16_t i = 0; i < dstNum; ++i) {
66            if (g_dstFunc[i].dm == dm) {
67                loadDstFunc_ = g_dstFunc[i].loadDstFunc;
68                blendFunc_ = g_dstFunc[i].blendFunc;
69                storeDstFunc_ = g_dstFunc[i].storeDstFunc;
70                break;
71            }
72        }
73        int16_t srcNum = sizeof(g_srcFunc) / sizeof(g_srcFunc[0]);
74        for (int16_t i = 0; i < srcNum; ++i) {
75            if (g_srcFunc[i].sm == sm) {
76                loadSrcFunc_ = g_srcFunc[i].loadSrcFunc;
77                break;
78            }
79        }
80        if (srcColor != nullptr) {
81            ConstructSrcColor(sm, srcColor, opa, r2_, g2_, b2_, a2_);
82        }
83    }
84
85    void Invoke(uint8_t* dst, uint8_t* src, uint8_t opa)
86    {
87        loadDstFunc_(dst, r1_, g1_, b1_, a1_);
88        loadSrcFunc_(src, r2_, g2_, b2_, a2_, opa);
89        blendFunc_(r1_, g1_, b1_, a1_, r2_, g2_, b2_, a2_);
90        storeDstFunc_(dst, r1_, g1_, b1_, a1_);
91    }
92
93    void Invoke(uint8_t* dst)
94    {
95        loadDstFunc_(dst, r1_, g1_, b1_, a1_);
96        blendFunc_(r1_, g1_, b1_, a1_, r2_, g2_, b2_, a2_);
97        storeDstFunc_(dst, r1_, g1_, b1_, a1_);
98    }
99
100    void Invoke(uint8_t* dst, uint8x8_t& r, uint8x8_t& g, uint8x8_t& b, uint8x8_t& a)
101    {
102        loadDstFunc_(dst, r1_, g1_, b1_, a1_);
103        blendFunc_(r1_, g1_, b1_, a1_, r, g, b, a);
104        storeDstFunc_(dst, r1_, g1_, b1_, a1_);
105    }
106
107    void NeonPreLerpARGB8888(uint8_t* buf, uint8_t r, uint8_t g, uint8_t b, uint8_t a, uint8_t* covers)
108    {
109        uint8x8x4_t vBuf = vld4_u8(buf);
110        uint8x8_t r0 = vBuf.val[NEON_R];
111        uint8x8_t g0 = vBuf.val[NEON_G];
112        uint8x8_t b0 = vBuf.val[NEON_B];
113        uint8x8_t a0 = vBuf.val[NEON_A];
114
115        uint8x8_t r1 = Multipling(vdup_n_u8(r), vld1_u8(covers));
116        uint8x8_t g1 = Multipling(vdup_n_u8(g), vld1_u8(covers));
117        uint8x8_t b1 = Multipling(vdup_n_u8(b), vld1_u8(covers));
118        uint8x8_t a1 = Multipling(vdup_n_u8(a), vld1_u8(covers));
119
120        uint8x8_t rs = NeonLerp(r0, r1, a1);
121        uint8x8_t gs = NeonLerp(g0, g1, a1);
122        uint8x8_t bs = NeonLerp(b0, b1, a1);
123        uint8x8_t as = NeonPreLerp(a0, a1, a1);
124
125        StoreBuf_ARGB8888(buf, rs, gs, bs, as);
126    }
127    void NeonPrelerpARGB8888(uint8_t* buf, uint8_t red, uint8_t green, uint8_t blue, uint8_t alpha)
128    {
129        uint8x8x4_t vBuf = vld4_u8(buf);
130        uint8x8_t r0 = vBuf.val[NEON_R];
131        uint8x8_t g0 = vBuf.val[NEON_G];
132        uint8x8_t b0 = vBuf.val[NEON_B];
133        uint8x8_t a0 = vBuf.val[NEON_A];
134
135        uint8x8_t r1 = vdup_n_u8(red);
136        uint8x8_t g1 = vdup_n_u8(green);
137        uint8x8_t b1 = vdup_n_u8(blue);
138        uint8x8_t a1 = vdup_n_u8(alpha);
139
140        uint8x8_t rs = NeonPreLerp(r0, r1, a1);
141        uint8x8_t gs = NeonPreLerp(g0, g1, a1);
142        uint8x8_t bs = NeonPreLerp(b0, b1, a1);
143        uint8x8_t as = NeonPreLerp(a0, a1, a1);
144
145        StoreBuf_ARGB8888(buf, rs, gs, bs, as);
146    }
147
148    void NeonPrelerpARGB8888(uint8_t* buf, uint8_t red, uint8_t green, uint8_t blue, uint8_t alpha, uint8_t cover)
149    {
150        uint8x8x4_t vBuf = vld4_u8(buf);
151        uint8x8_t r0 = vBuf.val[NEON_R];
152        uint8x8_t g0 = vBuf.val[NEON_G];
153        uint8x8_t b0 = vBuf.val[NEON_B];
154        uint8x8_t a0 = vBuf.val[NEON_A];
155
156        uint8x8_t r1 = Multipling(vdup_n_u8(red), vdup_n_u8(cover));
157        uint8x8_t g1 = Multipling(vdup_n_u8(green), vdup_n_u8(cover));
158        uint8x8_t b1 = Multipling(vdup_n_u8(blue), vdup_n_u8(cover));
159        uint8x8_t a1 = Multipling(vdup_n_u8(alpha), vdup_n_u8(cover));
160
161        uint8x8_t rs = NeonPreLerp(r0, r1, a1);
162        uint8x8_t gs = NeonPreLerp(g0, g1, a1);
163        uint8x8_t bs = NeonPreLerp(b0, b1, a1);
164        uint8x8_t as = NeonPreLerp(a0, a1, a1);
165
166        StoreBuf_ARGB8888(buf, rs, gs, bs, as);
167    }
168
169    void NeonPrelerpARGB8888(uint8_t* dstBuffer, uint8_t* srcBuffer, uint8_t cover)
170    {
171        uint8x8x4_t vDstBuf = vld4_u8(dstBuffer);
172        uint8x8_t r0 = vDstBuf.val[NEON_R];
173        uint8x8_t g0 = vDstBuf.val[NEON_G];
174        uint8x8_t b0 = vDstBuf.val[NEON_B];
175        uint8x8_t a0 = vDstBuf.val[NEON_A];
176        uint8x8x4_t vSrcBuf = vld4_u8(srcBuffer);
177        uint8x8_t r1 = vSrcBuf.val[NEON_R];
178        uint8x8_t g1 = vSrcBuf.val[NEON_G];
179        uint8x8_t b1 = vSrcBuf.val[NEON_B];
180        uint8x8_t a1 = vSrcBuf.val[NEON_A];
181
182        r1 = Multipling(r1, vdup_n_u8(cover));
183        g1 = Multipling(g1, vdup_n_u8(cover));
184        b1 = Multipling(b1, vdup_n_u8(cover));
185        a1 = Multipling(a1, vdup_n_u8(cover));
186
187        uint8x8_t rs = NeonPreLerp(r0, r1, a1);
188        uint8x8_t gs = NeonPreLerp(g0, g1, a1);
189        uint8x8_t bs = NeonPreLerp(b0, b1, a1);
190        uint8x8_t as = NeonPreLerp(a0, a1, a1);
191
192        StoreBuf_ARGB8888(dstBuffer, rs, gs, bs, as);
193    }
194
195    void NeonPrelerpARGB8888(uint8_t* dstBuffer, uint8_t* srcBuffer, uint8_t* covers)
196    {
197        uint8x8x4_t vDstBuf = vld4_u8(dstBuffer);
198        uint8x8_t r0 = vDstBuf.val[NEON_R];
199        uint8x8_t g0 = vDstBuf.val[NEON_G];
200        uint8x8_t b0 = vDstBuf.val[NEON_B];
201        uint8x8_t a0 = vDstBuf.val[NEON_A];
202
203        uint8x8x4_t vSrcBuf = vld4_u8(srcBuffer);
204
205        uint8x8_t r1 = Multipling(vSrcBuf.val[NEON_R], vld1_u8(covers));
206        uint8x8_t g1 = Multipling(vSrcBuf.val[NEON_G], vld1_u8(covers));
207        uint8x8_t b1 = Multipling(vSrcBuf.val[NEON_B], vld1_u8(covers));
208        uint8x8_t a1 = Multipling(vSrcBuf.val[NEON_A], vld1_u8(covers));
209
210        uint8x8_t rs = NeonPreLerp(r0, r1, a1);
211        uint8x8_t gs = NeonPreLerp(g0, g1, a1);
212        uint8x8_t bs = NeonPreLerp(b0, b1, a1);
213        uint8x8_t as = NeonPreLerp(a0, a1, a1);
214
215        StoreBuf_ARGB8888(dstBuffer, rs, gs, bs, as);
216    }
217    void NeonLerpARGB8888(uint8_t* buf, uint8_t r, uint8_t g, uint8_t b, uint8_t a,
218                           uint8_t* covers)
219    {
220        uint8x8x4_t vBuf = vld4_u8(buf);
221        uint8x8_t r0 = vBuf.val[NEON_R];
222        uint8x8_t g0 = vBuf.val[NEON_G];
223        uint8x8_t b0 = vBuf.val[NEON_B];
224        uint8x8_t a0 = vBuf.val[NEON_A];
225
226        uint8x8_t r1 = Multipling(vdup_n_u8(r), vld1_u8(covers));
227        uint8x8_t g1 = Multipling(vdup_n_u8(g), vld1_u8(covers));
228        uint8x8_t b1 = Multipling(vdup_n_u8(b), vld1_u8(covers));
229        uint8x8_t a1 = Multipling(vdup_n_u8(a), vld1_u8(covers));
230
231        uint8x8_t rs = NeonLerp(r0, r1, a1);
232        uint8x8_t gs = NeonLerp(g0, g1, a1);
233        uint8x8_t bs = NeonLerp(b0, b1, a1);
234        uint8x8_t as = NeonPreLerp(a0, a1, a1);
235
236        StoreBuf_ARGB8888(buf, rs, gs, bs, as);
237    }
238    void NeonLerpARGB8888(uint8_t* buf, uint8_t r, uint8_t g, uint8_t b, uint8_t a)
239    {
240        uint8x8x4_t vBuf = vld4_u8(buf);
241        uint8x8_t r0 = vBuf.val[NEON_R];
242        uint8x8_t g0 = vBuf.val[NEON_G];
243        uint8x8_t b0 = vBuf.val[NEON_B];
244        uint8x8_t a0 = vBuf.val[NEON_A];
245
246        uint8x8_t r1 = vdup_n_u8(r);
247        uint8x8_t g1 = vdup_n_u8(g);
248        uint8x8_t b1 = vdup_n_u8(b);
249        uint8x8_t a1 = vdup_n_u8(a);
250
251        uint8x8_t rs = NeonLerp(r0, r1, a1);
252        uint8x8_t gs = NeonLerp(g0, g1, a1);
253        uint8x8_t bs = NeonLerp(b0, b1, a1);
254        uint8x8_t as = NeonPreLerp(a0, a1, a1);
255
256        StoreBuf_ARGB8888(buf, rs, gs, bs, as);
257    }
258
259    void NeonLerpARGB8888(uint8_t* buf, uint8_t r, uint8_t g, uint8_t b, uint8_t a, uint8_t cover)
260    {
261        uint8x8x4_t vBuf = vld4_u8(buf);
262        uint8x8_t r0 = vBuf.val[NEON_R];
263        uint8x8_t g0 = vBuf.val[NEON_G];
264        uint8x8_t b0 = vBuf.val[NEON_B];
265        uint8x8_t a0 = vBuf.val[NEON_A];
266
267        uint8x8_t r1 = Multipling(vdup_n_u8(r), vdup_n_u8(cover));
268        uint8x8_t g1 = Multipling(vdup_n_u8(g), vdup_n_u8(cover));
269        uint8x8_t b1 = Multipling(vdup_n_u8(b), vdup_n_u8(cover));
270        uint8x8_t a1 = Multipling(vdup_n_u8(a), vdup_n_u8(cover));
271
272        uint8x8_t rs = NeonLerp(r0, r1, a1);
273        uint8x8_t gs = NeonLerp(g0, g1, a1);
274        uint8x8_t bs = NeonLerp(b0, b1, a1);
275        uint8x8_t as = NeonPreLerp(a0, a1, a1);
276
277        StoreBuf_ARGB8888(buf, rs, gs, bs, as);
278    }
279
280    void NeonLerpARGB8888(uint8_t* dstBuffer, uint8_t* srcBuffer, uint8_t cover)
281    {
282        uint8x8x4_t vDstBuf = vld4_u8(dstBuffer);
283        uint8x8_t r0 = vDstBuf.val[NEON_R];
284        uint8x8_t g0 = vDstBuf.val[NEON_G];
285        uint8x8_t b0 = vDstBuf.val[NEON_B];
286        uint8x8_t a0 = vDstBuf.val[NEON_A];
287        uint8x8x4_t vSrcBuf = vld4_u8(srcBuffer);
288        uint8x8_t r1 = vSrcBuf.val[NEON_R];
289        uint8x8_t g1 = vSrcBuf.val[NEON_G];
290        uint8x8_t b1 = vSrcBuf.val[NEON_B];
291        uint8x8_t a1 = vSrcBuf.val[NEON_A];
292
293        r1 = Multipling(r1, vdup_n_u8(cover));
294        g1 = Multipling(g1, vdup_n_u8(cover));
295        b1 = Multipling(b1, vdup_n_u8(cover));
296        a1 = Multipling(a1, vdup_n_u8(cover));
297
298        uint8x8_t rs = NeonLerp(r0, r1, a1);
299        uint8x8_t gs = NeonLerp(g0, g1, a1);
300        uint8x8_t bs = NeonLerp(b0, b1, a1);
301        uint8x8_t as = NeonPreLerp(a0, a1, a1);
302
303        StoreBuf_ARGB8888(dstBuffer, rs, gs, bs, as);
304    }
305
306    void NeonLerpARGB8888(uint8_t* dstBuffer, uint8_t* srcBuffer, uint8_t* covers)
307    {
308        uint8x8x4_t vDstBuf = vld4_u8(dstBuffer);
309        uint8x8_t r0 = vDstBuf.val[NEON_R];
310        uint8x8_t g0 = vDstBuf.val[NEON_G];
311        uint8x8_t b0 = vDstBuf.val[NEON_B];
312        uint8x8_t a0 = vDstBuf.val[NEON_A];
313
314        uint8x8x4_t vSrcBuf = vld4_u8(srcBuffer);
315
316        uint8x8_t r1 = Multipling(vSrcBuf.val[NEON_R], vld1_u8(covers));
317        uint8x8_t g1 = Multipling(vSrcBuf.val[NEON_G], vld1_u8(covers));
318        uint8x8_t b1 = Multipling(vSrcBuf.val[NEON_B], vld1_u8(covers));
319        uint8x8_t a1 = Multipling(vSrcBuf.val[NEON_A], vld1_u8(covers));
320
321        uint8x8_t rs = NeonLerp(r0, r1, a1);
322        uint8x8_t gs = NeonLerp(g0, g1, a1);
323        uint8x8_t bs = NeonLerp(b0, b1, a1);
324        uint8x8_t as = NeonPreLerp(a0, a1, a1);
325
326        StoreBuf_ARGB8888(dstBuffer, rs, gs, bs, as);
327    }
328private:
329    void ConstructSrcColor(ColorMode sm, void* srcColor, uint8_t opa,
330                           uint8x8_t& r, uint8x8_t& g, uint8x8_t& b, uint8x8_t& a)
331    {
332        if (sm == ARGB8888) {
333            Color32* color = reinterpret_cast<Color32*>(srcColor);
334            r = vdup_n_u8(color->red);
335            g = vdup_n_u8(color->green);
336            b = vdup_n_u8(color->blue);
337            a = NeonMulDiv255(vdup_n_u8(opa), vdup_n_u8(color->alpha));
338        } else if (sm == XRGB8888) {
339            Color32* color = reinterpret_cast<Color32*>(srcColor);
340            r = vdup_n_u8(color->red);
341            g = vdup_n_u8(color->green);
342            b = vdup_n_u8(color->blue);
343            a = vdup_n_u8(opa);
344        } else if (sm == RGB888) {
345            Color24* color = reinterpret_cast<Color24*>(srcColor);
346            r = vdup_n_u8(color->red);
347            g = vdup_n_u8(color->green);
348            b = vdup_n_u8(color->blue);
349            a = vdup_n_u8(opa);
350        } else if (sm == RGB565) {
351            Color16* color = reinterpret_cast<Color16*>(srcColor);
352            r = vdup_n_u8(color->red);
353            g = vdup_n_u8(color->green);
354            b = vdup_n_u8(color->blue);
355            a = vdup_n_u8(opa);
356        }
357    }
358
359    LoadBuf loadDstFunc_ = nullptr;
360    LoadBufA loadSrcFunc_ = nullptr;
361    NeonBlend blendFunc_ = nullptr;
362    StoreBuf storeDstFunc_ = nullptr;
363    uint8x8_t r1_;
364    uint8x8_t g1_;
365    uint8x8_t b1_;
366    uint8x8_t a1_;
367    uint8x8_t r2_;
368    uint8x8_t g2_;
369    uint8x8_t b2_;
370    uint8x8_t a2_;
371};
372} // namespace OHOS
373#endif
374#endif
375