15bec5421Sopenharmony_cidiff --git a/arm/arm_init.c b/arm/arm_init.c
25bec5421Sopenharmony_ciindex 3a89998ab..05aa2c0d9 100644
35bec5421Sopenharmony_ci--- a/arm/arm_init.c
45bec5421Sopenharmony_ci+++ b/arm/arm_init.c
55bec5421Sopenharmony_ci@@ -113,13 +113,23 @@ png_init_filter_functions_neon(png_structp pp, unsigned int bpp)
65bec5421Sopenharmony_ci     * initialization function.)
75bec5421Sopenharmony_ci     */
85bec5421Sopenharmony_ci    pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;
95bec5421Sopenharmony_ci-
105bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
115bec5421Sopenharmony_ci+   // OH ISSUE: png optimize
125bec5421Sopenharmony_ci+   pp->read_filter[PNG_FILTER_VALUE_UP_X2-1] = png_read_filter_row_up_x2_neon;
135bec5421Sopenharmony_ci+#endif
145bec5421Sopenharmony_ci    if (bpp == 3)
155bec5421Sopenharmony_ci    {
165bec5421Sopenharmony_ci       pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon;
175bec5421Sopenharmony_ci       pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon;
185bec5421Sopenharmony_ci       pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
195bec5421Sopenharmony_ci          png_read_filter_row_paeth3_neon;
205bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
215bec5421Sopenharmony_ci+      // OH ISSUE: png optimize
225bec5421Sopenharmony_ci+      pp->read_filter[PNG_FILTER_VALUE_AVG_X2-1] =
235bec5421Sopenharmony_ci+         png_read_filter_row_avg3_x2_neon;
245bec5421Sopenharmony_ci+      pp->read_filter[PNG_FILTER_VALUE_PAETH_X2-1] =
255bec5421Sopenharmony_ci+         png_read_filter_row_paeth3_x2_neon;
265bec5421Sopenharmony_ci+#endif
275bec5421Sopenharmony_ci    }
285bec5421Sopenharmony_ci 
295bec5421Sopenharmony_ci    else if (bpp == 4)
305bec5421Sopenharmony_ci@@ -128,6 +138,13 @@ png_init_filter_functions_neon(png_structp pp, unsigned int bpp)
315bec5421Sopenharmony_ci       pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon;
325bec5421Sopenharmony_ci       pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
335bec5421Sopenharmony_ci           png_read_filter_row_paeth4_neon;
345bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
355bec5421Sopenharmony_ci+      // OH ISSUE: png optimize
365bec5421Sopenharmony_ci+      pp->read_filter[PNG_FILTER_VALUE_AVG_X2-1] =
375bec5421Sopenharmony_ci+         png_read_filter_row_avg4_x2_neon;
385bec5421Sopenharmony_ci+      pp->read_filter[PNG_FILTER_VALUE_PAETH_X2-1] =
395bec5421Sopenharmony_ci+         png_read_filter_row_paeth4_x2_neon;
405bec5421Sopenharmony_ci+#endif
415bec5421Sopenharmony_ci    }
425bec5421Sopenharmony_ci }
435bec5421Sopenharmony_ci #endif /* PNG_ARM_NEON_OPT > 0 */
445bec5421Sopenharmony_cidiff --git a/arm/filter_neon_intrinsics.c b/arm/filter_neon_intrinsics.c
455bec5421Sopenharmony_ciindex 4466d48b2..4ff810a19 100644
465bec5421Sopenharmony_ci--- a/arm/filter_neon_intrinsics.c
475bec5421Sopenharmony_ci+++ b/arm/filter_neon_intrinsics.c
485bec5421Sopenharmony_ci@@ -47,6 +47,7 @@
495bec5421Sopenharmony_ci 
505bec5421Sopenharmony_ci #if PNG_ARM_NEON_OPT > 0
515bec5421Sopenharmony_ci 
525bec5421Sopenharmony_ci+#ifndef PNG_MULTY_LINE_ENABLE
535bec5421Sopenharmony_ci void
545bec5421Sopenharmony_ci png_read_filter_row_up_neon(png_row_infop row_info, png_bytep row,
555bec5421Sopenharmony_ci    png_const_bytep prev_row)
565bec5421Sopenharmony_ci@@ -396,7 +397,1351 @@ png_read_filter_row_paeth4_neon(png_row_infop row_info, png_bytep row,
575bec5421Sopenharmony_ci       vst4_lane_u32(png_ptr(uint32_t,rp), vdest_val, 0);
585bec5421Sopenharmony_ci    }
595bec5421Sopenharmony_ci }
605bec5421Sopenharmony_ci+#else
615bec5421Sopenharmony_ci+// OH ISSUE: png optimize
625bec5421Sopenharmony_ci+// By definition, row_info->rowbytes = row_width * row_info->channels,
635bec5421Sopenharmony_ci+// so the input rowbytes must be a multiple of 3 (RGB) or 4 (RGBA) bytes. Therefore:
645bec5421Sopenharmony_ci+// for RGB, NEON processes 12 bytes at once, so the tail must be 3, 6 or 9 bytes;
655bec5421Sopenharmony_ci+// for RGBA, NEON processes 16 or 8 bytes at once, so the tail must be 4 bytes;
665bec5421Sopenharmony_ci+// the filter operators are internal functions; callers ensure row_info and row are non-empty.
675bec5421Sopenharmony_ci+#define STEP_RGB (12) // 3-channel RGB: process 12 bytes at once
685bec5421Sopenharmony_ci+#define TAIL_RGB3 (9) // a tail of 3 pixels has 9 bytes
695bec5421Sopenharmony_ci+#define TAIL_RGB2 (6) // a tail of 2 pixels has 6 bytes
705bec5421Sopenharmony_ci+#define TAIL_RGB1 (3) // a tail of 1 pixel has 3 bytes
715bec5421Sopenharmony_ci+#define STEP_RGBA (16) // RGBA: NEON processes 16 bytes at once
725bec5421Sopenharmony_ci+#define STEP_RGBA_HALF (8) // RGBA: NEON processes 8 bytes at once
735bec5421Sopenharmony_ci+#define TAIL_RGBA (4) // a tail of 1 pixel has 4 bytes
745bec5421Sopenharmony_ci+#define IND3 (3) // index 3, used as .val[IND3] of a 4-vector tuple
755bec5421Sopenharmony_ci+#define IND2 (2) // index 2, used as .val[IND2] of a 4-vector tuple
765bec5421Sopenharmony_ci+#define OFFSET3 (3) // RGB offset of 3 bytes (one pixel)
775bec5421Sopenharmony_ci+#define OFFSET6 (6) // RGB offset of 6 bytes (two pixels)
785bec5421Sopenharmony_ci+void png_read_filter_row_up_neon(png_row_infop row_info, png_bytep row,
795bec5421Sopenharmony_ci+   png_const_bytep prev_row)
805bec5421Sopenharmony_ci+{ // Adds prev_row to row byte-wise (modulo 256), in place, over row_info->rowbytes bytes.
815bec5421Sopenharmony_ci+   png_bytep rp = row; // read/write cursor in the current row
825bec5421Sopenharmony_ci+   png_const_bytep pp = prev_row; // read cursor in the previous row
835bec5421Sopenharmony_ci+   size_t count = row_info->rowbytes; // bytes still to process; size_t so rowbytes is not narrowed to int
845bec5421Sopenharmony_ci+
855bec5421Sopenharmony_ci+   png_debug(1, "in png_read_filter_row_up_neon");
865bec5421Sopenharmony_ci+
875bec5421Sopenharmony_ci+   uint8x16_t qrp, qpp;
885bec5421Sopenharmony_ci+   while (count >= STEP_RGBA) { // main loop: 16 bytes per iteration
895bec5421Sopenharmony_ci+      qrp = vld1q_u8(rp);
905bec5421Sopenharmony_ci+      qpp = vld1q_u8(pp);
915bec5421Sopenharmony_ci+      qrp = vaddq_u8(qrp, qpp); // per-byte wrapping add
925bec5421Sopenharmony_ci+      vst1q_u8(rp, qrp);
935bec5421Sopenharmony_ci+      rp += STEP_RGBA;
945bec5421Sopenharmony_ci+      pp += STEP_RGBA;
955bec5421Sopenharmony_ci+      count -= STEP_RGBA; // cannot underflow: guarded by the loop condition
965bec5421Sopenharmony_ci+   }
975bec5421Sopenharmony_ci+
985bec5421Sopenharmony_ci+   if (count >= STEP_RGBA_HALF) { // at most one 8-byte step remains
995bec5421Sopenharmony_ci+      uint8x8_t qrp1, qpp1;
1005bec5421Sopenharmony_ci+      qrp1 = vld1_u8(rp);
1015bec5421Sopenharmony_ci+      qpp1 = vld1_u8(pp);
1025bec5421Sopenharmony_ci+      qrp1 = vadd_u8(qrp1, qpp1);
1035bec5421Sopenharmony_ci+      vst1_u8(rp, qrp1);
1045bec5421Sopenharmony_ci+      rp += STEP_RGBA_HALF;
1055bec5421Sopenharmony_ci+      pp += STEP_RGBA_HALF;
1065bec5421Sopenharmony_ci+      count -= STEP_RGBA_HALF;
1075bec5421Sopenharmony_ci+   }
1085bec5421Sopenharmony_ci+
1095bec5421Sopenharmony_ci+   for (size_t i = 0; i < count; i++) { // scalar tail: fewer than 8 bytes left
1105bec5421Sopenharmony_ci+      *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
1115bec5421Sopenharmony_ci+      rp++;
1125bec5421Sopenharmony_ci+   }
1135bec5421Sopenharmony_ci+}
1145bec5421Sopenharmony_ci+
1155bec5421Sopenharmony_ci+void png_read_filter_row_up_x2_neon(png_row_infop row_info, png_bytep row,
1165bec5421Sopenharmony_ci+   png_const_bytep prev_row)
1175bec5421Sopenharmony_ci+{
1185bec5421Sopenharmony_ci+   png_bytep rp = row;
1195bec5421Sopenharmony_ci+   png_const_bytep pp = prev_row;
1205bec5421Sopenharmony_ci+   int count = row_info->rowbytes;
1215bec5421Sopenharmony_ci+   png_bytep np = row + row_info->rowbytes + 1;
1225bec5421Sopenharmony_ci+
1235bec5421Sopenharmony_ci+   png_debug(1, "in png_read_filter_row_up_x2_neon");
1245bec5421Sopenharmony_ci+
1255bec5421Sopenharmony_ci+   uint8x16_t qrp, qpp, qnp;
1265bec5421Sopenharmony_ci+   while (count >= STEP_RGBA) {
1275bec5421Sopenharmony_ci+      qrp = vld1q_u8(rp);
1285bec5421Sopenharmony_ci+      qpp = vld1q_u8(pp);
1295bec5421Sopenharmony_ci+      qnp = vld1q_u8(np);
1305bec5421Sopenharmony_ci+      qrp = vaddq_u8(qrp, qpp);
1315bec5421Sopenharmony_ci+      qnp = vaddq_u8(qnp, qrp);
1325bec5421Sopenharmony_ci+      vst1q_u8(rp, qrp);
1335bec5421Sopenharmony_ci+      vst1q_u8(np, qnp);
1345bec5421Sopenharmony_ci+      rp += STEP_RGBA;
1355bec5421Sopenharmony_ci+      pp += STEP_RGBA;
1365bec5421Sopenharmony_ci+      np += STEP_RGBA;
1375bec5421Sopenharmony_ci+      count -= STEP_RGBA;
1385bec5421Sopenharmony_ci+   }
1395bec5421Sopenharmony_ci+
1405bec5421Sopenharmony_ci+   if (count >= STEP_RGBA_HALF) {
1415bec5421Sopenharmony_ci+      uint8x8_t qrp1, qpp1, qnp1;
1425bec5421Sopenharmony_ci+      qrp1 = vld1_u8(rp);
1435bec5421Sopenharmony_ci+      qpp1 = vld1_u8(pp);
1445bec5421Sopenharmony_ci+      qnp1 = vld1_u8(np);
1455bec5421Sopenharmony_ci+      qrp1 = vadd_u8(qrp1, qpp1);
1465bec5421Sopenharmony_ci+      qnp1 = vadd_u8(qnp1, qrp1);
1475bec5421Sopenharmony_ci+      vst1_u8(rp, qrp1);
1485bec5421Sopenharmony_ci+      vst1_u8(np, qnp1);
1495bec5421Sopenharmony_ci+      rp += STEP_RGBA_HALF;
1505bec5421Sopenharmony_ci+      pp += STEP_RGBA_HALF;
1515bec5421Sopenharmony_ci+      np += STEP_RGBA_HALF;
1525bec5421Sopenharmony_ci+      count -= STEP_RGBA_HALF;
1535bec5421Sopenharmony_ci+   }
1545bec5421Sopenharmony_ci+
1555bec5421Sopenharmony_ci+   for (int i = 0; i < count; i++) {
1565bec5421Sopenharmony_ci+      *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
1575bec5421Sopenharmony_ci+      *np = (png_byte)(((int)(*np) + (int)(*rp++)) & 0xff);
1585bec5421Sopenharmony_ci+      np++;
1595bec5421Sopenharmony_ci+   }
1605bec5421Sopenharmony_ci+}
1615bec5421Sopenharmony_ci+
1625bec5421Sopenharmony_ci+void png_read_filter_row_sub3_neon(png_row_infop row_info, png_bytep row,
1635bec5421Sopenharmony_ci+   png_const_bytep prev_row)
1645bec5421Sopenharmony_ci+{ // In place, adds to each 3-byte pixel of row the already-updated pixel to its left; prev_row is unused.
1655bec5421Sopenharmony_ci+   png_bytep rp = row; // read/write cursor in the row
1665bec5421Sopenharmony_ci+   png_bytep rp_stop = row + row_info->rowbytes; // one past the last byte of the row
1675bec5421Sopenharmony_ci+
1685bec5421Sopenharmony_ci+   uint8x16_t vtmp = vld1q_u8(rp); // loads 16 bytes regardless of rowbytes - NOTE(review): assumes the row buffer is padded
1695bec5421Sopenharmony_ci+   uint8x8x2_t *vrpt = png_ptr(uint8x8x2_t, &vtmp);
1705bec5421Sopenharmony_ci+   uint8x8x2_t vrp = *vrpt; // the same 16 bytes viewed as two 8-byte halves
1715bec5421Sopenharmony_ci+
1725bec5421Sopenharmony_ci+   uint8x8x4_t vdest;
1735bec5421Sopenharmony_ci+   vdest.val[IND3] = vdup_n_u8(0); // carry: pixel left of the first pixel is zero
1745bec5421Sopenharmony_ci+
1755bec5421Sopenharmony_ci+   uint8x8_t vtmp1, vtmp2;
1765bec5421Sopenharmony_ci+   uint32x2_t *temp_pointer; // scratch, presumably consumed by the png_ldr macro (defined outside this view)
1775bec5421Sopenharmony_ci+
1785bec5421Sopenharmony_ci+   png_debug(1, "in png_read_filter_row_sub3_neon");
1795bec5421Sopenharmony_ci+
1805bec5421Sopenharmony_ci+   size_t tail_bytes = row_info->rowbytes % STEP_RGB; // bytes left after the 12-byte groups
1815bec5421Sopenharmony_ci+   png_byte last_byte = *rp_stop; // saved: the 4-byte stores below spill one byte past the row end
1825bec5421Sopenharmony_ci+   png_bytep rp_stop_new = rp_stop - tail_bytes; // end of the part handled by the main loop
1835bec5421Sopenharmony_ci+   for (; rp < rp_stop_new;) // 12 bytes (four pixels) per iteration; rp advances inside the body
1845bec5421Sopenharmony_ci+   {
1855bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3); // bytes from offset 3: second pixel
1865bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[IND3], vrp.val[0]); // first pixel + carried left pixel
1875bec5421Sopenharmony_ci+      vtmp2 = vext_u8(vrp.val[0], vrp.val[1], OFFSET6); // bytes from offset 6: third pixel
1885bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[0], vtmp1);
1895bec5421Sopenharmony_ci+
1905bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1); // bytes from offset 9: fourth pixel
1915bec5421Sopenharmony_ci+      vdest.val[IND2] = vadd_u8(vdest.val[1], vtmp2);
1925bec5421Sopenharmony_ci+      vdest.val[IND3] = vadd_u8(vdest.val[IND2], vtmp1); // also the carry for the next group
1935bec5421Sopenharmony_ci+
1945bec5421Sopenharmony_ci+      vtmp = vld1q_u8(rp + STEP_RGB); // load the next group before the stores below touch byte rp+12
1955bec5421Sopenharmony_ci+      vrpt = png_ptr(uint8x8x2_t, &vtmp);
1965bec5421Sopenharmony_ci+      vrp = *vrpt;
1975bec5421Sopenharmony_ci+
1985bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); // writes 4 bytes; the 4th is overwritten by the next store
1995bec5421Sopenharmony_ci+      rp += OFFSET3;
2005bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0);
2015bec5421Sopenharmony_ci+      rp += OFFSET3;
2025bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND2]), 0);
2035bec5421Sopenharmony_ci+      rp += OFFSET3;
2045bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND3]), 0);
2055bec5421Sopenharmony_ci+      rp += OFFSET3;
2065bec5421Sopenharmony_ci+   }
2075bec5421Sopenharmony_ci+
2085bec5421Sopenharmony_ci+   if (tail_bytes == TAIL_RGB1) { // one pixel left
2095bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[IND3], vrp.val[0]);
2105bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0);
2115bec5421Sopenharmony_ci+   } else if (tail_bytes == TAIL_RGB2) { // two pixels left
2125bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3);
2135bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[IND3], vrp.val[0]);
2145bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[0], vtmp1);
2155bec5421Sopenharmony_ci+
2165bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0);
2175bec5421Sopenharmony_ci+      rp += OFFSET3;
2185bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0);
2195bec5421Sopenharmony_ci+   } else if (tail_bytes == TAIL_RGB3) { // three pixels left
2205bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3);
2215bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[IND3], vrp.val[0]);
2225bec5421Sopenharmony_ci+      vtmp2 = vext_u8(vrp.val[0], vrp.val[1], OFFSET6);
2235bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[0], vtmp1);
2245bec5421Sopenharmony_ci+      vdest.val[IND2] = vadd_u8(vdest.val[1], vtmp2);
2255bec5421Sopenharmony_ci+
2265bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0);
2275bec5421Sopenharmony_ci+      rp += OFFSET3;
2285bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0);
2295bec5421Sopenharmony_ci+      rp += OFFSET3;
2305bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND2]), 0);
2315bec5421Sopenharmony_ci+   }
2325bec5421Sopenharmony_ci+   *rp_stop = last_byte; // restore the byte just past the row that the last 4-byte store clobbered
2335bec5421Sopenharmony_ci+
2345bec5421Sopenharmony_ci+   PNG_UNUSED(prev_row)
2355bec5421Sopenharmony_ci+}
2365bec5421Sopenharmony_ci+
2375bec5421Sopenharmony_ci+void png_read_filter_row_sub4_neon(png_row_infop row_info, png_bytep row,
2385bec5421Sopenharmony_ci+   png_const_bytep prev_row)
2395bec5421Sopenharmony_ci+{ // In place, adds to each 4-byte pixel of row the already-updated pixel to its left; prev_row is unused.
2405bec5421Sopenharmony_ci+   png_bytep rp = row; // read/write cursor in the row
2415bec5421Sopenharmony_ci+   size_t count = row_info->rowbytes; // bytes still to process; size_t so rowbytes is not narrowed to int
2425bec5421Sopenharmony_ci+
2435bec5421Sopenharmony_ci+   uint8x8x4_t vdest;
2445bec5421Sopenharmony_ci+   vdest.val[IND3] = vdup_n_u8(0); // carry: pixel left of the first pixel is zero
2455bec5421Sopenharmony_ci+
2465bec5421Sopenharmony_ci+   png_debug(1, "in png_read_filter_row_sub4_neon");
2475bec5421Sopenharmony_ci+
2485bec5421Sopenharmony_ci+   uint32x2x4_t vtmp;
2495bec5421Sopenharmony_ci+   uint8x8x4_t *vrpt;
2505bec5421Sopenharmony_ci+   uint8x8x4_t vrp;
2515bec5421Sopenharmony_ci+   uint32x2x4_t vdest_val;
2525bec5421Sopenharmony_ci+   while (count >= STEP_RGBA) { // main loop: 16 bytes (four pixels) per iteration
2535bec5421Sopenharmony_ci+      uint32x2x4_t *temp_pointer; // scratch, presumably consumed by the png_ldr macro (defined outside this view)
2545bec5421Sopenharmony_ci+      vtmp = vld4_u32(png_ptr(uint32_t, rp)); // de-interleaving load: lane 0 of val[k] is the k-th pixel
2555bec5421Sopenharmony_ci+      vrpt = png_ptr(uint8x8x4_t, &vtmp);
2565bec5421Sopenharmony_ci+      vrp = *vrpt;
2575bec5421Sopenharmony_ci+
2585bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[IND3], vrp.val[0]); // first pixel + carried left pixel
2595bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[0], vrp.val[1]);
2605bec5421Sopenharmony_ci+      vdest.val[IND2] = vadd_u8(vdest.val[1], vrp.val[IND2]);
2615bec5421Sopenharmony_ci+      vdest.val[IND3] = vadd_u8(vdest.val[IND2], vrp.val[IND3]); // also the carry for the next group
2625bec5421Sopenharmony_ci+
2635bec5421Sopenharmony_ci+      vdest_val = png_ldr(uint32x2x4_t, &vdest);
2645bec5421Sopenharmony_ci+      vst4_lane_u32(png_ptr(uint32_t, rp), vdest_val, 0); // stores lane 0 of the four vectors: 16 bytes
2655bec5421Sopenharmony_ci+
2665bec5421Sopenharmony_ci+      rp += STEP_RGBA;
2675bec5421Sopenharmony_ci+      count -= STEP_RGBA; // cannot underflow: guarded by the loop condition
2685bec5421Sopenharmony_ci+   }
2695bec5421Sopenharmony_ci+
2705bec5421Sopenharmony_ci+   if (count >= STEP_RGBA_HALF) { // at most one 8-byte (two-pixel) step remains
2715bec5421Sopenharmony_ci+      uint32x2x2_t vtmp1 = vld2_u32(png_ptr(uint32_t, rp));
2725bec5421Sopenharmony_ci+      uint8x8x2_t *vrpt1 = png_ptr(uint8x8x2_t, &vtmp1);
2735bec5421Sopenharmony_ci+      uint8x8x2_t vrp1 = *vrpt1;
2745bec5421Sopenharmony_ci+      uint32x2x2_t *temp_pointer; // scratch, presumably consumed by png_ldr
2755bec5421Sopenharmony_ci+      uint32x2x2_t vdest_val1;
2765bec5421Sopenharmony_ci+
2775bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[IND3], vrp1.val[0]);
2785bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[0], vrp1.val[1]);
2795bec5421Sopenharmony_ci+      vdest.val[IND3] = vdest.val[1]; // keep the carry where the final step expects it
2805bec5421Sopenharmony_ci+      vdest_val1 = png_ldr(uint32x2x2_t, &vdest);
2815bec5421Sopenharmony_ci+      vst2_lane_u32(png_ptr(uint32_t, rp), vdest_val1, 0); // stores lane 0 of two vectors: 8 bytes
2825bec5421Sopenharmony_ci+
2835bec5421Sopenharmony_ci+      rp += STEP_RGBA_HALF;
2845bec5421Sopenharmony_ci+      count -= STEP_RGBA_HALF;
2855bec5421Sopenharmony_ci+   }
2865bec5421Sopenharmony_ci+
2875bec5421Sopenharmony_ci+   if (count == 0) { // nothing left: no single-pixel tail
2885bec5421Sopenharmony_ci+      return;
2895bec5421Sopenharmony_ci+   }
2905bec5421Sopenharmony_ci+
2915bec5421Sopenharmony_ci+   uint32x2_t vtmp2 = vld1_u32(png_ptr(uint32_t, rp)); // loads 8 bytes; only the first 4 are used and stored
2925bec5421Sopenharmony_ci+   uint8x8_t *vrpt2 = png_ptr(uint8x8_t, &vtmp2);
2935bec5421Sopenharmony_ci+   uint8x8_t vrp2 = *vrpt2;
2945bec5421Sopenharmony_ci+   uint32x2_t *temp_pointer; // scratch, presumably consumed by png_ldr
2955bec5421Sopenharmony_ci+   uint32x2_t vdest_val2;
2965bec5421Sopenharmony_ci+
2975bec5421Sopenharmony_ci+   vdest.val[0] = vadd_u8(vdest.val[IND3], vrp2); // last pixel + carried left pixel
2985bec5421Sopenharmony_ci+   vdest_val2 = png_ldr(uint32x2_t, &vdest);
2995bec5421Sopenharmony_ci+   vst1_lane_u32(png_ptr(uint32_t, rp), vdest_val2, 0); // stores 4 bytes
3005bec5421Sopenharmony_ci+
3015bec5421Sopenharmony_ci+   PNG_UNUSED(prev_row)
3025bec5421Sopenharmony_ci+}
3035bec5421Sopenharmony_ci+
3045bec5421Sopenharmony_ci+void png_read_filter_row_avg3_neon(png_row_infop row_info, png_bytep row,
3055bec5421Sopenharmony_ci+   png_const_bytep prev_row)
3065bec5421Sopenharmony_ci+{ // In place, adds to each 3-byte pixel of row the halved sum of its updated left pixel and the prev_row pixel above.
3075bec5421Sopenharmony_ci+   png_bytep rp = row; // read/write cursor in the current row
3085bec5421Sopenharmony_ci+   png_const_bytep pp = prev_row; // read cursor in the previous row
3095bec5421Sopenharmony_ci+   png_bytep rp_stop = row + row_info->rowbytes; // one past the last byte of the row
3105bec5421Sopenharmony_ci+
3115bec5421Sopenharmony_ci+   uint8x16_t vtmp;
3125bec5421Sopenharmony_ci+   uint8x8x2_t *vrpt;
3135bec5421Sopenharmony_ci+   uint8x8x2_t vrp;
3145bec5421Sopenharmony_ci+   uint8x8x4_t vdest;
3155bec5421Sopenharmony_ci+   vdest.val[IND3] = vdup_n_u8(0); // carry: pixel left of the first pixel is zero
3165bec5421Sopenharmony_ci+
3175bec5421Sopenharmony_ci+   vtmp = vld1q_u8(rp); // loads 16 bytes regardless of rowbytes - NOTE(review): assumes the row buffer is padded
3185bec5421Sopenharmony_ci+   vrpt = png_ptr(uint8x8x2_t, &vtmp);
3195bec5421Sopenharmony_ci+   vrp = *vrpt; // the same 16 bytes viewed as two 8-byte halves
3205bec5421Sopenharmony_ci+
3215bec5421Sopenharmony_ci+   png_debug(1, "in png_read_filter_row_avg3_neon");
3225bec5421Sopenharmony_ci+
3235bec5421Sopenharmony_ci+   uint8x8_t vtmp1, vtmp2, vtmp3;
3245bec5421Sopenharmony_ci+   uint8x8x2_t *vppt;
3255bec5421Sopenharmony_ci+   uint8x8x2_t vpp;
3265bec5421Sopenharmony_ci+   uint32x2_t *temp_pointer; // scratch, presumably consumed by the png_ldr macro (defined outside this view)
3275bec5421Sopenharmony_ci+
3285bec5421Sopenharmony_ci+   size_t tail_bytes = row_info->rowbytes % STEP_RGB; // bytes left after the 12-byte groups
3295bec5421Sopenharmony_ci+   png_byte last_byte = *rp_stop; // saved: the 4-byte stores below spill one byte past the row end
3305bec5421Sopenharmony_ci+   png_bytep rp_stop_new = rp_stop - tail_bytes; // end of the part handled by the main loop
3315bec5421Sopenharmony_ci+   for (; rp < rp_stop_new; pp += STEP_RGB) // 12 bytes (four pixels) per iteration; rp advances inside the body
3325bec5421Sopenharmony_ci+   {
3335bec5421Sopenharmony_ci+      vtmp = vld1q_u8(pp); // 16 bytes of the previous row
3345bec5421Sopenharmony_ci+      vppt = png_ptr(uint8x8x2_t, &vtmp);
3355bec5421Sopenharmony_ci+      vpp = *vppt;
3365bec5421Sopenharmony_ci+
3375bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3); // current row, second pixel
3385bec5421Sopenharmony_ci+      vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp.val[0]); // halving add: (left + above) >> 1
3395bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); // plus the filtered byte
3405bec5421Sopenharmony_ci+
3415bec5421Sopenharmony_ci+      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3); // previous row, second pixel
3425bec5421Sopenharmony_ci+      vtmp3 = vext_u8(vrp.val[0], vrp.val[1], OFFSET6); // current row, third pixel
3435bec5421Sopenharmony_ci+      vdest.val[1] = vhadd_u8(vdest.val[0], vtmp2);
3445bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[1], vtmp1);
3455bec5421Sopenharmony_ci+
3465bec5421Sopenharmony_ci+      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET6); // previous row, third pixel
3475bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1); // current row, fourth pixel (byte offset 9)
3485bec5421Sopenharmony_ci+
3495bec5421Sopenharmony_ci+      vtmp = vld1q_u8(rp + STEP_RGB); // load the next group before the stores below touch byte rp+12
3505bec5421Sopenharmony_ci+      vrpt = png_ptr(uint8x8x2_t, &vtmp);
3515bec5421Sopenharmony_ci+      vrp = *vrpt;
3525bec5421Sopenharmony_ci+
3535bec5421Sopenharmony_ci+      vdest.val[IND2] = vhadd_u8(vdest.val[1], vtmp2);
3545bec5421Sopenharmony_ci+      vdest.val[IND2] = vadd_u8(vdest.val[IND2], vtmp3);
3555bec5421Sopenharmony_ci+
3565bec5421Sopenharmony_ci+      vtmp2 = vext_u8(vpp.val[1], vpp.val[1], 1); // previous row, fourth pixel (byte offset 9)
3575bec5421Sopenharmony_ci+
3585bec5421Sopenharmony_ci+      vdest.val[IND3] = vhadd_u8(vdest.val[IND2], vtmp2);
3595bec5421Sopenharmony_ci+      vdest.val[IND3] = vadd_u8(vdest.val[IND3], vtmp1); // also the carry for the next group
3605bec5421Sopenharmony_ci+
3615bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); // writes 4 bytes; the 4th is overwritten by the next store
3625bec5421Sopenharmony_ci+      rp += OFFSET3;
3635bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0);
3645bec5421Sopenharmony_ci+      rp += OFFSET3;
3655bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND2]), 0);
3665bec5421Sopenharmony_ci+      rp += OFFSET3;
3675bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND3]), 0);
3685bec5421Sopenharmony_ci+      rp += OFFSET3;
3695bec5421Sopenharmony_ci+   }
3705bec5421Sopenharmony_ci+
3715bec5421Sopenharmony_ci+   vtmp = vld1q_u8(pp); // NOTE(review): executed even when tail_bytes == 0, reading 16 bytes at the end of prev_row - confirm buffer padding
3725bec5421Sopenharmony_ci+   vppt = png_ptr(uint8x8x2_t, &vtmp);
3735bec5421Sopenharmony_ci+   vpp = *vppt;
3745bec5421Sopenharmony_ci+
3755bec5421Sopenharmony_ci+   if (tail_bytes == TAIL_RGB1) { // one pixel left
3765bec5421Sopenharmony_ci+      vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp.val[0]);
3775bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
3785bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0);
3795bec5421Sopenharmony_ci+   } else if (tail_bytes == TAIL_RGB2) { // two pixels left
3805bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3);
3815bec5421Sopenharmony_ci+      vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp.val[0]);
3825bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
3835bec5421Sopenharmony_ci+
3845bec5421Sopenharmony_ci+      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3);
3855bec5421Sopenharmony_ci+      vdest.val[1] = vhadd_u8(vdest.val[0], vtmp2);
3865bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[1], vtmp1);
3875bec5421Sopenharmony_ci+
3885bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0);
3895bec5421Sopenharmony_ci+      rp += OFFSET3;
3905bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0);
3915bec5421Sopenharmony_ci+   } else if (tail_bytes == TAIL_RGB3) { // three pixels left
3925bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3);
3935bec5421Sopenharmony_ci+      vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp.val[0]);
3945bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
3955bec5421Sopenharmony_ci+
3965bec5421Sopenharmony_ci+      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3);
3975bec5421Sopenharmony_ci+      vtmp3 = vext_u8(vrp.val[0], vrp.val[1], OFFSET6);
3985bec5421Sopenharmony_ci+      vdest.val[1] = vhadd_u8(vdest.val[0], vtmp2);
3995bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[1], vtmp1);
4005bec5421Sopenharmony_ci+
4015bec5421Sopenharmony_ci+      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET6);
4025bec5421Sopenharmony_ci+
4035bec5421Sopenharmony_ci+      vdest.val[IND2] = vhadd_u8(vdest.val[1], vtmp2);
4045bec5421Sopenharmony_ci+      vdest.val[IND2] = vadd_u8(vdest.val[IND2], vtmp3);
4055bec5421Sopenharmony_ci+
4065bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0);
4075bec5421Sopenharmony_ci+      rp += OFFSET3;
4085bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0);
4095bec5421Sopenharmony_ci+      rp += OFFSET3;
4105bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND2]), 0);
4115bec5421Sopenharmony_ci+   }
4125bec5421Sopenharmony_ci+   *rp_stop = last_byte; // restore the byte just past the row that the last 4-byte store clobbered
4135bec5421Sopenharmony_ci+}
4145bec5421Sopenharmony_ci+
4155bec5421Sopenharmony_ci+void png_read_filter_row_avg3_x2_neon(png_row_infop row_info, png_bytep row,
4165bec5421Sopenharmony_ci+   png_const_bytep prev_row)
4175bec5421Sopenharmony_ci+{
4185bec5421Sopenharmony_ci+   png_bytep rp = row;
4195bec5421Sopenharmony_ci+   png_const_bytep pp = prev_row;
4205bec5421Sopenharmony_ci+   png_bytep rp_stop = row + row_info->rowbytes;
4215bec5421Sopenharmony_ci+   png_bytep np = rp_stop + 1;
4225bec5421Sopenharmony_ci+
4235bec5421Sopenharmony_ci+   uint8x16_t vtmp;
4245bec5421Sopenharmony_ci+   uint8x8x2_t *vrpt;
4255bec5421Sopenharmony_ci+   uint8x8x2_t vrp;
4265bec5421Sopenharmony_ci+   uint8x8x4_t vdest;
4275bec5421Sopenharmony_ci+   vdest.val[IND3] = vdup_n_u8(0);
4285bec5421Sopenharmony_ci+
4295bec5421Sopenharmony_ci+   vtmp = vld1q_u8(rp);
4305bec5421Sopenharmony_ci+   vrpt = png_ptr(uint8x8x2_t, &vtmp);
4315bec5421Sopenharmony_ci+   vrp = *vrpt;
4325bec5421Sopenharmony_ci+
4335bec5421Sopenharmony_ci+   uint8x8x2_t *vnpt;
4345bec5421Sopenharmony_ci+   uint8x8x2_t vnp;
4355bec5421Sopenharmony_ci+   uint8x8x4_t vdestN;
4365bec5421Sopenharmony_ci+   vdestN.val[IND3] = vdup_n_u8(0);
4375bec5421Sopenharmony_ci+
4385bec5421Sopenharmony_ci+   vtmp = vld1q_u8(np);
4395bec5421Sopenharmony_ci+   vnpt = png_ptr(uint8x8x2_t, &vtmp);
4405bec5421Sopenharmony_ci+   vnp = *vnpt;
4415bec5421Sopenharmony_ci+
4425bec5421Sopenharmony_ci+   png_debug(1, "in png_read_filter_row_x2_avg3_neon");
4435bec5421Sopenharmony_ci+
4445bec5421Sopenharmony_ci+   uint8x8_t vtmp1, vtmp2, vtmp3;
4455bec5421Sopenharmony_ci+   uint8x8x2_t *vppt;
4465bec5421Sopenharmony_ci+   uint8x8x2_t vpp;
4475bec5421Sopenharmony_ci+   uint32x2_t *temp_pointer;
4485bec5421Sopenharmony_ci+
4495bec5421Sopenharmony_ci+   size_t tail_bytes = row_info->rowbytes % STEP_RGB;
4505bec5421Sopenharmony_ci+   png_byte last_byte = *rp_stop;
4515bec5421Sopenharmony_ci+   png_byte last_byte_next = *(rp_stop + row_info->rowbytes + 1);
4525bec5421Sopenharmony_ci+   png_bytep rp_stop_new = rp_stop - tail_bytes;
4535bec5421Sopenharmony_ci+   for (; rp < rp_stop_new; pp += STEP_RGB)
4545bec5421Sopenharmony_ci+   {
4555bec5421Sopenharmony_ci+      vtmp = vld1q_u8(pp);
4565bec5421Sopenharmony_ci+      vppt = png_ptr(uint8x8x2_t, &vtmp);
4575bec5421Sopenharmony_ci+      vpp = *vppt;
4585bec5421Sopenharmony_ci+
4595bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3);
4605bec5421Sopenharmony_ci+      vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp.val[0]);
4615bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
4625bec5421Sopenharmony_ci+
4635bec5421Sopenharmony_ci+      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3);
4645bec5421Sopenharmony_ci+      vtmp3 = vext_u8(vrp.val[0], vrp.val[1], OFFSET6);
4655bec5421Sopenharmony_ci+      vdest.val[1] = vhadd_u8(vdest.val[0], vtmp2);
4665bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[1], vtmp1);
4675bec5421Sopenharmony_ci+
4685bec5421Sopenharmony_ci+      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET6);
4695bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1);
4705bec5421Sopenharmony_ci+
4715bec5421Sopenharmony_ci+      vtmp = vld1q_u8(rp + STEP_RGB);
4725bec5421Sopenharmony_ci+      vrpt = png_ptr(uint8x8x2_t, &vtmp);
4735bec5421Sopenharmony_ci+      vrp = *vrpt;
4745bec5421Sopenharmony_ci+
4755bec5421Sopenharmony_ci+      vdest.val[IND2] = vhadd_u8(vdest.val[1], vtmp2);
4765bec5421Sopenharmony_ci+      vdest.val[IND2] = vadd_u8(vdest.val[IND2], vtmp3);
4775bec5421Sopenharmony_ci+
4785bec5421Sopenharmony_ci+      vtmp2 = vext_u8(vpp.val[1], vpp.val[1], 1);
4795bec5421Sopenharmony_ci+
4805bec5421Sopenharmony_ci+      vdest.val[IND3] = vhadd_u8(vdest.val[IND2], vtmp2);
4815bec5421Sopenharmony_ci+      vdest.val[IND3] = vadd_u8(vdest.val[IND3], vtmp1);
4825bec5421Sopenharmony_ci+
4835bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0);
4845bec5421Sopenharmony_ci+      rp += OFFSET3;
4855bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0);
4865bec5421Sopenharmony_ci+      rp += OFFSET3;
4875bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND2]), 0);
4885bec5421Sopenharmony_ci+      rp += OFFSET3;
4895bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND3]), 0);
4905bec5421Sopenharmony_ci+      rp += OFFSET3;
4915bec5421Sopenharmony_ci+
4925bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vnp.val[0], vnp.val[1], OFFSET3);
4935bec5421Sopenharmony_ci+      vdestN.val[0] = vhadd_u8(vdestN.val[IND3], vdest.val[0]);
4945bec5421Sopenharmony_ci+      vdestN.val[0] = vadd_u8(vdestN.val[0], vnp.val[0]);
4955bec5421Sopenharmony_ci+
4965bec5421Sopenharmony_ci+      vtmp3 = vext_u8(vnp.val[0], vnp.val[1], OFFSET6);
4975bec5421Sopenharmony_ci+      vdestN.val[1] = vhadd_u8(vdestN.val[0], vdest.val[1]);
4985bec5421Sopenharmony_ci+      vdestN.val[1] = vadd_u8(vdestN.val[1], vtmp1);
4995bec5421Sopenharmony_ci+
5005bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vnp.val[1], vnp.val[1], 1);
5015bec5421Sopenharmony_ci+
5025bec5421Sopenharmony_ci+      vtmp = vld1q_u8(np + STEP_RGB);
5035bec5421Sopenharmony_ci+      vnpt = png_ptr(uint8x8x2_t, &vtmp);
5045bec5421Sopenharmony_ci+      vnp = *vnpt;
5055bec5421Sopenharmony_ci+
5065bec5421Sopenharmony_ci+      vdestN.val[IND2] = vhadd_u8(vdestN.val[1], vdest.val[IND2]);
5075bec5421Sopenharmony_ci+      vdestN.val[IND2] = vadd_u8(vdestN.val[IND2], vtmp3);
5085bec5421Sopenharmony_ci+
5095bec5421Sopenharmony_ci+      vdestN.val[IND3] = vhadd_u8(vdestN.val[IND2], vdest.val[IND3]);
5105bec5421Sopenharmony_ci+      vdestN.val[IND3] = vadd_u8(vdestN.val[IND3], vtmp1);
5115bec5421Sopenharmony_ci+
5125bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[0]), 0);
5135bec5421Sopenharmony_ci+      np += OFFSET3;
5145bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[1]), 0);
5155bec5421Sopenharmony_ci+      np += OFFSET3;
5165bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[IND2]), 0);
5175bec5421Sopenharmony_ci+      np += OFFSET3;
5185bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[IND3]), 0);
5195bec5421Sopenharmony_ci+      np += OFFSET3;
5205bec5421Sopenharmony_ci+   }
5215bec5421Sopenharmony_ci+
5225bec5421Sopenharmony_ci+   vtmp = vld1q_u8(pp);
5235bec5421Sopenharmony_ci+   vppt = png_ptr(uint8x8x2_t, &vtmp);
5245bec5421Sopenharmony_ci+   vpp = *vppt;
5255bec5421Sopenharmony_ci+
5265bec5421Sopenharmony_ci+   if (tail_bytes == TAIL_RGB1) {
5275bec5421Sopenharmony_ci+      vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp.val[0]);
5285bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
5295bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0);
5305bec5421Sopenharmony_ci+
5315bec5421Sopenharmony_ci+      vdestN.val[0] = vhadd_u8(vdestN.val[IND3], vdest.val[0]);
5325bec5421Sopenharmony_ci+      vdestN.val[0] = vadd_u8(vdestN.val[0], vnp.val[0]);
5335bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[0]), 0);
5345bec5421Sopenharmony_ci+   } else if (tail_bytes == TAIL_RGB2) {
5355bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3);
5365bec5421Sopenharmony_ci+      vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp.val[0]);
5375bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
5385bec5421Sopenharmony_ci+
5395bec5421Sopenharmony_ci+      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3);
5405bec5421Sopenharmony_ci+      vdest.val[1] = vhadd_u8(vdest.val[0], vtmp2);
5415bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[1], vtmp1);
5425bec5421Sopenharmony_ci+
5435bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0);
5445bec5421Sopenharmony_ci+      rp += OFFSET3;
5455bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0);
5465bec5421Sopenharmony_ci+
5475bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vnp.val[0], vnp.val[1], OFFSET3);
5485bec5421Sopenharmony_ci+      vdestN.val[0] = vhadd_u8(vdestN.val[IND3], vdest.val[0]);
5495bec5421Sopenharmony_ci+      vdestN.val[0] = vadd_u8(vdestN.val[0], vnp.val[0]);
5505bec5421Sopenharmony_ci+
5515bec5421Sopenharmony_ci+      vdestN.val[1] = vhadd_u8(vdestN.val[0], vdest.val[1]);
5525bec5421Sopenharmony_ci+      vdestN.val[1] = vadd_u8(vdestN.val[1], vtmp1);
5535bec5421Sopenharmony_ci+
5545bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[0]), 0);
5555bec5421Sopenharmony_ci+      np += OFFSET3;
5565bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[1]), 0);
5575bec5421Sopenharmony_ci+   } else if (tail_bytes == TAIL_RGB3) {
5585bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3);
5595bec5421Sopenharmony_ci+      vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp.val[0]);
5605bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
5615bec5421Sopenharmony_ci+
5625bec5421Sopenharmony_ci+      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3);
5635bec5421Sopenharmony_ci+      vtmp3 = vext_u8(vrp.val[0], vrp.val[1], OFFSET6);
5645bec5421Sopenharmony_ci+      vdest.val[1] = vhadd_u8(vdest.val[0], vtmp2);
5655bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[1], vtmp1);
5665bec5421Sopenharmony_ci+
5675bec5421Sopenharmony_ci+      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET6);
5685bec5421Sopenharmony_ci+
5695bec5421Sopenharmony_ci+      vdest.val[IND2] = vhadd_u8(vdest.val[1], vtmp2);
5705bec5421Sopenharmony_ci+      vdest.val[IND2] = vadd_u8(vdest.val[IND2], vtmp3);
5715bec5421Sopenharmony_ci+
5725bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0);
5735bec5421Sopenharmony_ci+      rp += OFFSET3;
5745bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0);
5755bec5421Sopenharmony_ci+      rp += OFFSET3;
5765bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND2]), 0);
5775bec5421Sopenharmony_ci+
5785bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vnp.val[0], vnp.val[1], OFFSET3);
5795bec5421Sopenharmony_ci+      vdestN.val[0] = vhadd_u8(vdestN.val[IND3], vdest.val[0]);
5805bec5421Sopenharmony_ci+      vdestN.val[0] = vadd_u8(vdestN.val[0], vnp.val[0]);
5815bec5421Sopenharmony_ci+
5825bec5421Sopenharmony_ci+      vtmp3 = vext_u8(vnp.val[0], vnp.val[1], OFFSET6);
5835bec5421Sopenharmony_ci+      vdestN.val[1] = vhadd_u8(vdestN.val[0], vdest.val[1]);
5845bec5421Sopenharmony_ci+      vdestN.val[1] = vadd_u8(vdestN.val[1], vtmp1);
5855bec5421Sopenharmony_ci+
5865bec5421Sopenharmony_ci+      vdestN.val[IND2] = vhadd_u8(vdestN.val[1], vdest.val[IND2]);
5875bec5421Sopenharmony_ci+      vdestN.val[IND2] = vadd_u8(vdestN.val[IND2], vtmp3);
5885bec5421Sopenharmony_ci+
5895bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[0]), 0);
5905bec5421Sopenharmony_ci+      np += OFFSET3;
5915bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[1]), 0);
5925bec5421Sopenharmony_ci+      np += OFFSET3;
5935bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[IND2]), 0);
5945bec5421Sopenharmony_ci+   }
5955bec5421Sopenharmony_ci+   *rp_stop = last_byte;
5965bec5421Sopenharmony_ci+   *(rp_stop + row_info->rowbytes + 1) = last_byte_next;
5975bec5421Sopenharmony_ci+}
5985bec5421Sopenharmony_ci+
5995bec5421Sopenharmony_ci+void png_read_filter_row_avg4_neon(png_row_infop row_info, png_bytep row,
6005bec5421Sopenharmony_ci+   png_const_bytep prev_row)
6015bec5421Sopenharmony_ci+{ /* Rebuilds `row` in place, one 32-bit pixel at a time: out = raw + halving_add(left, above), with `prev_row` supplying "above". */
6025bec5421Sopenharmony_ci+   png_bytep rp = row;
6035bec5421Sopenharmony_ci+   png_const_bytep pp = prev_row;
6045bec5421Sopenharmony_ci+   int count = row_info->rowbytes; /* bytes left to process; NOTE(review): held in an int - confirm rowbytes always fits */
6055bec5421Sopenharmony_ci+
6065bec5421Sopenharmony_ci+   uint8x8x4_t vdest;
6075bec5421Sopenharmony_ci+   vdest.val[IND3] = vdup_n_u8(0); /* val[IND3] doubles as the "left" pixel for the next group; zero at the start of the row */
6085bec5421Sopenharmony_ci+
6095bec5421Sopenharmony_ci+   png_debug(1, "in png_read_filter_row_avg4_neon");
6105bec5421Sopenharmony_ci+
6115bec5421Sopenharmony_ci+   uint32x2x4_t vtmp;
6125bec5421Sopenharmony_ci+   uint8x8x4_t *vrpt, *vppt;
6135bec5421Sopenharmony_ci+   uint8x8x4_t vrp, vpp;
6145bec5421Sopenharmony_ci+   uint32x2x4_t vdest_val;
6155bec5421Sopenharmony_ci+   while (count >= STEP_RGBA) { /* main loop: four pixels per pass (four 32-bit words are written back below) */
6165bec5421Sopenharmony_ci+      uint32x2x4_t *temp_pointer; /* not named in this body; presumably consumed by the png_ldr macro (defined outside this chunk) - TODO confirm */
6175bec5421Sopenharmony_ci+      vtmp = vld4_u32(png_ptr(uint32_t, rp)); /* de-interleaving load: lane 0 of val[k] is the k-th 32-bit word at rp */
6185bec5421Sopenharmony_ci+      vrpt = png_ptr(uint8x8x4_t, &vtmp); /* view the same registers as byte vectors */
6195bec5421Sopenharmony_ci+      vrp = *vrpt;
6205bec5421Sopenharmony_ci+      vtmp = vld4_u32(png_ptrc(uint32_t, pp)); /* same layout for the row above */
6215bec5421Sopenharmony_ci+      vppt = png_ptr(uint8x8x4_t, &vtmp);
6225bec5421Sopenharmony_ci+      vpp = *vppt;
6235bec5421Sopenharmony_ci+   /* NOTE(review): vld4_u32 fetches eight 32-bit words but only lane 0 (four words) is stored - confirm both buffers are padded for the extra read */
6245bec5421Sopenharmony_ci+      vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp.val[0]); /* (left + above) >> 1, left = last pixel of the previous pass */
6255bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); /* + raw byte, wrapping modulo 256 */
6265bec5421Sopenharmony_ci+      vdest.val[1] = vhadd_u8(vdest.val[0], vpp.val[1]); /* each later pixel uses the pixel just rebuilt as "left" */
6275bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[1], vrp.val[1]);
6285bec5421Sopenharmony_ci+      vdest.val[IND2] = vhadd_u8(vdest.val[1], vpp.val[IND2]);
6295bec5421Sopenharmony_ci+      vdest.val[IND2] = vadd_u8(vdest.val[IND2], vrp.val[IND2]);
6305bec5421Sopenharmony_ci+      vdest.val[IND3] = vhadd_u8(vdest.val[IND2], vpp.val[IND3]);
6315bec5421Sopenharmony_ci+      vdest.val[IND3] = vadd_u8(vdest.val[IND3], vrp.val[IND3]); /* also becomes "left" for the next pass */
6325bec5421Sopenharmony_ci+
6335bec5421Sopenharmony_ci+      vdest_val = png_ldr(uint32x2x4_t, &vdest);
6345bec5421Sopenharmony_ci+      vst4_lane_u32(png_ptr(uint32_t, rp), vdest_val, 0); /* write lane 0 of the four vectors back over the raw bytes */
6355bec5421Sopenharmony_ci+
6365bec5421Sopenharmony_ci+      rp += STEP_RGBA;
6375bec5421Sopenharmony_ci+      pp += STEP_RGBA;
6385bec5421Sopenharmony_ci+      count -= STEP_RGBA;
6395bec5421Sopenharmony_ci+   }
6405bec5421Sopenharmony_ci+
6415bec5421Sopenharmony_ci+   if (count >= STEP_RGBA_HALF) { /* optional two-pixel step */
6425bec5421Sopenharmony_ci+      uint32x2x2_t vtmp1;
6435bec5421Sopenharmony_ci+      uint8x8x2_t *vrpt1, *vppt1;
6445bec5421Sopenharmony_ci+      uint8x8x2_t vrp1, vpp1;
6455bec5421Sopenharmony_ci+      uint32x2x2_t *temp_pointer; /* presumably for png_ldr, as in the loop above - TODO confirm */
6465bec5421Sopenharmony_ci+      uint32x2x2_t vdest_val1;
6475bec5421Sopenharmony_ci+
6485bec5421Sopenharmony_ci+      vtmp1 = vld2_u32(png_ptr(uint32_t, rp)); /* lane 0 of val[0]/val[1] = first/second word at rp */
6495bec5421Sopenharmony_ci+      vrpt1 = png_ptr(uint8x8x2_t, &vtmp1);
6505bec5421Sopenharmony_ci+      vrp1 = *vrpt1;
6515bec5421Sopenharmony_ci+      vtmp1 = vld2_u32(png_ptrc(uint32_t, pp));
6525bec5421Sopenharmony_ci+      vppt1 = png_ptr(uint8x8x2_t, &vtmp1);
6535bec5421Sopenharmony_ci+      vpp1 = *vppt1;
6545bec5421Sopenharmony_ci+
6555bec5421Sopenharmony_ci+      vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp1.val[0]);
6565bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp1.val[0]);
6575bec5421Sopenharmony_ci+      vdest.val[1] = vhadd_u8(vdest.val[0], vpp1.val[1]);
6585bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[1], vrp1.val[1]);
6595bec5421Sopenharmony_ci+      vdest.val[IND3] = vdest.val[1]; /* keep the last rebuilt pixel where the tail code expects "left" */
6605bec5421Sopenharmony_ci+      vdest_val1 = png_ldr(uint32x2x2_t, &vdest); /* &vdest addresses val[0], so this picks up val[0] and val[1] */
6615bec5421Sopenharmony_ci+      vst2_lane_u32(png_ptr(uint32_t, rp), vdest_val1, 0);
6625bec5421Sopenharmony_ci+
6635bec5421Sopenharmony_ci+      rp += STEP_RGBA_HALF;
6645bec5421Sopenharmony_ci+      pp += STEP_RGBA_HALF;
6655bec5421Sopenharmony_ci+      count -= STEP_RGBA_HALF;
6665bec5421Sopenharmony_ci+   }
6675bec5421Sopenharmony_ci+
6685bec5421Sopenharmony_ci+   if (count == 0) { /* nothing left: skip the single-pixel tail */
6695bec5421Sopenharmony_ci+      return;
6705bec5421Sopenharmony_ci+   }
6715bec5421Sopenharmony_ci+
6725bec5421Sopenharmony_ci+   uint32x2_t vtmp2;
6735bec5421Sopenharmony_ci+   uint8x8_t *vrpt2, *vppt2;
6745bec5421Sopenharmony_ci+   uint8x8_t vrp2, vpp2;
6755bec5421Sopenharmony_ci+   uint32x2_t *temp_pointer; /* presumably for png_ldr - TODO confirm */
6765bec5421Sopenharmony_ci+   uint32x2_t vdest_val2;
6775bec5421Sopenharmony_ci+
6785bec5421Sopenharmony_ci+   vtmp2 = vld1_u32(png_ptr(uint32_t, rp)); /* loads two words; only lane 0 (one pixel) is used and stored */
6795bec5421Sopenharmony_ci+   vrpt2 = png_ptr(uint8x8_t, &vtmp2);
6805bec5421Sopenharmony_ci+   vrp2 = *vrpt2;
6815bec5421Sopenharmony_ci+   vtmp2 = vld1_u32(png_ptrc(uint32_t, pp));
6825bec5421Sopenharmony_ci+   vppt2 = png_ptr(uint8x8_t, &vtmp2);
6835bec5421Sopenharmony_ci+   vpp2 = *vppt2;
6845bec5421Sopenharmony_ci+
6855bec5421Sopenharmony_ci+   vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp2); /* final pixel: same recurrence as above */
6865bec5421Sopenharmony_ci+   vdest.val[0] = vadd_u8(vdest.val[0], vrp2);
6875bec5421Sopenharmony_ci 
6885bec5421Sopenharmony_ci+   vdest_val2 = png_ldr(uint32x2_t, &vdest); /* &vdest addresses val[0] */
6895bec5421Sopenharmony_ci+   vst1_lane_u32(png_ptr(uint32_t, rp), vdest_val2, 0);
6905bec5421Sopenharmony_ci+}
6915bec5421Sopenharmony_ci+
6925bec5421Sopenharmony_ci+void png_read_filter_row_avg4_x2_neon(png_row_infop row_info, png_bytep row,
6935bec5421Sopenharmony_ci+   png_const_bytep prev_row)
6945bec5421Sopenharmony_ci+{ /* Two-row variant: rebuilds `row` from `prev_row`, then the row stored after it using the freshly rebuilt `row` as its "above". */
6955bec5421Sopenharmony_ci+   png_bytep rp = row;
6965bec5421Sopenharmony_ci+   png_const_bytep pp = prev_row;
6975bec5421Sopenharmony_ci+   int count = row_info->rowbytes; /* bytes left per row; NOTE(review): held in an int - confirm rowbytes always fits */
6985bec5421Sopenharmony_ci+   png_bytep np = row + count + 1; /* second row begins rowbytes + 1 bytes after `row`; the skipped byte is presumably its filter-type byte - TODO confirm */
6995bec5421Sopenharmony_ci+
7005bec5421Sopenharmony_ci+   uint8x8x4_t vdest;
7015bec5421Sopenharmony_ci+   vdest.val[IND3] = vdup_n_u8(0); /* "left" pixel of the first row; zero at row start */
7025bec5421Sopenharmony_ci+
7035bec5421Sopenharmony_ci+   png_debug(1, "in png_read_filter_row_avg4_x2_neon");
7045bec5421Sopenharmony_ci+
7055bec5421Sopenharmony_ci+   uint32x2x4_t vtmp;
7065bec5421Sopenharmony_ci+   uint8x8x4_t *vrpt, *vppt;
7075bec5421Sopenharmony_ci+   uint8x8x4_t vrp, vpp;
7085bec5421Sopenharmony_ci+   uint32x2x4_t vdest_val;
7095bec5421Sopenharmony_ci+
7105bec5421Sopenharmony_ci+   uint8x8x4_t *vnpt;
7115bec5421Sopenharmony_ci+   uint8x8x4_t vnp;
7125bec5421Sopenharmony_ci+   uint8x8x4_t vdestN; /* rebuilt pixels of the second row */
7135bec5421Sopenharmony_ci+   vdestN.val[IND3] = vdup_n_u8(0); /* "left" pixel of the second row; zero at row start */
7145bec5421Sopenharmony_ci+
7155bec5421Sopenharmony_ci+   while (count >= STEP_RGBA) { /* main loop: four pixels of each row per pass */
7165bec5421Sopenharmony_ci+      uint32x2x4_t *temp_pointer; /* not named in this body; presumably consumed by the png_ldr macro (defined outside this chunk) - TODO confirm */
7175bec5421Sopenharmony_ci+      vtmp = vld4_u32(png_ptr(uint32_t, rp)); /* de-interleaving load: lane 0 of val[k] is the k-th 32-bit word at rp */
7185bec5421Sopenharmony_ci+      vrpt = png_ptr(uint8x8x4_t, &vtmp);
7195bec5421Sopenharmony_ci+      vrp = *vrpt;
7205bec5421Sopenharmony_ci+      vtmp = vld4_u32(png_ptrc(uint32_t, pp));
7215bec5421Sopenharmony_ci+      vppt = png_ptr(uint8x8x4_t, &vtmp);
7225bec5421Sopenharmony_ci+      vpp = *vppt;
7235bec5421Sopenharmony_ci+      vtmp = vld4_u32(png_ptrc(uint32_t, np)); /* raw bytes of the second row, read before anything is written there */
7245bec5421Sopenharmony_ci+      vnpt = png_ptr(uint8x8x4_t, &vtmp);
7255bec5421Sopenharmony_ci+      vnp = *vnpt;
7265bec5421Sopenharmony_ci+   /* NOTE(review): each vld4_u32 fetches eight 32-bit words but only lane 0 (four words) is stored - confirm all three buffers are padded for the extra read */
7275bec5421Sopenharmony_ci+      vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp.val[0]); /* first row: (left + above) >> 1 ... */
7285bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); /* ... + raw, wrapping modulo 256 */
7295bec5421Sopenharmony_ci+      vdest.val[1] = vhadd_u8(vdest.val[0], vpp.val[1]);
7305bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[1], vrp.val[1]);
7315bec5421Sopenharmony_ci+      vdest.val[IND2] = vhadd_u8(vdest.val[1], vpp.val[IND2]);
7325bec5421Sopenharmony_ci+      vdest.val[IND2] = vadd_u8(vdest.val[IND2], vrp.val[IND2]);
7335bec5421Sopenharmony_ci+      vdest.val[IND3] = vhadd_u8(vdest.val[IND2], vpp.val[IND3]);
7345bec5421Sopenharmony_ci+      vdest.val[IND3] = vadd_u8(vdest.val[IND3], vrp.val[IND3]);
7355bec5421Sopenharmony_ci+
7365bec5421Sopenharmony_ci+      vdest_val = png_ldr(uint32x2x4_t, &vdest);
7375bec5421Sopenharmony_ci+      vst4_lane_u32(png_ptr(uint32_t, rp), vdest_val, 0); /* store lane 0 of the four first-row vectors */
7385bec5421Sopenharmony_ci+
7395bec5421Sopenharmony_ci+      vdestN.val[0] = vhadd_u8(vdestN.val[IND3], vdest.val[0]); /* second row: "above" is the first-row pixel just rebuilt */
7405bec5421Sopenharmony_ci+      vdestN.val[0] = vadd_u8(vdestN.val[0], vnp.val[0]);
7415bec5421Sopenharmony_ci+      vdestN.val[1] = vhadd_u8(vdestN.val[0], vdest.val[1]);
7425bec5421Sopenharmony_ci+      vdestN.val[1] = vadd_u8(vdestN.val[1], vnp.val[1]);
7435bec5421Sopenharmony_ci+      vdestN.val[IND2] = vhadd_u8(vdestN.val[1], vdest.val[IND2]);
7445bec5421Sopenharmony_ci+      vdestN.val[IND2] = vadd_u8(vdestN.val[IND2], vnp.val[IND2]);
7455bec5421Sopenharmony_ci+      vdestN.val[IND3] = vhadd_u8(vdestN.val[IND2], vdest.val[IND3]);
7465bec5421Sopenharmony_ci+      vdestN.val[IND3] = vadd_u8(vdestN.val[IND3], vnp.val[IND3]);
7475bec5421Sopenharmony_ci+
7485bec5421Sopenharmony_ci+      vdest_val = png_ldr(uint32x2x4_t, &vdestN);
7495bec5421Sopenharmony_ci+      vst4_lane_u32(png_ptr(uint32_t, np), vdest_val, 0); /* store lane 0 of the four second-row vectors */
7505bec5421Sopenharmony_ci+
7515bec5421Sopenharmony_ci+      rp += STEP_RGBA;
7525bec5421Sopenharmony_ci+      pp += STEP_RGBA;
7535bec5421Sopenharmony_ci+      np += STEP_RGBA;
7545bec5421Sopenharmony_ci+      count -= STEP_RGBA;
7555bec5421Sopenharmony_ci+   }
7565bec5421Sopenharmony_ci+
7575bec5421Sopenharmony_ci+   if (count >= STEP_RGBA_HALF) { /* optional two-pixel step for both rows */
7585bec5421Sopenharmony_ci+      uint32x2x2_t vtmp1;
7595bec5421Sopenharmony_ci+      uint8x8x2_t *vrpt1, *vppt1, *vnpt1;
7605bec5421Sopenharmony_ci+      uint8x8x2_t vrp1, vpp1, vnp1;
7615bec5421Sopenharmony_ci+      uint32x2x2_t *temp_pointer; /* presumably for png_ldr, as in the loop above - TODO confirm */
7625bec5421Sopenharmony_ci+      uint32x2x2_t vdest_val1;
7635bec5421Sopenharmony_ci+
7645bec5421Sopenharmony_ci+      vtmp1 = vld2_u32(png_ptr(uint32_t, rp)); /* lane 0 of val[0]/val[1] = first/second word at rp */
7655bec5421Sopenharmony_ci+      vrpt1 = png_ptr(uint8x8x2_t, &vtmp1);
7665bec5421Sopenharmony_ci+      vrp1 = *vrpt1;
7675bec5421Sopenharmony_ci+      vtmp1 = vld2_u32(png_ptrc(uint32_t, pp));
7685bec5421Sopenharmony_ci+      vppt1 = png_ptr(uint8x8x2_t, &vtmp1);
7695bec5421Sopenharmony_ci+      vpp1 = *vppt1;
7705bec5421Sopenharmony_ci+      vtmp1 = vld2_u32(png_ptrc(uint32_t, np));
7715bec5421Sopenharmony_ci+      vnpt1 = png_ptr(uint8x8x2_t, &vtmp1);
7725bec5421Sopenharmony_ci+      vnp1 = *vnpt1;
7735bec5421Sopenharmony_ci+
7745bec5421Sopenharmony_ci+      vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp1.val[0]);
7755bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp1.val[0]);
7765bec5421Sopenharmony_ci+      vdest.val[1] = vhadd_u8(vdest.val[0], vpp1.val[1]);
7775bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[1], vrp1.val[1]);
7785bec5421Sopenharmony_ci+      vdest.val[IND3] = vdest.val[1]; /* keep the last first-row pixel where the tail code expects "left" */
7795bec5421Sopenharmony_ci+      vdest_val1 = png_ldr(uint32x2x2_t, &vdest); /* &vdest addresses val[0], so this picks up val[0] and val[1] */
7805bec5421Sopenharmony_ci+      vst2_lane_u32(png_ptr(uint32_t, rp), vdest_val1, 0);
7815bec5421Sopenharmony_ci+
7825bec5421Sopenharmony_ci+      vdestN.val[0] = vhadd_u8(vdestN.val[IND3], vdest.val[0]); /* second row again averages against the rebuilt first row */
7835bec5421Sopenharmony_ci+      vdestN.val[0] = vadd_u8(vdestN.val[0], vnp1.val[0]);
7845bec5421Sopenharmony_ci+      vdestN.val[1] = vhadd_u8(vdestN.val[0], vdest.val[1]);
7855bec5421Sopenharmony_ci+      vdestN.val[1] = vadd_u8(vdestN.val[1], vnp1.val[1]);
7865bec5421Sopenharmony_ci+      vdestN.val[IND3] = vdestN.val[1]; /* same carry for the second row */
7875bec5421Sopenharmony_ci+      vdest_val1 = png_ldr(uint32x2x2_t, &vdestN);
7885bec5421Sopenharmony_ci+      vst2_lane_u32(png_ptr(uint32_t, np), vdest_val1, 0);
7895bec5421Sopenharmony_ci+
7905bec5421Sopenharmony_ci+      rp += STEP_RGBA_HALF;
7915bec5421Sopenharmony_ci+      pp += STEP_RGBA_HALF;
7925bec5421Sopenharmony_ci+      np += STEP_RGBA_HALF;
7935bec5421Sopenharmony_ci+      count -= STEP_RGBA_HALF;
7945bec5421Sopenharmony_ci+   }
7955bec5421Sopenharmony_ci+
7965bec5421Sopenharmony_ci+   if (count == 0) { /* nothing left: skip the single-pixel tail */
7975bec5421Sopenharmony_ci+      return;
7985bec5421Sopenharmony_ci+   }
7995bec5421Sopenharmony_ci+
8005bec5421Sopenharmony_ci+   uint32x2_t vtmp2;
8015bec5421Sopenharmony_ci+   uint8x8_t *vrpt2, *vppt2, *vnpt2;
8025bec5421Sopenharmony_ci+   uint8x8_t vrp2, vpp2, vnp2;
8035bec5421Sopenharmony_ci+   uint32x2_t *temp_pointer; /* presumably for png_ldr - TODO confirm */
8045bec5421Sopenharmony_ci+   uint32x2_t vdest_val2;
8055bec5421Sopenharmony_ci+
8065bec5421Sopenharmony_ci+   vtmp2 = vld1_u32(png_ptr(uint32_t, rp)); /* loads two words; only lane 0 (one pixel) is used and stored */
8075bec5421Sopenharmony_ci+   vrpt2 = png_ptr(uint8x8_t, &vtmp2);
8085bec5421Sopenharmony_ci+   vrp2 = *vrpt2;
8095bec5421Sopenharmony_ci+   vtmp2 = vld1_u32(png_ptrc(uint32_t, pp));
8105bec5421Sopenharmony_ci+   vppt2 = png_ptr(uint8x8_t, &vtmp2);
8115bec5421Sopenharmony_ci+   vpp2 = *vppt2;
8125bec5421Sopenharmony_ci+   vtmp2 = vld1_u32(png_ptrc(uint32_t, np)); /* NOTE(review): reads one word beyond the last pixel of the second row - confirm padding */
8135bec5421Sopenharmony_ci+   vnpt2 = png_ptr(uint8x8_t, &vtmp2);
8145bec5421Sopenharmony_ci+   vnp2 = *vnpt2;
8155bec5421Sopenharmony_ci+
8165bec5421Sopenharmony_ci+   vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp2); /* final pixel of the first row */
8175bec5421Sopenharmony_ci+   vdest.val[0] = vadd_u8(vdest.val[0], vrp2);
8185bec5421Sopenharmony_ci+
8195bec5421Sopenharmony_ci+   vdest_val2 = png_ldr(uint32x2_t, &vdest); /* &vdest addresses val[0] */
8205bec5421Sopenharmony_ci+   vst1_lane_u32(png_ptr(uint32_t, rp), vdest_val2, 0);
8215bec5421Sopenharmony_ci+
8225bec5421Sopenharmony_ci+   vdestN.val[0] = vhadd_u8(vdestN.val[IND3], vdest.val[0]); /* final pixel of the second row */
8235bec5421Sopenharmony_ci+   vdestN.val[0] = vadd_u8(vdestN.val[0], vnp2);
8245bec5421Sopenharmony_ci+
8255bec5421Sopenharmony_ci+   vdest_val2 = png_ldr(uint32x2_t, &vdestN);
8265bec5421Sopenharmony_ci+   vst1_lane_u32(png_ptr(uint32_t, np), vdest_val2, 0);
8275bec5421Sopenharmony_ci+}
8285bec5421Sopenharmony_ci+
8295bec5421Sopenharmony_ci+static uint8x8_t paeth(uint8x8_t a, uint8x8_t b, uint8x8_t c)
8305bec5421Sopenharmony_ci+{ /* Per byte lane, returns a when pa <= pb and pa <= pc, otherwise b when pb <= pc, otherwise c. */
8315bec5421Sopenharmony_ci+   uint8x8_t d, e;
8325bec5421Sopenharmony_ci+   uint16x8_t p1, pa, pb, pc; /* 16-bit lanes so a + b and 2 * c cannot wrap */
8335bec5421Sopenharmony_ci+   /* The callers visible in this file pass a = pixel to the left, b = pixel above, c = pixel above-left. */
8345bec5421Sopenharmony_ci+   p1 = vaddl_u8(a, b); /* a + b */
8355bec5421Sopenharmony_ci+   pc = vaddl_u8(c, c); /* c * 2 */
8365bec5421Sopenharmony_ci+   pa = vabdl_u8(b, c); /* pa = |b - c|, i.e. |(a + b - c) - a| */
8375bec5421Sopenharmony_ci+   pb = vabdl_u8(a, c); /* pb = |a - c|, i.e. |(a + b - c) - b| */
8385bec5421Sopenharmony_ci+   pc = vabdq_u16(p1, pc); /* pc = |a + b - 2c|, i.e. |(a + b - c) - c| */
8395bec5421Sopenharmony_ci+   /* From here on p1, pa and pb are reused as all-ones / all-zeros comparison masks. */
8405bec5421Sopenharmony_ci+   p1 = vcleq_u16(pa, pb); /* pa <= pb */
8415bec5421Sopenharmony_ci+   pa = vcleq_u16(pa, pc); /* pa <= pc */
8425bec5421Sopenharmony_ci+   pb = vcleq_u16(pb, pc); /* pb <= pc */
8435bec5421Sopenharmony_ci+
8445bec5421Sopenharmony_ci+   p1 = vandq_u16(p1, pa); /* pa <= pb && pa <= pc */
8455bec5421Sopenharmony_ci+   /* Narrow the 16-bit masks to 8 bits (low byte kept) so they can drive byte-wise selects. */
8465bec5421Sopenharmony_ci+   d = vmovn_u16(pb);
8475bec5421Sopenharmony_ci+   e = vmovn_u16(p1);
8485bec5421Sopenharmony_ci+
8495bec5421Sopenharmony_ci+   d = vbsl_u8(d, b, c); /* b where pb <= pc, else c */
8505bec5421Sopenharmony_ci+   e = vbsl_u8(e, a, d); /* a where pa is the minimum, else the b/c choice above */
8515bec5421Sopenharmony_ci+
8525bec5421Sopenharmony_ci+   return e;
8535bec5421Sopenharmony_ci+}
8545bec5421Sopenharmony_ci+
8555bec5421Sopenharmony_ci+void png_read_filter_row_paeth3_neon(png_row_infop row_info, png_bytep row,
8565bec5421Sopenharmony_ci+   png_const_bytep prev_row)
8575bec5421Sopenharmony_ci+{ /* Rebuilds `row` in place for 3-byte pixels: out = raw + paeth(left, above, above-left), with `prev_row` as the row above. */
8585bec5421Sopenharmony_ci+   png_bytep rp = row;
8595bec5421Sopenharmony_ci+   png_const_bytep pp = prev_row;
8605bec5421Sopenharmony_ci+   png_bytep rp_stop = row + row_info->rowbytes; /* one past the last byte of the row */
8615bec5421Sopenharmony_ci+
8625bec5421Sopenharmony_ci+   uint8x16_t vtmp;
8635bec5421Sopenharmony_ci+   uint8x8x2_t *vrpt;
8645bec5421Sopenharmony_ci+   uint8x8x2_t vrp;
8655bec5421Sopenharmony_ci+   uint8x8_t vlast = vdup_n_u8(0); /* above-left pixel for the first pixel of a group; zero at row start */
8665bec5421Sopenharmony_ci+   uint8x8x4_t vdest;
8675bec5421Sopenharmony_ci+   vdest.val[IND3] = vdup_n_u8(0); /* val[IND3] doubles as the "left" pixel for the next group; zero at row start */
8685bec5421Sopenharmony_ci+
8695bec5421Sopenharmony_ci+   vtmp = vld1q_u8(rp); /* preload 16 raw bytes; NOTE(review): done regardless of rowbytes - confirm the buffer is padded */
8705bec5421Sopenharmony_ci+   vrpt = png_ptr(uint8x8x2_t, &vtmp); /* view the 16 bytes as two 8-byte halves: val[0] = bytes 0-7, val[1] = bytes 8-15 */
8715bec5421Sopenharmony_ci+   vrp = *vrpt;
8725bec5421Sopenharmony_ci+
8735bec5421Sopenharmony_ci+   uint8x8x2_t *vppt;
8745bec5421Sopenharmony_ci+   uint8x8x2_t vpp;
8755bec5421Sopenharmony_ci+   uint8x8_t vtmp1, vtmp2, vtmp3;
8765bec5421Sopenharmony_ci+   uint32x2_t *temp_pointer; /* not named in this body; presumably consumed by the png_ldr macro (defined outside this chunk) - TODO confirm */
8775bec5421Sopenharmony_ci+
8785bec5421Sopenharmony_ci+   png_debug(1, "in png_read_filter_row_paeth3_neon");
8795bec5421Sopenharmony_ci+
8805bec5421Sopenharmony_ci+   size_t tail_bytes = row_info->rowbytes % STEP_RGB; /* bytes left over after the full groups handled by the loop */
8815bec5421Sopenharmony_ci+   png_byte last_byte = *rp_stop; /* saved because the 4-byte stores below can reach one byte past the row (assuming OFFSET3 is 3); restored at the end */
8825bec5421Sopenharmony_ci+   png_bytep rp_stop_new = rp_stop - tail_bytes;
8835bec5421Sopenharmony_ci+   for (; rp < rp_stop_new; pp += STEP_RGB) /* rp itself advances inside the body, OFFSET3 after each of the four stores */
8845bec5421Sopenharmony_ci+   {
8855bec5421Sopenharmony_ci+      vtmp = vld1q_u8(pp);
8865bec5421Sopenharmony_ci+      vppt = png_ptr(uint8x8x2_t, &vtmp);
8875bec5421Sopenharmony_ci+      vpp = *vppt;
8885bec5421Sopenharmony_ci+
8895bec5421Sopenharmony_ci+      vdest.val[0] = paeth(vdest.val[IND3], vpp.val[0], vlast); /* pixel 0: left = last pixel of previous group, above-left = vlast */
8905bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); /* + raw, wrapping modulo 256 */
8915bec5421Sopenharmony_ci+
8925bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3); /* raw bytes shifted down by OFFSET3: next pixel in the low lanes */
8935bec5421Sopenharmony_ci+      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3); /* same shift applied to the row above */
8945bec5421Sopenharmony_ci+      vdest.val[1] = paeth(vdest.val[0], vtmp2, vpp.val[0]); /* pixel 1: above-left is the unshifted row above */
8955bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[1], vtmp1);
8965bec5421Sopenharmony_ci+
8975bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET6);
8985bec5421Sopenharmony_ci+      vtmp3 = vext_u8(vpp.val[0], vpp.val[1], OFFSET6);
8995bec5421Sopenharmony_ci+      vdest.val[IND2] = paeth(vdest.val[1], vtmp3, vtmp2); /* pixel 2 */
9005bec5421Sopenharmony_ci+      vdest.val[IND2] = vadd_u8(vdest.val[IND2], vtmp1);
9015bec5421Sopenharmony_ci+
9025bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1); /* rotate the high half by one byte: bytes 9.. of the 16 loaded land in the low lanes */
9035bec5421Sopenharmony_ci+      vtmp2 = vext_u8(vpp.val[1], vpp.val[1], 1);
9045bec5421Sopenharmony_ci+
9055bec5421Sopenharmony_ci+      vtmp = vld1q_u8(rp + STEP_RGB); /* fetch the next group's raw bytes BEFORE the stores below, whose last 4-byte write may touch that group's first byte */
9065bec5421Sopenharmony_ci+      vrpt = png_ptr(uint8x8x2_t, &vtmp);
9075bec5421Sopenharmony_ci+      vrp = *vrpt;
9085bec5421Sopenharmony_ci+
9095bec5421Sopenharmony_ci+      vdest.val[IND3] = paeth(vdest.val[IND2], vtmp2, vtmp3); /* pixel 3; also becomes "left" for the next group */
9105bec5421Sopenharmony_ci+      vdest.val[IND3] = vadd_u8(vdest.val[IND3], vtmp1);
9115bec5421Sopenharmony_ci+
9125bec5421Sopenharmony_ci+      vlast = vtmp2; /* pixel 3 of the row above is above-left for the next group's pixel 0 */
9135bec5421Sopenharmony_ci+
9145bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); /* each store writes 4 bytes; the following store overwrites the extra byte */
9155bec5421Sopenharmony_ci+      rp += OFFSET3;
9165bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0);
9175bec5421Sopenharmony_ci+      rp += OFFSET3;
9185bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND2]), 0);
9195bec5421Sopenharmony_ci+      rp += OFFSET3;
9205bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND3]), 0);
9215bec5421Sopenharmony_ci+      rp += OFFSET3;
9225bec5421Sopenharmony_ci+   }
9235bec5421Sopenharmony_ci+
9245bec5421Sopenharmony_ci+   vtmp = vld1q_u8(pp); /* row-above bytes for the tail; NOTE(review): loaded even when tail_bytes is 0 - confirm prev_row is padded for a 16-byte read here */
9255bec5421Sopenharmony_ci+   vppt = png_ptr(uint8x8x2_t, &vtmp);
9265bec5421Sopenharmony_ci+   vpp = *vppt;
9275bec5421Sopenharmony_ci+
9285bec5421Sopenharmony_ci+   if (tail_bytes == TAIL_RGB1) { /* one leftover pixel (presumably TAIL_RGB1/2/3 are 3/6/9 bytes - TODO confirm) */
9295bec5421Sopenharmony_ci+      vdest.val[0] = paeth(vdest.val[IND3], vpp.val[0], vlast);
9305bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
9315bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0);
9325bec5421Sopenharmony_ci+   } else if (tail_bytes == TAIL_RGB2) { /* two leftover pixels: same steps as pixels 0-1 of the loop */
9335bec5421Sopenharmony_ci+      vdest.val[0] = paeth(vdest.val[IND3], vpp.val[0], vlast);
9345bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
9355bec5421Sopenharmony_ci+
9365bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3);
9375bec5421Sopenharmony_ci+      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3);
9385bec5421Sopenharmony_ci+      vdest.val[1] = paeth(vdest.val[0], vtmp2, vpp.val[0]);
9395bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[1], vtmp1);
9405bec5421Sopenharmony_ci+
9415bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0);
9425bec5421Sopenharmony_ci+      rp += OFFSET3;
9435bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0);
9445bec5421Sopenharmony_ci+   } else if (tail_bytes == TAIL_RGB3) { /* three leftover pixels: same steps as pixels 0-2 of the loop */
9455bec5421Sopenharmony_ci+      vdest.val[0] = paeth(vdest.val[IND3], vpp.val[0], vlast);
9465bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
9475bec5421Sopenharmony_ci+
9485bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3);
9495bec5421Sopenharmony_ci+      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3);
9505bec5421Sopenharmony_ci+      vdest.val[1] = paeth(vdest.val[0], vtmp2, vpp.val[0]);
9515bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[1], vtmp1);
9525bec5421Sopenharmony_ci+
9535bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET6);
9545bec5421Sopenharmony_ci+      vtmp3 = vext_u8(vpp.val[0], vpp.val[1], OFFSET6);
9555bec5421Sopenharmony_ci+      vdest.val[IND2] = paeth(vdest.val[1], vtmp3, vtmp2);
9565bec5421Sopenharmony_ci+      vdest.val[IND2] = vadd_u8(vdest.val[IND2], vtmp1);
9575bec5421Sopenharmony_ci+
9585bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0);
9595bec5421Sopenharmony_ci+      rp += OFFSET3;
9605bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0);
9615bec5421Sopenharmony_ci+      rp += OFFSET3;
9625bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND2]), 0);
9635bec5421Sopenharmony_ci+   }
9645bec5421Sopenharmony_ci+   *rp_stop = last_byte; /* undo any spill of the final 4-byte store into the byte after the row */
9655bec5421Sopenharmony_ci+}
9665bec5421Sopenharmony_ci+
9675bec5421Sopenharmony_ci+void png_read_filter_row_paeth3_x2_neon(png_row_infop row_info, png_bytep row,
9685bec5421Sopenharmony_ci+   png_const_bytep prev_row)
9695bec5421Sopenharmony_ci+{
9705bec5421Sopenharmony_ci+   png_bytep rp = row;
9715bec5421Sopenharmony_ci+   png_const_bytep pp = prev_row;
9725bec5421Sopenharmony_ci+   png_bytep rp_stop = row + row_info->rowbytes;
9735bec5421Sopenharmony_ci+   png_bytep np = rp_stop + 1;
9745bec5421Sopenharmony_ci+
9755bec5421Sopenharmony_ci+   uint8x16_t vtmp;
9765bec5421Sopenharmony_ci+   uint8x8x2_t *vrpt;
9775bec5421Sopenharmony_ci+   uint8x8x2_t vrp;
9785bec5421Sopenharmony_ci+   uint8x8_t vlast = vdup_n_u8(0);
9795bec5421Sopenharmony_ci+   uint8x8x4_t vdest;
9805bec5421Sopenharmony_ci+   vdest.val[IND3] = vdup_n_u8(0);
9815bec5421Sopenharmony_ci+
9825bec5421Sopenharmony_ci+   vtmp = vld1q_u8(rp);
9835bec5421Sopenharmony_ci+   vrpt = png_ptr(uint8x8x2_t, &vtmp);
9845bec5421Sopenharmony_ci+   vrp = *vrpt;
9855bec5421Sopenharmony_ci+
9865bec5421Sopenharmony_ci+   uint8x8x2_t *vppt;
9875bec5421Sopenharmony_ci+   uint8x8x2_t vpp;
9885bec5421Sopenharmony_ci+   uint8x8_t vtmp1, vtmp2, vtmp3;
9895bec5421Sopenharmony_ci+   uint32x2_t *temp_pointer;
9905bec5421Sopenharmony_ci+
9915bec5421Sopenharmony_ci+   uint8x8x2_t *vnpt;
9925bec5421Sopenharmony_ci+   uint8x8x2_t vnp;
9935bec5421Sopenharmony_ci+   uint8x8_t vlastN = vdup_n_u8(0);
9945bec5421Sopenharmony_ci+   uint8x8x4_t vdestN;
9955bec5421Sopenharmony_ci+   vdestN.val[IND3] = vdup_n_u8(0);
9965bec5421Sopenharmony_ci+
9975bec5421Sopenharmony_ci+   vtmp = vld1q_u8(np);
9985bec5421Sopenharmony_ci+   vnpt = png_ptr(uint8x8x2_t, &vtmp);
9995bec5421Sopenharmony_ci+   vnp = *vnpt;
10005bec5421Sopenharmony_ci+
10015bec5421Sopenharmony_ci+   png_debug(1, "in png_read_filter_row_paeth3_x2_neon");
10025bec5421Sopenharmony_ci+
10035bec5421Sopenharmony_ci+   size_t tail_bytes = row_info->rowbytes % STEP_RGB;
10045bec5421Sopenharmony_ci+   png_byte last_byte = *rp_stop;
10055bec5421Sopenharmony_ci+   png_byte last_byte_next = *(rp_stop + row_info->rowbytes + 1);
10065bec5421Sopenharmony_ci+   png_bytep rp_stop_new = rp_stop - tail_bytes;
10075bec5421Sopenharmony_ci+
10085bec5421Sopenharmony_ci+   for (; rp < rp_stop_new; pp += STEP_RGB)
10095bec5421Sopenharmony_ci+   {
10105bec5421Sopenharmony_ci+      vtmp = vld1q_u8(pp);
10115bec5421Sopenharmony_ci+      vppt = png_ptr(uint8x8x2_t, &vtmp);
10125bec5421Sopenharmony_ci+      vpp = *vppt;
10135bec5421Sopenharmony_ci+
10145bec5421Sopenharmony_ci+      vdest.val[0] = paeth(vdest.val[IND3], vpp.val[0], vlast);
10155bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
10165bec5421Sopenharmony_ci+
10175bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3);
10185bec5421Sopenharmony_ci+      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3);
10195bec5421Sopenharmony_ci+      vdest.val[1] = paeth(vdest.val[0], vtmp2, vpp.val[0]);
10205bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[1], vtmp1);
10215bec5421Sopenharmony_ci+
10225bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET6);
10235bec5421Sopenharmony_ci+      vtmp3 = vext_u8(vpp.val[0], vpp.val[1], OFFSET6);
10245bec5421Sopenharmony_ci+      vdest.val[IND2] = paeth(vdest.val[1], vtmp3, vtmp2);
10255bec5421Sopenharmony_ci+      vdest.val[IND2] = vadd_u8(vdest.val[IND2], vtmp1);
10265bec5421Sopenharmony_ci+
10275bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1);
10285bec5421Sopenharmony_ci+      vtmp2 = vext_u8(vpp.val[1], vpp.val[1], 1);
10295bec5421Sopenharmony_ci+
10305bec5421Sopenharmony_ci+      vtmp = vld1q_u8(rp + STEP_RGB);
10315bec5421Sopenharmony_ci+      vrpt = png_ptr(uint8x8x2_t, &vtmp);
10325bec5421Sopenharmony_ci+      vrp = *vrpt;
10335bec5421Sopenharmony_ci+
10345bec5421Sopenharmony_ci+      vdest.val[IND3] = paeth(vdest.val[IND2], vtmp2, vtmp3);
10355bec5421Sopenharmony_ci+      vdest.val[IND3] = vadd_u8(vdest.val[IND3], vtmp1);
10365bec5421Sopenharmony_ci+
10375bec5421Sopenharmony_ci+      vlast = vtmp2;
10385bec5421Sopenharmony_ci+
10395bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0);
10405bec5421Sopenharmony_ci+      rp += OFFSET3;
10415bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0);
10425bec5421Sopenharmony_ci+      rp += OFFSET3;
10435bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND2]), 0);
10445bec5421Sopenharmony_ci+      rp += OFFSET3;
10455bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND3]), 0);
10465bec5421Sopenharmony_ci+      rp += OFFSET3;
10475bec5421Sopenharmony_ci+
10485bec5421Sopenharmony_ci+      vdestN.val[0] = paeth(vdestN.val[IND3], vdest.val[0], vlastN);
10495bec5421Sopenharmony_ci+      vdestN.val[0] = vadd_u8(vdestN.val[0], vnp.val[0]);
10505bec5421Sopenharmony_ci+
10515bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vnp.val[0], vnp.val[1], OFFSET3);
10525bec5421Sopenharmony_ci+      vdestN.val[1] = paeth(vdestN.val[0], vdest.val[1], vdest.val[0]);
10535bec5421Sopenharmony_ci+      vdestN.val[1] = vadd_u8(vdestN.val[1], vtmp1);
10545bec5421Sopenharmony_ci+
10555bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vnp.val[0], vnp.val[1], OFFSET6);
10565bec5421Sopenharmony_ci+      vdestN.val[IND2] = paeth(vdestN.val[1], vdest.val[IND2], vdest.val[1]);
10575bec5421Sopenharmony_ci+      vdestN.val[IND2] = vadd_u8(vdestN.val[IND2], vtmp1);
10585bec5421Sopenharmony_ci+
10595bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vnp.val[1], vnp.val[1], 1);
10605bec5421Sopenharmony_ci+
10615bec5421Sopenharmony_ci+      vtmp = vld1q_u8(np + STEP_RGB);
10625bec5421Sopenharmony_ci+      vnpt = png_ptr(uint8x8x2_t, &vtmp);
10635bec5421Sopenharmony_ci+      vnp = *vnpt;
10645bec5421Sopenharmony_ci+
10655bec5421Sopenharmony_ci+      vdestN.val[IND3] = paeth(vdestN.val[IND2], vdest.val[IND3], vdest.val[IND2]);
10665bec5421Sopenharmony_ci+      vdestN.val[IND3] = vadd_u8(vdestN.val[IND3], vtmp1);
10675bec5421Sopenharmony_ci+
10685bec5421Sopenharmony_ci+      vlastN = vdest.val[IND3];
10695bec5421Sopenharmony_ci+
10705bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[0]), 0);
10715bec5421Sopenharmony_ci+      np += OFFSET3;
10725bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[1]), 0);
10735bec5421Sopenharmony_ci+      np += OFFSET3;
10745bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[IND2]), 0);
10755bec5421Sopenharmony_ci+      np += OFFSET3;
10765bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[IND3]), 0);
10775bec5421Sopenharmony_ci+      np += OFFSET3;
10785bec5421Sopenharmony_ci+   }
10795bec5421Sopenharmony_ci+
10805bec5421Sopenharmony_ci+   vtmp = vld1q_u8(pp);
10815bec5421Sopenharmony_ci+   vppt = png_ptr(uint8x8x2_t, &vtmp);
10825bec5421Sopenharmony_ci+   vpp = *vppt;
10835bec5421Sopenharmony_ci+
10845bec5421Sopenharmony_ci+   if (tail_bytes == TAIL_RGB1) {
10855bec5421Sopenharmony_ci+      vdest.val[0] = paeth(vdest.val[IND3], vpp.val[0], vlast);
10865bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
10875bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0);
10885bec5421Sopenharmony_ci+
10895bec5421Sopenharmony_ci+      vdestN.val[0] = paeth(vdestN.val[IND3], vdest.val[0], vlastN);
10905bec5421Sopenharmony_ci+      vdestN.val[0] = vadd_u8(vdestN.val[0], vnp.val[0]);
10915bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[0]), 0);
10925bec5421Sopenharmony_ci+   } else if (tail_bytes == TAIL_RGB2) {
10935bec5421Sopenharmony_ci+      vdest.val[0] = paeth(vdest.val[IND3], vpp.val[0], vlast);
10945bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
10955bec5421Sopenharmony_ci+
10965bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3);
10975bec5421Sopenharmony_ci+      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3);
10985bec5421Sopenharmony_ci+      vdest.val[1] = paeth(vdest.val[0], vtmp2, vpp.val[0]);
10995bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[1], vtmp1);
11005bec5421Sopenharmony_ci+
11015bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0);
11025bec5421Sopenharmony_ci+      rp += OFFSET3;
11035bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0);
11045bec5421Sopenharmony_ci+
11055bec5421Sopenharmony_ci+      vdestN.val[0] = paeth(vdestN.val[IND3], vdest.val[0], vlastN);
11065bec5421Sopenharmony_ci+      vdestN.val[0] = vadd_u8(vdestN.val[0], vnp.val[0]);
11075bec5421Sopenharmony_ci+
11085bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vnp.val[0], vnp.val[1], OFFSET3);
11095bec5421Sopenharmony_ci+      vdestN.val[1] = paeth(vdestN.val[0], vdest.val[1], vdest.val[0]);
11105bec5421Sopenharmony_ci+      vdestN.val[1] = vadd_u8(vdestN.val[1], vtmp1);
11115bec5421Sopenharmony_ci+
11125bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[0]), 0);
11135bec5421Sopenharmony_ci+      np += OFFSET3;
11145bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[1]), 0);
11155bec5421Sopenharmony_ci+   } else if (tail_bytes == TAIL_RGB3) {
11165bec5421Sopenharmony_ci+      vdest.val[0] = paeth(vdest.val[IND3], vpp.val[0], vlast);
11175bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
11185bec5421Sopenharmony_ci+
11195bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3);
11205bec5421Sopenharmony_ci+      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3);
11215bec5421Sopenharmony_ci+      vdest.val[1] = paeth(vdest.val[0], vtmp2, vpp.val[0]);
11225bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[1], vtmp1);
11235bec5421Sopenharmony_ci+
11245bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET6);
11255bec5421Sopenharmony_ci+      vtmp3 = vext_u8(vpp.val[0], vpp.val[1], OFFSET6);
11265bec5421Sopenharmony_ci+      vdest.val[IND2] = paeth(vdest.val[1], vtmp3, vtmp2);
11275bec5421Sopenharmony_ci+      vdest.val[IND2] = vadd_u8(vdest.val[IND2], vtmp1);
11285bec5421Sopenharmony_ci+
11295bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0);
11305bec5421Sopenharmony_ci+      rp += OFFSET3;
11315bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0);
11325bec5421Sopenharmony_ci+      rp += OFFSET3;
11335bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND2]), 0);
11345bec5421Sopenharmony_ci+
11355bec5421Sopenharmony_ci+      vdestN.val[0] = paeth(vdestN.val[IND3], vdest.val[0], vlastN);
11365bec5421Sopenharmony_ci+      vdestN.val[0] = vadd_u8(vdestN.val[0], vnp.val[0]);
11375bec5421Sopenharmony_ci+
11385bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vnp.val[0], vnp.val[1], OFFSET3);
11395bec5421Sopenharmony_ci+      vdestN.val[1] = paeth(vdestN.val[0], vdest.val[1], vdest.val[0]);
11405bec5421Sopenharmony_ci+      vdestN.val[1] = vadd_u8(vdestN.val[1], vtmp1);
11415bec5421Sopenharmony_ci+
11425bec5421Sopenharmony_ci+      vtmp1 = vext_u8(vnp.val[0], vnp.val[1], OFFSET6);
11435bec5421Sopenharmony_ci+      vdestN.val[IND2] = paeth(vdestN.val[1], vdest.val[IND2], vdest.val[1]);
11445bec5421Sopenharmony_ci+      vdestN.val[IND2] = vadd_u8(vdestN.val[IND2], vtmp1);
11455bec5421Sopenharmony_ci+
11465bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[0]), 0);
11475bec5421Sopenharmony_ci+      np += OFFSET3;
11485bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[1]), 0);
11495bec5421Sopenharmony_ci+      np += OFFSET3;
11505bec5421Sopenharmony_ci+      vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[IND2]), 0);
11515bec5421Sopenharmony_ci+   }
11525bec5421Sopenharmony_ci+   *rp_stop = last_byte;
11535bec5421Sopenharmony_ci+   *(rp_stop + row_info->rowbytes + 1) = last_byte_next;
11545bec5421Sopenharmony_ci+}
11555bec5421Sopenharmony_ci+
11565bec5421Sopenharmony_ci+void png_read_filter_row_paeth4_neon(png_row_infop row_info, png_bytep row,
11575bec5421Sopenharmony_ci+   png_const_bytep prev_row)
11585bec5421Sopenharmony_ci+{ /* Reverses the Paeth filter in place on row (4-byte pixels); prev_row supplies the row above. */
11595bec5421Sopenharmony_ci+   png_bytep rp = row;
11605bec5421Sopenharmony_ci+   int count = row_info->rowbytes; /* bytes of the row still to be processed */
11615bec5421Sopenharmony_ci+   png_const_bytep pp = prev_row;
11625bec5421Sopenharmony_ci+
11635bec5421Sopenharmony_ci+   uint8x8_t vlast = vdup_n_u8(0); /* prev_row pixel one position back (upper-left input); zero at row start */
11645bec5421Sopenharmony_ci+   uint8x8x4_t vdest;
11655bec5421Sopenharmony_ci+   vdest.val[IND3] = vdup_n_u8(0); /* val[IND3] carries the last reconstructed pixel (left input); zero at row start */
11665bec5421Sopenharmony_ci+
11675bec5421Sopenharmony_ci+   png_debug(1, "in png_read_filter_row_paeth4_neon");
11685bec5421Sopenharmony_ci+
11695bec5421Sopenharmony_ci+   uint32x2x4_t vtmp;
11705bec5421Sopenharmony_ci+   uint8x8x4_t *vrpt, *vppt;
11715bec5421Sopenharmony_ci+   uint8x8x4_t vrp, vpp;
11725bec5421Sopenharmony_ci+   uint32x2x4_t vdest_val;
11735bec5421Sopenharmony_ci+   while (count >= STEP_RGBA) { /* main loop: one pixel sits in lane 0 of each of val[0]..val[IND3] */
11745bec5421Sopenharmony_ci+      uint32x2x4_t *temp_pointer; /* NOTE(review): declared but not referenced in this loop */
11755bec5421Sopenharmony_ci+      vtmp = vld4_u32(png_ptr(uint32_t, rp)); /* de-interleaving load of 32-bit words from the current row */
11765bec5421Sopenharmony_ci+      vrpt = png_ptr(uint8x8x4_t, &vtmp); /* view the words as byte vectors (png_ptr is defined outside this chunk, presumably a pointer cast) */
11775bec5421Sopenharmony_ci+      vrp = *vrpt;
11785bec5421Sopenharmony_ci+      vtmp = vld4_u32(png_ptrc(uint32_t, pp)); /* same load for the previous row */
11795bec5421Sopenharmony_ci+      vppt = png_ptr(uint8x8x4_t, &vtmp);
11805bec5421Sopenharmony_ci+      vpp = *vppt;
11815bec5421Sopenharmony_ci+
11825bec5421Sopenharmony_ci+      vdest.val[0] = paeth(vdest.val[IND3], vpp.val[0], vlast); /* paeth() is defined earlier in the file; inputs here are (left, above, upper-left) */
11835bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); /* add the filtered bytes to the paeth() result */
11845bec5421Sopenharmony_ci+      vdest.val[1] = paeth(vdest.val[0], vpp.val[1], vpp.val[0]); /* each pixel depends on the one just reconstructed */
11855bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[1], vrp.val[1]);
11865bec5421Sopenharmony_ci+      vdest.val[IND2] = paeth(vdest.val[1], vpp.val[IND2], vpp.val[1]);
11875bec5421Sopenharmony_ci+      vdest.val[IND2] = vadd_u8(vdest.val[IND2], vrp.val[IND2]);
11885bec5421Sopenharmony_ci+      vdest.val[IND3] = paeth(vdest.val[IND2], vpp.val[IND3], vpp.val[IND2]);
11895bec5421Sopenharmony_ci+      vdest.val[IND3] = vadd_u8(vdest.val[IND3], vrp.val[IND3]);
11905bec5421Sopenharmony_ci+
11915bec5421Sopenharmony_ci+      vlast = vpp.val[IND3]; /* becomes the upper-left input of the next pass */
11925bec5421Sopenharmony_ci+
11935bec5421Sopenharmony_ci+      vdest_val = png_ldr(uint32x2x4_t, &vdest);
11945bec5421Sopenharmony_ci+      vst4_lane_u32(png_ptr(uint32_t, rp), vdest_val, 0); /* write lane 0 of all four vectors back over the filtered bytes */
11955bec5421Sopenharmony_ci+
11965bec5421Sopenharmony_ci+      rp += STEP_RGBA;
11975bec5421Sopenharmony_ci+      pp += STEP_RGBA;
11985bec5421Sopenharmony_ci+      count -= STEP_RGBA;
11995bec5421Sopenharmony_ci+   }
12005bec5421Sopenharmony_ci+
12015bec5421Sopenharmony_ci+   if (count >= STEP_RGBA_HALF) { /* two-pixel step: same scheme using val[0] and val[1] only */
12025bec5421Sopenharmony_ci+      uint32x2x2_t vtmp1;
12035bec5421Sopenharmony_ci+      uint8x8x2_t *vrpt1, *vppt1;
12045bec5421Sopenharmony_ci+      uint8x8x2_t vrp1, vpp1;
12055bec5421Sopenharmony_ci+      uint32x2x2_t *temp_pointer; /* NOTE(review): declared but not referenced in this block */
12065bec5421Sopenharmony_ci+      uint32x2x2_t vdest_val1;
12075bec5421Sopenharmony_ci+
12085bec5421Sopenharmony_ci+      vtmp1 = vld2_u32(png_ptr(uint32_t, rp));
12095bec5421Sopenharmony_ci+      vrpt1 = png_ptr(uint8x8x2_t, &vtmp1);
12105bec5421Sopenharmony_ci+      vrp1 = *vrpt1;
12115bec5421Sopenharmony_ci+      vtmp1 = vld2_u32(png_ptrc(uint32_t, pp));
12125bec5421Sopenharmony_ci+      vppt1 = png_ptr(uint8x8x2_t, &vtmp1);
12135bec5421Sopenharmony_ci+      vpp1 = *vppt1;
12145bec5421Sopenharmony_ci+
12155bec5421Sopenharmony_ci+      vdest.val[0] = paeth(vdest.val[IND3], vpp1.val[0], vlast);
12165bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp1.val[0]);
12175bec5421Sopenharmony_ci+      vdest.val[1] = paeth(vdest.val[0], vpp1.val[1], vpp1.val[0]);
12185bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[1], vrp1.val[1]);
12195bec5421Sopenharmony_ci+      vlast = vpp1.val[1];
12205bec5421Sopenharmony_ci+
12215bec5421Sopenharmony_ci+      vdest_val1 = png_ldr(uint32x2x2_t, &vdest); /* reads the leading part of vdest, i.e. val[0] and val[1] */
12225bec5421Sopenharmony_ci+      vst2_lane_u32(png_ptr(uint32_t, rp), vdest_val1, 0);
12235bec5421Sopenharmony_ci+      vdest.val[IND3] = vdest.val[1]; /* the tail step below reads its left input from val[IND3] */
12245bec5421Sopenharmony_ci+
12255bec5421Sopenharmony_ci+      rp += STEP_RGBA_HALF;
12265bec5421Sopenharmony_ci+      pp += STEP_RGBA_HALF;
12275bec5421Sopenharmony_ci+      count -= STEP_RGBA_HALF;
12285bec5421Sopenharmony_ci+   }
12295bec5421Sopenharmony_ci+
12305bec5421Sopenharmony_ci+   if (count == 0) {
12315bec5421Sopenharmony_ci+      return;
12325bec5421Sopenharmony_ci+   } /* from here on: whatever remains is handled as one final pixel */
12335bec5421Sopenharmony_ci+
12345bec5421Sopenharmony_ci+   uint32x2_t vtmp2;
12355bec5421Sopenharmony_ci+   uint8x8_t *vrpt2, *vppt2;
12365bec5421Sopenharmony_ci+   uint8x8_t vrp2, vpp2;
12375bec5421Sopenharmony_ci+   uint32x2_t *temp_pointer; /* NOTE(review): declared but not referenced */
12385bec5421Sopenharmony_ci+   uint32x2_t vdest_val2;
12395bec5421Sopenharmony_ci+
12405bec5421Sopenharmony_ci+   vtmp2 = vld1_u32(png_ptr(uint32_t, rp)); /* loads 64 bits although only lane 0 is stored below */
12415bec5421Sopenharmony_ci+   vrpt2 = png_ptr(uint8x8_t, &vtmp2);
12425bec5421Sopenharmony_ci+   vrp2 = *vrpt2;
12435bec5421Sopenharmony_ci+   vtmp2 = vld1_u32(png_ptrc(uint32_t, pp));
12445bec5421Sopenharmony_ci+   vppt2 = png_ptr(uint8x8_t, &vtmp2);
12455bec5421Sopenharmony_ci+   vpp2 = *vppt2;
12465bec5421Sopenharmony_ci+
12475bec5421Sopenharmony_ci+   vdest.val[0] = paeth(vdest.val[IND3], vpp2, vlast);
12485bec5421Sopenharmony_ci+   vdest.val[0] = vadd_u8(vdest.val[0], vrp2);
12495bec5421Sopenharmony_ci+
12505bec5421Sopenharmony_ci+   vdest_val2 = png_ldr(uint32x2_t, &vdest); /* &vdest addresses val[0], the first member */
12515bec5421Sopenharmony_ci+   vst1_lane_u32(png_ptr(uint32_t, rp), vdest_val2, 0);
12525bec5421Sopenharmony_ci+}
12535bec5421Sopenharmony_ci+
12545bec5421Sopenharmony_ci+void png_read_filter_row_paeth4_x2_neon(png_row_infop row_info, png_bytep row,
12555bec5421Sopenharmony_ci+   png_const_bytep prev_row)
12565bec5421Sopenharmony_ci+{ /* Reverses the Paeth filter in place on two consecutive rows (4-byte pixels): row, then the row stored after it. */
12575bec5421Sopenharmony_ci+   png_bytep rp = row;
12585bec5421Sopenharmony_ci+   int count = row_info->rowbytes; /* bytes per row still to be processed */
12595bec5421Sopenharmony_ci+   png_const_bytep pp = prev_row;
12605bec5421Sopenharmony_ci+   png_bytep np = row + row_info->rowbytes + 1; /* second row's data: rowbytes + 1 past row (the skipped byte is presumably that row's filter byte) */
12615bec5421Sopenharmony_ci+
12625bec5421Sopenharmony_ci+   uint8x8_t vlast = vdup_n_u8(0); /* first row: prev_row pixel one position back (upper-left input) */
12635bec5421Sopenharmony_ci+   uint8x8x4_t vdest;
12645bec5421Sopenharmony_ci+   vdest.val[IND3] = vdup_n_u8(0); /* first row: last reconstructed pixel (left input) */
12655bec5421Sopenharmony_ci+
12665bec5421Sopenharmony_ci+   png_debug(1, "in png_read_filter_row_paeth4_x2_neon");
12675bec5421Sopenharmony_ci+
12685bec5421Sopenharmony_ci+   uint32x2x4_t vtmp;
12695bec5421Sopenharmony_ci+   uint8x8x4_t *vrpt, *vppt;
12705bec5421Sopenharmony_ci+   uint8x8x4_t vrp, vpp;
12715bec5421Sopenharmony_ci+   uint32x2x4_t vdest_val;
12725bec5421Sopenharmony_ci+
12735bec5421Sopenharmony_ci+   uint8x8x4_t *vnpt;
12745bec5421Sopenharmony_ci+   uint8x8x4_t vnp;
12755bec5421Sopenharmony_ci+   uint8x8_t vlastN = vdup_n_u8(0); /* second row: first-row pixel one position back (upper-left input) */
12765bec5421Sopenharmony_ci+   uint8x8x4_t vdestN;
12775bec5421Sopenharmony_ci+   vdestN.val[IND3] = vdup_n_u8(0); /* second row: last reconstructed pixel (left input) */
12785bec5421Sopenharmony_ci+
12795bec5421Sopenharmony_ci+   while (count >= STEP_RGBA) { /* main loop: one pixel sits in lane 0 of each of val[0]..val[IND3], for both rows */
12805bec5421Sopenharmony_ci+      uint32x2x4_t *temp_pointer; /* NOTE(review): declared but not referenced in this loop */
12815bec5421Sopenharmony_ci+      vtmp = vld4_u32(png_ptr(uint32_t, rp)); /* de-interleaving load of 32-bit words from the first row */
12825bec5421Sopenharmony_ci+      vrpt = png_ptr(uint8x8x4_t, &vtmp); /* view the words as byte vectors (png_ptr is defined outside this chunk, presumably a pointer cast) */
12835bec5421Sopenharmony_ci+      vrp = *vrpt;
12845bec5421Sopenharmony_ci+      vtmp = vld4_u32(png_ptrc(uint32_t, pp)); /* previous row */
12855bec5421Sopenharmony_ci+      vppt = png_ptr(uint8x8x4_t, &vtmp);
12865bec5421Sopenharmony_ci+      vpp = *vppt;
12875bec5421Sopenharmony_ci+      vtmp = vld4_u32(png_ptrc(uint32_t, np)); /* second row, still filtered */
12885bec5421Sopenharmony_ci+      vnpt = png_ptr(uint8x8x4_t, &vtmp);
12895bec5421Sopenharmony_ci+      vnp = *vnpt;
12905bec5421Sopenharmony_ci+
12915bec5421Sopenharmony_ci+      vdest.val[0] = paeth(vdest.val[IND3], vpp.val[0], vlast); /* first row: inputs are (left, above, upper-left) */
12925bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
12935bec5421Sopenharmony_ci+      vdest.val[1] = paeth(vdest.val[0], vpp.val[1], vpp.val[0]);
12945bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[1], vrp.val[1]);
12955bec5421Sopenharmony_ci+      vdest.val[IND2] = paeth(vdest.val[1], vpp.val[IND2], vpp.val[1]);
12965bec5421Sopenharmony_ci+      vdest.val[IND2] = vadd_u8(vdest.val[IND2], vrp.val[IND2]);
12975bec5421Sopenharmony_ci+      vdest.val[IND3] = paeth(vdest.val[IND2], vpp.val[IND3], vpp.val[IND2]);
12985bec5421Sopenharmony_ci+      vdest.val[IND3] = vadd_u8(vdest.val[IND3], vrp.val[IND3]);
12995bec5421Sopenharmony_ci+
13005bec5421Sopenharmony_ci+      vlast = vpp.val[IND3];
13015bec5421Sopenharmony_ci+
13025bec5421Sopenharmony_ci+      vdest_val = png_ldr(uint32x2x4_t, &vdest);
13035bec5421Sopenharmony_ci+      vst4_lane_u32(png_ptr(uint32_t, rp), vdest_val, 0); /* write the reconstructed first-row pixels back */
13045bec5421Sopenharmony_ci+
13055bec5421Sopenharmony_ci+      vdestN.val[0] = paeth(vdestN.val[IND3], vdest.val[0], vlastN); /* second row: "above" is the first row just reconstructed */
13065bec5421Sopenharmony_ci+      vdestN.val[0] = vadd_u8(vdestN.val[0], vnp.val[0]);
13075bec5421Sopenharmony_ci+      vdestN.val[1] = paeth(vdestN.val[0], vdest.val[1], vdest.val[0]);
13085bec5421Sopenharmony_ci+      vdestN.val[1] = vadd_u8(vdestN.val[1], vnp.val[1]);
13095bec5421Sopenharmony_ci+      vdestN.val[IND2] = paeth(vdestN.val[1], vdest.val[IND2], vdest.val[1]);
13105bec5421Sopenharmony_ci+      vdestN.val[IND2] = vadd_u8(vdestN.val[IND2], vnp.val[IND2]);
13115bec5421Sopenharmony_ci+      vdestN.val[IND3] = paeth(vdestN.val[IND2], vdest.val[IND3], vdest.val[IND2]);
13125bec5421Sopenharmony_ci+      vdestN.val[IND3] = vadd_u8(vdestN.val[IND3], vnp.val[IND3]);
13135bec5421Sopenharmony_ci+
13145bec5421Sopenharmony_ci+      vlastN = vdest.val[IND3]; /* last first-row pixel is the second row's next upper-left input */
13155bec5421Sopenharmony_ci+
13165bec5421Sopenharmony_ci+      vdest_val = png_ldr(uint32x2x4_t, &vdestN); /* vdest_val is reused for the second row's store */
13175bec5421Sopenharmony_ci+      vst4_lane_u32(png_ptr(uint32_t, np), vdest_val, 0);
13185bec5421Sopenharmony_ci+
13195bec5421Sopenharmony_ci+      rp += STEP_RGBA;
13205bec5421Sopenharmony_ci+      pp += STEP_RGBA;
13215bec5421Sopenharmony_ci+      np += STEP_RGBA;
13225bec5421Sopenharmony_ci+      count -= STEP_RGBA;
13235bec5421Sopenharmony_ci+   }
13245bec5421Sopenharmony_ci+
13255bec5421Sopenharmony_ci+   if (count >= STEP_RGBA_HALF) { /* two-pixel step: same scheme using val[0] and val[1] only */
13265bec5421Sopenharmony_ci+      uint32x2x2_t vtmp1;
13275bec5421Sopenharmony_ci+      uint8x8x2_t *vrpt1, *vppt1, *vnpt1;
13285bec5421Sopenharmony_ci+      uint8x8x2_t vrp1, vpp1, vnp1;
13295bec5421Sopenharmony_ci+      uint32x2x2_t *temp_pointer; /* NOTE(review): declared but not referenced in this block */
13305bec5421Sopenharmony_ci+      uint32x2x2_t vdest_val1;
13315bec5421Sopenharmony_ci+
13325bec5421Sopenharmony_ci+      vtmp1 = vld2_u32(png_ptr(uint32_t, rp));
13335bec5421Sopenharmony_ci+      vrpt1 = png_ptr(uint8x8x2_t, &vtmp1);
13345bec5421Sopenharmony_ci+      vrp1 = *vrpt1;
13355bec5421Sopenharmony_ci+      vtmp1 = vld2_u32(png_ptrc(uint32_t, pp));
13365bec5421Sopenharmony_ci+      vppt1 = png_ptr(uint8x8x2_t, &vtmp1);
13375bec5421Sopenharmony_ci+      vpp1 = *vppt1;
13385bec5421Sopenharmony_ci+      vtmp1 = vld2_u32(png_ptrc(uint32_t, np));
13395bec5421Sopenharmony_ci+      vnpt1 = png_ptr(uint8x8x2_t, &vtmp1);
13405bec5421Sopenharmony_ci+      vnp1 = *vnpt1;
13415bec5421Sopenharmony_ci+
13425bec5421Sopenharmony_ci+      vdest.val[0] = paeth(vdest.val[IND3], vpp1.val[0], vlast);
13435bec5421Sopenharmony_ci+      vdest.val[0] = vadd_u8(vdest.val[0], vrp1.val[0]);
13445bec5421Sopenharmony_ci+      vdest.val[1] = paeth(vdest.val[0], vpp1.val[1], vpp1.val[0]);
13455bec5421Sopenharmony_ci+      vdest.val[1] = vadd_u8(vdest.val[1], vrp1.val[1]);
13465bec5421Sopenharmony_ci+
13475bec5421Sopenharmony_ci+      vlast = vpp1.val[1];
13485bec5421Sopenharmony_ci+
13495bec5421Sopenharmony_ci+      vdest_val1 = png_ldr(uint32x2x2_t, &vdest); /* reads the leading part of vdest, i.e. val[0] and val[1] */
13505bec5421Sopenharmony_ci+      vst2_lane_u32(png_ptr(uint32_t, rp), vdest_val1, 0);
13515bec5421Sopenharmony_ci+
13525bec5421Sopenharmony_ci+      vdest.val[IND3] = vdest.val[1]; /* the tail step below reads the first row's left input from val[IND3] */
13535bec5421Sopenharmony_ci+
13545bec5421Sopenharmony_ci+      vdestN.val[0] = paeth(vdestN.val[IND3], vdest.val[0], vlastN);
13555bec5421Sopenharmony_ci+      vdestN.val[0] = vadd_u8(vdestN.val[0], vnp1.val[0]);
13565bec5421Sopenharmony_ci+      vdestN.val[1] = paeth(vdestN.val[0], vdest.val[1], vdest.val[0]);
13575bec5421Sopenharmony_ci+      vdestN.val[1] = vadd_u8(vdestN.val[1], vnp1.val[1]);
13585bec5421Sopenharmony_ci+
13595bec5421Sopenharmony_ci+      vlastN = vdest.val[1];
13605bec5421Sopenharmony_ci+
13615bec5421Sopenharmony_ci+      vdest_val1 = png_ldr(uint32x2x2_t, &vdestN);
13625bec5421Sopenharmony_ci+      vst2_lane_u32(png_ptr(uint32_t, np), vdest_val1, 0);
13635bec5421Sopenharmony_ci+
13645bec5421Sopenharmony_ci+      vdestN.val[IND3] = vdestN.val[1]; /* same hand-over for the second row */
13655bec5421Sopenharmony_ci+
13665bec5421Sopenharmony_ci+      rp += STEP_RGBA_HALF;
13675bec5421Sopenharmony_ci+      pp += STEP_RGBA_HALF;
13685bec5421Sopenharmony_ci+      np += STEP_RGBA_HALF;
13695bec5421Sopenharmony_ci+      count -= STEP_RGBA_HALF;
13705bec5421Sopenharmony_ci+   }
13715bec5421Sopenharmony_ci+
13725bec5421Sopenharmony_ci+   if (count == 0) {
13735bec5421Sopenharmony_ci+      return;
13745bec5421Sopenharmony_ci+   } /* from here on: whatever remains is handled as one final pixel per row */
13755bec5421Sopenharmony_ci+
13765bec5421Sopenharmony_ci+   uint32x2_t vtmp2;
13775bec5421Sopenharmony_ci+   uint8x8_t *vrpt2, *vppt2, *vnpt2;
13785bec5421Sopenharmony_ci+   uint8x8_t vrp2, vpp2, vnp2;
13795bec5421Sopenharmony_ci+   uint32x2_t *temp_pointer; /* NOTE(review): declared but not referenced */
13805bec5421Sopenharmony_ci+   uint32x2_t vdest_val2;
13815bec5421Sopenharmony_ci+
13825bec5421Sopenharmony_ci+   vtmp2 = vld1_u32(png_ptr(uint32_t, rp)); /* loads 64 bits although only lane 0 is stored below */
13835bec5421Sopenharmony_ci+   vrpt2 = png_ptr(uint8x8_t, &vtmp2);
13845bec5421Sopenharmony_ci+   vrp2 = *vrpt2;
13855bec5421Sopenharmony_ci+   vtmp2 = vld1_u32(png_ptrc(uint32_t, pp));
13865bec5421Sopenharmony_ci+   vppt2 = png_ptr(uint8x8_t, &vtmp2);
13875bec5421Sopenharmony_ci+   vpp2 = *vppt2;
13885bec5421Sopenharmony_ci+   vtmp2 = vld1_u32(png_ptrc(uint32_t, np));
13895bec5421Sopenharmony_ci+   vnpt2 = png_ptr(uint8x8_t, &vtmp2);
13905bec5421Sopenharmony_ci+   vnp2 = *vnpt2;
13915bec5421Sopenharmony_ci+
13925bec5421Sopenharmony_ci+   vdest.val[0] = paeth(vdest.val[IND3], vpp2, vlast);
13935bec5421Sopenharmony_ci+   vdest.val[0] = vadd_u8(vdest.val[0], vrp2);
13945bec5421Sopenharmony_ci+
13955bec5421Sopenharmony_ci+   vdest_val2 = png_ldr(uint32x2_t, &vdest); /* &vdest addresses val[0], the first member */
13965bec5421Sopenharmony_ci+   vst1_lane_u32(png_ptr(uint32_t, rp), vdest_val2, 0);
13975bec5421Sopenharmony_ci+
13985bec5421Sopenharmony_ci+   vdestN.val[0] = paeth(vdestN.val[IND3], vdest.val[0], vlastN);
13995bec5421Sopenharmony_ci+   vdestN.val[0] = vadd_u8(vdestN.val[0], vnp2);
14005bec5421Sopenharmony_ci+
14015bec5421Sopenharmony_ci+   vdest_val2 = png_ldr(uint32x2_t, &vdestN);
14025bec5421Sopenharmony_ci+   vst1_lane_u32(png_ptr(uint32_t, np), vdest_val2, 0);
14035bec5421Sopenharmony_ci+}
14045bec5421Sopenharmony_ci+#endif /* PNG_MULTY_LINE_ENABLE */
14055bec5421Sopenharmony_ci #endif /* PNG_ARM_NEON_OPT > 0 */
14065bec5421Sopenharmony_ci #endif /* PNG_ARM_NEON_IMPLEMENTATION == 1 (intrinsics) */
14075bec5421Sopenharmony_ci #endif /* READ */
14085bec5421Sopenharmony_cidiff --git a/pngpread.c b/pngpread.c
14095bec5421Sopenharmony_ciindex e283627b7..bb12f61ea 100644
14105bec5421Sopenharmony_ci--- a/pngpread.c
14115bec5421Sopenharmony_ci+++ b/pngpread.c
14125bec5421Sopenharmony_ci@@ -264,9 +264,22 @@ png_push_read_chunk(png_structrp png_ptr, png_inforp info_ptr)
14135bec5421Sopenharmony_ci       png_ptr->idat_size = png_ptr->push_length;
14145bec5421Sopenharmony_ci       png_ptr->process_mode = PNG_READ_IDAT_MODE;
14155bec5421Sopenharmony_ci       png_push_have_info(png_ptr, info_ptr);
14165bec5421Sopenharmony_ci-      png_ptr->zstream.avail_out =
14175bec5421Sopenharmony_ci-          (uInt) PNG_ROWBYTES(png_ptr->pixel_depth,
14185bec5421Sopenharmony_ci-          png_ptr->iwidth) + 1;
14195bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
14205bec5421Sopenharmony_ci+      // OH ISSUE: png optimize
14215bec5421Sopenharmony_ci+      if (png_ptr->interlaced == 0 && png_ptr->bit_depth == 8 &&
14225bec5421Sopenharmony_ci+         (png_ptr->transformations & PNG_CHECK) == 0) {
14235bec5421Sopenharmony_ci+         int rest = png_ptr->num_rows - png_ptr->row_number;
14245bec5421Sopenharmony_ci+         int row_num = rest < PNG_INFLATE_ROWS ? rest : PNG_INFLATE_ROWS;
14255bec5421Sopenharmony_ci+         png_ptr->zstream.avail_out = (uInt)(PNG_ROWBYTES(png_ptr->pixel_depth,
14265bec5421Sopenharmony_ci+             png_ptr->iwidth) + 1) * row_num;
14275bec5421Sopenharmony_ci+      }
14285bec5421Sopenharmony_ci+      else
14295bec5421Sopenharmony_ci+#endif
14305bec5421Sopenharmony_ci+      {
14315bec5421Sopenharmony_ci+         png_ptr->zstream.avail_out =
14325bec5421Sopenharmony_ci+            (uInt) PNG_ROWBYTES(png_ptr->pixel_depth,
14335bec5421Sopenharmony_ci+            png_ptr->iwidth) + 1;
14345bec5421Sopenharmony_ci+      }
14355bec5421Sopenharmony_ci       png_ptr->zstream.next_out = png_ptr->row_buf;
14365bec5421Sopenharmony_ci       return;
14375bec5421Sopenharmony_ci    }
14385bec5421Sopenharmony_ci@@ -623,6 +636,92 @@ png_push_read_IDAT(png_structrp png_ptr)
14395bec5421Sopenharmony_ci    }
14405bec5421Sopenharmony_ci }
14415bec5421Sopenharmony_ci 
14425bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
14435bec5421Sopenharmony_ci+// OH ISSUE: png optimize
14445bec5421Sopenharmony_ci+static void png_push_process_row_x2(png_structrp png_ptr,
14455bec5421Sopenharmony_ci+   png_row_info row_info_in)
14465bec5421Sopenharmony_ci+{ /* Handles two consecutive rows held in row_buf: one filter call, then transform and deliver each row in turn. */
14475bec5421Sopenharmony_ci+   png_debug(1, "in png_push_process_row_x2");
14485bec5421Sopenharmony_ci+   png_row_info row_info = row_info_in; /* local working copy of the by-value argument */
14495bec5421Sopenharmony_ci+   png_read_filter_row(png_ptr, &row_info, png_ptr->row_buf + 1,
14505bec5421Sopenharmony_ci+      png_ptr->prev_row + 1, png_ptr->row_buf[0] + 4); /* filter byte + 4: presumably the id of the matching PNG_FILTER_VALUE_*_X2 variant - confirm against its definition */
14515bec5421Sopenharmony_ci+
14525bec5421Sopenharmony_ci+#ifdef PNG_READ_TRANSFORMS_SUPPORTED
14535bec5421Sopenharmony_ci+   if (png_ptr->transformations != 0)
14545bec5421Sopenharmony_ci+      png_do_read_transformations(png_ptr, &row_info);
14555bec5421Sopenharmony_ci+#endif
14565bec5421Sopenharmony_ci+
14575bec5421Sopenharmony_ci+   if (png_ptr->transformed_pixel_depth == 0) /* record the pixel depth the first time a row gets here */
14585bec5421Sopenharmony_ci+   {
14595bec5421Sopenharmony_ci+      png_ptr->transformed_pixel_depth = row_info.pixel_depth;
14605bec5421Sopenharmony_ci+      if (row_info.pixel_depth > png_ptr->maximum_pixel_depth)
14615bec5421Sopenharmony_ci+         png_error(png_ptr, "progressive row overflow");
14625bec5421Sopenharmony_ci+   }
14635bec5421Sopenharmony_ci+
14645bec5421Sopenharmony_ci+   png_push_have_row(png_ptr, png_ptr->row_buf + 1); /* deliver the first row */
14655bec5421Sopenharmony_ci+   png_read_push_finish_row(png_ptr);
14665bec5421Sopenharmony_ci+
14675bec5421Sopenharmony_ci+   png_ptr->row_buf = png_ptr->row_buf + png_ptr->rowbytes + 1; /* step to the second row of the pair */
14685bec5421Sopenharmony_ci+
14695bec5421Sopenharmony_ci+   // do it again for the second row; no png_read_filter_row call is made for it here
14705bec5421Sopenharmony_ci+   if (png_ptr->transformations != 0)
14715bec5421Sopenharmony_ci+   {
14725bec5421Sopenharmony_ci+      memcpy(png_ptr->prev_row, png_ptr->row_buf, row_info.rowbytes + 1); /* copy the second row into prev_row before it is transformed */
14735bec5421Sopenharmony_ci+   }
14745bec5421Sopenharmony_ci+   else
14755bec5421Sopenharmony_ci+   {
14765bec5421Sopenharmony_ci+      png_ptr->prev_row = png_ptr->row_buf; /* no transformations: prev_row simply points at the second row inside row_buf */
14775bec5421Sopenharmony_ci+   }
14785bec5421Sopenharmony_ci+#ifdef PNG_READ_TRANSFORMS_SUPPORTED
14795bec5421Sopenharmony_ci+   if (png_ptr->transformations != 0)
14805bec5421Sopenharmony_ci+      png_do_read_transformations(png_ptr, &row_info); /* NOTE(review): row_info was already passed to this call for the first row; if that call updates it, this one starts from the updated description - confirm */
14815bec5421Sopenharmony_ci+#endif
14825bec5421Sopenharmony_ci+
14835bec5421Sopenharmony_ci+   png_push_have_row(png_ptr, png_ptr->row_buf + 1); /* deliver the second row */
14845bec5421Sopenharmony_ci+   png_read_push_finish_row(png_ptr); /* row_buf is left on the second row; png_push_process_multi_rows advances it afterwards */
14855bec5421Sopenharmony_ci+}
14865bec5421Sopenharmony_ci+
14875bec5421Sopenharmony_ci+static void png_push_process_multi_rows(png_structrp png_ptr, int row_num)
14885bec5421Sopenharmony_ci+{ /* Processes row_num rows stored back to back from row_buf, pairing neighbouring rows for the x2 path where the checks below allow. */
14895bec5421Sopenharmony_ci+   png_debug(1, "in png_push_process_multi_rows");
14905bec5421Sopenharmony_ci+   uInt row_bytes =  png_ptr->rowbytes + 1; /* distance between row starts: row data plus the leading filter byte */
14915bec5421Sopenharmony_ci+
14925bec5421Sopenharmony_ci+   png_row_info row_info; /* row description handed by value to png_push_process_row_x2 */
14935bec5421Sopenharmony_ci+   row_info.width = png_ptr->iwidth;
14945bec5421Sopenharmony_ci+   row_info.color_type = png_ptr->color_type;
14955bec5421Sopenharmony_ci+   row_info.bit_depth = png_ptr->bit_depth;
14965bec5421Sopenharmony_ci+   row_info.channels = png_ptr->channels;
14975bec5421Sopenharmony_ci+   row_info.pixel_depth = png_ptr->pixel_depth;
14985bec5421Sopenharmony_ci+   row_info.rowbytes = png_ptr->rowbytes;
14995bec5421Sopenharmony_ci+
15005bec5421Sopenharmony_ci+   png_bytep temp_row = png_ptr->row_buf; /* saved so row_buf can be restored after the loop moves it */
15015bec5421Sopenharmony_ci+   png_bytep temp_prev_row = png_ptr->prev_row; /* saved because the loop may repoint prev_row */
15025bec5421Sopenharmony_ci+
15035bec5421Sopenharmony_ci+   for (int i = 0; i < row_num; i++) {
15045bec5421Sopenharmony_ci+      // check if the x2_filter is effective: only supports channels 3 or 4
15055bec5421Sopenharmony_ci+      if ((png_ptr->channels == 3 || png_ptr->channels == 4) &&
15065bec5421Sopenharmony_ci+          i < row_num -1 && png_ptr->row_buf[0] > PNG_FILTER_VALUE_SUB && /* needs a following row in this batch and a filter byte above SUB */
15075bec5421Sopenharmony_ci+          png_ptr->row_buf[0] < PNG_FILTER_VALUE_LAST &&
15085bec5421Sopenharmony_ci+          png_ptr->row_buf[0] == png_ptr->row_buf[row_bytes]) /* the following row must carry the same filter byte */
15095bec5421Sopenharmony_ci+      {
15105bec5421Sopenharmony_ci+         png_push_process_row_x2(png_ptr, row_info);
15115bec5421Sopenharmony_ci+         png_ptr->row_buf = png_ptr->row_buf + row_bytes; /* the x2 helper already stepped over the first row; this steps over the second */
15125bec5421Sopenharmony_ci+         i++; /* together with the loop increment this accounts for two rows */
15135bec5421Sopenharmony_ci+         continue;
15145bec5421Sopenharmony_ci+      }
15155bec5421Sopenharmony_ci+      png_push_process_row(png_ptr); /* single-row path */
15165bec5421Sopenharmony_ci+      png_ptr->row_buf = png_ptr->row_buf + row_bytes;
15175bec5421Sopenharmony_ci+   }
15185bec5421Sopenharmony_ci+
15195bec5421Sopenharmony_ci+   if (png_ptr->transformations == 0 && png_ptr->interlaced == 0)
15205bec5421Sopenharmony_ci+   {
15215bec5421Sopenharmony_ci+      png_ptr->prev_row = temp_prev_row; /* put the saved prev_row pointer back */
15225bec5421Sopenharmony_ci+      memcpy(png_ptr->prev_row, png_ptr->row_buf - row_bytes, row_bytes); /* copy the last processed row (filter byte included) into it */
15235bec5421Sopenharmony_ci+   }
15245bec5421Sopenharmony_ci+   png_ptr->row_buf = temp_row; /* restore row_buf to where this batch started */
15255bec5421Sopenharmony_ci+}
15265bec5421Sopenharmony_ci+#endif
15275bec5421Sopenharmony_ci+
15285bec5421Sopenharmony_ci void /* PRIVATE */
15295bec5421Sopenharmony_ci png_process_IDAT_data(png_structrp png_ptr, png_bytep buffer,
15305bec5421Sopenharmony_ci     size_t buffer_length)
15315bec5421Sopenharmony_ci@@ -639,6 +738,17 @@ png_process_IDAT_data(png_structrp png_ptr, png_bytep buffer,
15325bec5421Sopenharmony_ci    /* TODO: WARNING: TRUNCATION ERROR: DANGER WILL ROBINSON: */
15335bec5421Sopenharmony_ci    png_ptr->zstream.avail_in = (uInt)buffer_length;
15345bec5421Sopenharmony_ci 
15355bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
15365bec5421Sopenharmony_ci+   // OH ISSUE: png optimize
15375bec5421Sopenharmony_ci+   int row_num = 1;
15385bec5421Sopenharmony_ci+   if (png_ptr->interlaced == 0 && png_ptr->bit_depth == 8 &&
15395bec5421Sopenharmony_ci+       (png_ptr->transformations & PNG_CHECK) == 0)
15405bec5421Sopenharmony_ci+   {
15415bec5421Sopenharmony_ci+      int rest = png_ptr->num_rows - png_ptr->row_number;
15425bec5421Sopenharmony_ci+      row_num = rest < PNG_INFLATE_ROWS ? rest : PNG_INFLATE_ROWS;
15435bec5421Sopenharmony_ci+   }
15445bec5421Sopenharmony_ci+#endif
15455bec5421Sopenharmony_ci+
15465bec5421Sopenharmony_ci    /* Keep going until the decompressed data is all processed
15475bec5421Sopenharmony_ci     * or the stream marked as finished.
15485bec5421Sopenharmony_ci     */
15495bec5421Sopenharmony_ci@@ -655,9 +765,20 @@ png_process_IDAT_data(png_structrp png_ptr, png_bytep buffer,
15505bec5421Sopenharmony_ci       if (!(png_ptr->zstream.avail_out > 0))
15515bec5421Sopenharmony_ci       {
15525bec5421Sopenharmony_ci          /* TODO: WARNING: TRUNCATION ERROR: DANGER WILL ROBINSON: */
15535bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
15545bec5421Sopenharmony_ci+         // OH ISSUE: png optimize
15555bec5421Sopenharmony_ci+         if (png_ptr->interlaced == 0 && png_ptr->bit_depth == 8 &&
15565bec5421Sopenharmony_ci+             (png_ptr->transformations & PNG_CHECK) == 0)
15575bec5421Sopenharmony_ci+         {
15585bec5421Sopenharmony_ci+            int rest = png_ptr->num_rows - png_ptr->row_number;
15595bec5421Sopenharmony_ci+            row_num = rest < PNG_INFLATE_ROWS ? rest : PNG_INFLATE_ROWS;
15605bec5421Sopenharmony_ci+         }
15615bec5421Sopenharmony_ci+         png_ptr->zstream.avail_out = (uInt)(PNG_ROWBYTES(png_ptr->pixel_depth,
15625bec5421Sopenharmony_ci+             png_ptr->iwidth) + 1) * row_num;
15635bec5421Sopenharmony_ci+#else
15645bec5421Sopenharmony_ci          png_ptr->zstream.avail_out = (uInt)(PNG_ROWBYTES(png_ptr->pixel_depth,
15655bec5421Sopenharmony_ci              png_ptr->iwidth) + 1);
15665bec5421Sopenharmony_ci-
15675bec5421Sopenharmony_ci+#endif
15685bec5421Sopenharmony_ci          png_ptr->zstream.next_out = png_ptr->row_buf;
15695bec5421Sopenharmony_ci       }
15705bec5421Sopenharmony_ci 
15715bec5421Sopenharmony_ci@@ -719,7 +840,12 @@ png_process_IDAT_data(png_structrp png_ptr, png_bytep buffer,
15725bec5421Sopenharmony_ci 
15735bec5421Sopenharmony_ci          /* Do we have a complete row? */
15745bec5421Sopenharmony_ci          if (png_ptr->zstream.avail_out == 0)
15755bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
15765bec5421Sopenharmony_ci+            // OH ISSUE: png optimize
15775bec5421Sopenharmony_ci+            png_push_process_multi_rows(png_ptr, row_num);
15785bec5421Sopenharmony_ci+#else
15795bec5421Sopenharmony_ci             png_push_process_row(png_ptr);
15805bec5421Sopenharmony_ci+#endif
15815bec5421Sopenharmony_ci       }
15825bec5421Sopenharmony_ci 
15835bec5421Sopenharmony_ci       /* And check for the end of the stream. */
15845bec5421Sopenharmony_ci@@ -738,6 +864,7 @@ png_process_IDAT_data(png_structrp png_ptr, png_bytep buffer,
15855bec5421Sopenharmony_ci void /* PRIVATE */
15865bec5421Sopenharmony_ci png_push_process_row(png_structrp png_ptr)
15875bec5421Sopenharmony_ci {
15885bec5421Sopenharmony_ci+   png_debug(1, "in png_push_process_row");
15895bec5421Sopenharmony_ci    /* 1.5.6: row_info moved out of png_struct to a local here. */
15905bec5421Sopenharmony_ci    png_row_info row_info;
15915bec5421Sopenharmony_ci 
15925bec5421Sopenharmony_ci@@ -762,8 +889,17 @@ png_push_process_row(png_structrp png_ptr)
15935bec5421Sopenharmony_ci     * it may not be in the future, so this was changed just to copy the
15945bec5421Sopenharmony_ci     * interlaced row count:
15955bec5421Sopenharmony_ci     */
15965bec5421Sopenharmony_ci-   memcpy(png_ptr->prev_row, png_ptr->row_buf, row_info.rowbytes + 1);
15975bec5421Sopenharmony_ci-
15985bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
15995bec5421Sopenharmony_ci+   // OH ISSUE: png optimize
16005bec5421Sopenharmony_ci+   if (png_ptr->transformations == 0 && png_ptr->interlaced == 0)
16015bec5421Sopenharmony_ci+   {
16025bec5421Sopenharmony_ci+      png_ptr->prev_row = png_ptr->row_buf;
16035bec5421Sopenharmony_ci+   }
16045bec5421Sopenharmony_ci+   else
16055bec5421Sopenharmony_ci+#endif
16065bec5421Sopenharmony_ci+   {
16075bec5421Sopenharmony_ci+      memcpy(png_ptr->prev_row, png_ptr->row_buf, row_info.rowbytes + 1);
16085bec5421Sopenharmony_ci+   }
16095bec5421Sopenharmony_ci #ifdef PNG_READ_TRANSFORMS_SUPPORTED
16105bec5421Sopenharmony_ci    if (png_ptr->transformations != 0)
16115bec5421Sopenharmony_ci       png_do_read_transformations(png_ptr, &row_info);
16125bec5421Sopenharmony_cidiff --git a/pngpriv.h b/pngpriv.h
16135bec5421Sopenharmony_ciindex fb521cf00..81300fbd8 100644
16145bec5421Sopenharmony_ci--- a/pngpriv.h
16155bec5421Sopenharmony_ci+++ b/pngpriv.h
16165bec5421Sopenharmony_ci@@ -189,6 +189,19 @@
16175bec5421Sopenharmony_ci #     define PNG_ARM_NEON_IMPLEMENTATION 0
16185bec5421Sopenharmony_ci #endif /* PNG_ARM_NEON_OPT > 0 */
16195bec5421Sopenharmony_ci 
16205bec5421Sopenharmony_ci+#if defined(PNG_ARM_NEON_IMPLEMENTATION) && defined(PNG_ARM_NEON)
16215bec5421Sopenharmony_ci+// OH ISSUE: png optimize - enables the multi-row read path (batches of up to PNG_INFLATE_ROWS rows); PNG_CHECK lists the transformations that disable it
16225bec5421Sopenharmony_ci+#  if PNG_ARM_NEON_IMPLEMENTATION == 1
16235bec5421Sopenharmony_ci+#    define PNG_MULTY_LINE_ENABLE
16245bec5421Sopenharmony_ci+#    define PNG_INFLATE_MAX_SIZE (65536)
16255bec5421Sopenharmony_ci+#    define PNG_INFLATE_ROWS (50)
16265bec5421Sopenharmony_ci+#    define PNG_CHECK (PNG_EXPAND | PNG_STRIP_ALPHA | PNG_RGB_TO_GRAY | PNG_ENCODE_ALPHA | \
16275bec5421Sopenharmony_ci+       PNG_PACKSWAP | PNG_GRAY_TO_RGB | PNG_COMPOSE | PNG_SCALE_16_TO_8 | PNG_16_TO_8 | \
16285bec5421Sopenharmony_ci+       PNG_BACKGROUND_EXPAND | PNG_EXPAND_16 | PNG_PACK | PNG_ADD_ALPHA | PNG_EXPAND_tRNS | \
16295bec5421Sopenharmony_ci+       PNG_RGB_TO_GRAY_ERR | PNG_RGB_TO_GRAY_WARN | PNG_FILLER | PNG_USER_TRANSFORM)
16305bec5421Sopenharmony_ci+#  endif
16315bec5421Sopenharmony_ci+#endif
16325bec5421Sopenharmony_ci+
16335bec5421Sopenharmony_ci #ifndef PNG_MIPS_MSA_OPT
16345bec5421Sopenharmony_ci #  if defined(__mips_msa) && (__mips_isa_rev >= 5) && defined(PNG_ALIGNED_MEMORY_SUPPORTED)
16355bec5421Sopenharmony_ci #     define PNG_MIPS_MSA_OPT 2
16365bec5421Sopenharmony_ci@@ -351,8 +364,14 @@
16375bec5421Sopenharmony_ci #endif
16385bec5421Sopenharmony_ci 
16395bec5421Sopenharmony_ci #ifndef PNG_INTERNAL_FUNCTION
16405bec5421Sopenharmony_ci+// OH ISSUE: png optimize
16415bec5421Sopenharmony_ci+#  ifdef PNG_MULTY_LINE_ENABLE
16425bec5421Sopenharmony_ci+#    define PNG_HIDE __attribute__((visibility("hidden")))
16435bec5421Sopenharmony_ci+#  else
16445bec5421Sopenharmony_ci+#    define PNG_HIDE
16455bec5421Sopenharmony_ci+#  endif
16465bec5421Sopenharmony_ci #  define PNG_INTERNAL_FUNCTION(type, name, args, attributes)\
16475bec5421Sopenharmony_ci-      PNG_LINKAGE_FUNCTION PNG_FUNCTION(type, name, args, PNG_EMPTY attributes)
16485bec5421Sopenharmony_ci+      PNG_LINKAGE_FUNCTION PNG_FUNCTION(type, name, args, PNG_HIDE attributes)
16495bec5421Sopenharmony_ci #endif
16505bec5421Sopenharmony_ci 
16515bec5421Sopenharmony_ci #ifndef PNG_INTERNAL_CALLBACK
16525bec5421Sopenharmony_ci@@ -1304,6 +1323,19 @@ PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_neon,(png_row_infop
16535bec5421Sopenharmony_ci     row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
16545bec5421Sopenharmony_ci PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_neon,(png_row_infop
16555bec5421Sopenharmony_ci     row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
16565bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
16575bec5421Sopenharmony_ci+// OH ISSUE: png optimize
16585bec5421Sopenharmony_ci+PNG_INTERNAL_FUNCTION(void, png_read_filter_row_up_x2_neon, (png_row_infop
16595bec5421Sopenharmony_ci+    row_info, png_bytep row, png_const_bytep prev_row), PNG_EMPTY);
16605bec5421Sopenharmony_ci+PNG_INTERNAL_FUNCTION(void, png_read_filter_row_avg3_x2_neon, (png_row_infop
16615bec5421Sopenharmony_ci+    row_info, png_bytep row, png_const_bytep prev_row), PNG_EMPTY);
16625bec5421Sopenharmony_ci+PNG_INTERNAL_FUNCTION(void, png_read_filter_row_avg4_x2_neon, (png_row_infop
16635bec5421Sopenharmony_ci+    row_info, png_bytep row, png_const_bytep prev_row), PNG_EMPTY);
16645bec5421Sopenharmony_ci+PNG_INTERNAL_FUNCTION(void, png_read_filter_row_paeth3_x2_neon, (png_row_infop
16655bec5421Sopenharmony_ci+    row_info, png_bytep row, png_const_bytep prev_row), PNG_EMPTY);
16665bec5421Sopenharmony_ci+PNG_INTERNAL_FUNCTION(void, png_read_filter_row_paeth4_x2_neon, (png_row_infop
16675bec5421Sopenharmony_ci+    row_info, png_bytep row, png_const_bytep prev_row), PNG_EMPTY);
16685bec5421Sopenharmony_ci+#endif
16695bec5421Sopenharmony_ci #endif
16705bec5421Sopenharmony_ci 
16715bec5421Sopenharmony_ci #if PNG_MIPS_MSA_OPT > 0
16725bec5421Sopenharmony_cidiff --git a/pngread.c b/pngread.c
16735bec5421Sopenharmony_ciindex 8fa7d9f16..ed5a25307 100644
16745bec5421Sopenharmony_ci--- a/pngread.c
16755bec5421Sopenharmony_ci+++ b/pngread.c
16765bec5421Sopenharmony_ci@@ -54,7 +54,12 @@ png_create_read_struct_2,(png_const_charp user_png_ver, png_voidp error_ptr,
16775bec5421Sopenharmony_ci        * required (it will be zero in a write structure.)
16785bec5421Sopenharmony_ci        */
16795bec5421Sopenharmony_ci #     ifdef PNG_SEQUENTIAL_READ_SUPPORTED
16805bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
16815bec5421Sopenharmony_ci+         // OH ISSUE: png optimize
16825bec5421Sopenharmony_ci+         png_ptr->IDAT_read_size = PNG_INFLATE_MAX_SIZE;
16835bec5421Sopenharmony_ci+#else
16845bec5421Sopenharmony_ci          png_ptr->IDAT_read_size = PNG_IDAT_READ_SIZE;
16855bec5421Sopenharmony_ci+#endif
16865bec5421Sopenharmony_ci #     endif
16875bec5421Sopenharmony_ci 
16885bec5421Sopenharmony_ci #     ifdef PNG_BENIGN_READ_ERRORS_SUPPORTED
16895bec5421Sopenharmony_ci@@ -684,6 +689,224 @@ png_read_rows(png_structrp png_ptr, png_bytepp row,
16905bec5421Sopenharmony_ci #endif /* SEQUENTIAL_READ */
16915bec5421Sopenharmony_ci 
16925bec5421Sopenharmony_ci #ifdef PNG_SEQUENTIAL_READ_SUPPORTED
16935bec5421Sopenharmony_ci+
16945bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
16955bec5421Sopenharmony_ci+// OH ISSUE: png optimize - png_read_two_rows: run the x2 filter (filter byte + 4) on row_buf, then finish two rows into rows[i] and rows[i+1]
16965bec5421Sopenharmony_ci+static void png_read_two_rows(png_structrp png_ptr, png_bytepp rows, png_uint_32 i,
16975bec5421Sopenharmony_ci+                         png_row_info row_info)
16985bec5421Sopenharmony_ci+{
16995bec5421Sopenharmony_ci+   png_debug1(1, "in png_read_two_rows %d", png_ptr->row_buf[0]);
17005bec5421Sopenharmony_ci+   png_read_filter_row(png_ptr, &row_info, png_ptr->row_buf + 1,
17015bec5421Sopenharmony_ci+      png_ptr->prev_row + 1, png_ptr->row_buf[0] + 4);
17025bec5421Sopenharmony_ci+
17035bec5421Sopenharmony_ci+#ifdef PNG_MNG_FEATURES_SUPPORTED
17045bec5421Sopenharmony_ci+   if ((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) != 0 &&
17055bec5421Sopenharmony_ci+      (png_ptr->filter_type == PNG_INTRAPIXEL_DIFFERENCING))
17065bec5421Sopenharmony_ci+   {
17075bec5421Sopenharmony_ci+      /* Intrapixel differencing */
17085bec5421Sopenharmony_ci+      png_do_read_intrapixel(&row_info, png_ptr->row_buf + 1);
17095bec5421Sopenharmony_ci+   }
17105bec5421Sopenharmony_ci+#endif
17115bec5421Sopenharmony_ci+
17125bec5421Sopenharmony_ci+#ifdef PNG_READ_TRANSFORMS_SUPPORTED
17135bec5421Sopenharmony_ci+   if (png_ptr->transformations
17145bec5421Sopenharmony_ci+#       ifdef PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED
17155bec5421Sopenharmony_ci+         || png_ptr->num_palette_max >= 0
17165bec5421Sopenharmony_ci+#       endif
17175bec5421Sopenharmony_ci+      )
17185bec5421Sopenharmony_ci+      png_do_read_transformations(png_ptr, &row_info);
17195bec5421Sopenharmony_ci+#endif
17205bec5421Sopenharmony_ci+
17215bec5421Sopenharmony_ci+   /* The transformed pixel depth should match the depth now in row_info. */
17225bec5421Sopenharmony_ci+   if (png_ptr->transformed_pixel_depth == 0)
17235bec5421Sopenharmony_ci+   {
17245bec5421Sopenharmony_ci+      png_ptr->transformed_pixel_depth = row_info.pixel_depth;
17255bec5421Sopenharmony_ci+      if (row_info.pixel_depth > png_ptr->maximum_pixel_depth)
17265bec5421Sopenharmony_ci+         png_error(png_ptr, "sequential row overflow");
17275bec5421Sopenharmony_ci+   }
17285bec5421Sopenharmony_ci+
17295bec5421Sopenharmony_ci+   else if (png_ptr->transformed_pixel_depth != row_info.pixel_depth)
17305bec5421Sopenharmony_ci+      png_error(png_ptr, "internal sequential row size calculation error");
17315bec5421Sopenharmony_ci+
17325bec5421Sopenharmony_ci+   if (rows[i] != NULL)
17335bec5421Sopenharmony_ci+      png_combine_row(png_ptr, rows[i], -1);
17345bec5421Sopenharmony_ci+
17355bec5421Sopenharmony_ci+   png_read_finish_row(png_ptr);
17365bec5421Sopenharmony_ci+
17375bec5421Sopenharmony_ci+   if (png_ptr->read_row_fn != NULL)
17385bec5421Sopenharmony_ci+      (*(png_ptr->read_row_fn))(png_ptr, png_ptr->row_number, png_ptr->pass);
17395bec5421Sopenharmony_ci+
17405bec5421Sopenharmony_ci+   png_ptr->row_buf = png_ptr->row_buf + row_info.rowbytes + 1;
17415bec5421Sopenharmony_ci+
17425bec5421Sopenharmony_ci+   // Second row of the pair: save it as prev_row, then repeat the same per-row steps for rows[i+1]
17435bec5421Sopenharmony_ci+   memcpy(png_ptr->prev_row, png_ptr->row_buf, row_info.rowbytes + 1);
17445bec5421Sopenharmony_ci+
17455bec5421Sopenharmony_ci+#ifdef PNG_MNG_FEATURES_SUPPORTED
17465bec5421Sopenharmony_ci+   if ((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) != 0 &&
17475bec5421Sopenharmony_ci+      (png_ptr->filter_type == PNG_INTRAPIXEL_DIFFERENCING))
17485bec5421Sopenharmony_ci+   {
17495bec5421Sopenharmony_ci+      /* Intrapixel differencing */
17505bec5421Sopenharmony_ci+      png_do_read_intrapixel(&row_info, png_ptr->row_buf + 1);
17515bec5421Sopenharmony_ci+   }
17525bec5421Sopenharmony_ci+#endif
17535bec5421Sopenharmony_ci+
17545bec5421Sopenharmony_ci+#ifdef PNG_READ_TRANSFORMS_SUPPORTED
17555bec5421Sopenharmony_ci+   if (png_ptr->transformations
17565bec5421Sopenharmony_ci+#       ifdef PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED
17575bec5421Sopenharmony_ci+         || png_ptr->num_palette_max >= 0
17585bec5421Sopenharmony_ci+#       endif
17595bec5421Sopenharmony_ci+      )
17605bec5421Sopenharmony_ci+      png_do_read_transformations(png_ptr, &row_info);
17615bec5421Sopenharmony_ci+#endif
17625bec5421Sopenharmony_ci+
17635bec5421Sopenharmony_ci+   /* The transformed pixel depth should match the depth now in row_info. */
17645bec5421Sopenharmony_ci+   if (png_ptr->transformed_pixel_depth == 0)
17655bec5421Sopenharmony_ci+   {
17665bec5421Sopenharmony_ci+      png_ptr->transformed_pixel_depth = row_info.pixel_depth;
17675bec5421Sopenharmony_ci+      if (row_info.pixel_depth > png_ptr->maximum_pixel_depth)
17685bec5421Sopenharmony_ci+         png_error(png_ptr, "sequential row overflow");
17695bec5421Sopenharmony_ci+   }
17705bec5421Sopenharmony_ci+
17715bec5421Sopenharmony_ci+   else if (png_ptr->transformed_pixel_depth != row_info.pixel_depth)
17725bec5421Sopenharmony_ci+      png_error(png_ptr, "internal sequential row size calculation error");
17735bec5421Sopenharmony_ci+
17745bec5421Sopenharmony_ci+   if (rows[i+1] != NULL)
17755bec5421Sopenharmony_ci+      png_combine_row(png_ptr, rows[i+1], -1);
17765bec5421Sopenharmony_ci+
17775bec5421Sopenharmony_ci+   png_read_finish_row(png_ptr);
17785bec5421Sopenharmony_ci+
17795bec5421Sopenharmony_ci+   if (png_ptr->read_row_fn != NULL)
17805bec5421Sopenharmony_ci+      (*(png_ptr->read_row_fn))(png_ptr, png_ptr->row_number, png_ptr->pass);
17815bec5421Sopenharmony_ci+
17825bec5421Sopenharmony_ci+   png_ptr->row_buf = png_ptr->row_buf + row_info.rowbytes + 1;
17835bec5421Sopenharmony_ci+}
17845bec5421Sopenharmony_ci+
17855bec5421Sopenharmony_ci+static void png_read_muilty_rows(png_structrp png_ptr, png_bytepp rows,
17865bec5421Sopenharmony_ci+   png_uint_32 row_num, png_row_info row_info_in)
17875bec5421Sopenharmony_ci+{
17885bec5421Sopenharmony_ci+   if (png_ptr == NULL)
17895bec5421Sopenharmony_ci+      return;
17905bec5421Sopenharmony_ci+
17915bec5421Sopenharmony_ci+   png_debug2(1, "in png_read_muilty_rows (row %lu, pass %d)",
17925bec5421Sopenharmony_ci+       (unsigned long)png_ptr->row_number, png_ptr->pass);
17935bec5421Sopenharmony_ci+
17945bec5421Sopenharmony_ci+   if ((png_ptr->mode & PNG_HAVE_IDAT) == 0)
17955bec5421Sopenharmony_ci+         png_error(png_ptr, "Invalid attempt to read row data");
17965bec5421Sopenharmony_ci+
17975bec5421Sopenharmony_ci+   /* Read row_num rows of IDAT data (one filter byte plus row_bytes each) into row_buf: */
17985bec5421Sopenharmony_ci+   uInt row_bytes =  row_info_in.rowbytes;
17995bec5421Sopenharmony_ci+   png_ptr->row_buf[0]=255; /* 255 to force error if no data was found */
18005bec5421Sopenharmony_ci+   png_read_IDAT_data(png_ptr, png_ptr->row_buf, (row_bytes + 1) * row_num);
18015bec5421Sopenharmony_ci+   png_bytep temp_row = png_ptr->row_buf;
18025bec5421Sopenharmony_ci+
18035bec5421Sopenharmony_ci+   for (png_uint_32 i = 0; i < row_num; i++) {
18045bec5421Sopenharmony_ci+      png_row_info row_info = row_info_in;
18055bec5421Sopenharmony_ci+      // take the two-row (x2) path only with 3 or 4 channels, a following row in the batch, and the same filter byte (above SUB, below LAST) on both rows
18065bec5421Sopenharmony_ci+      if ((row_info_in.channels == 3 || row_info_in.channels == 4) &&
18075bec5421Sopenharmony_ci+          i < row_num -1 && png_ptr->row_buf[0] > PNG_FILTER_VALUE_SUB &&
18085bec5421Sopenharmony_ci+          png_ptr->row_buf[0] < PNG_FILTER_VALUE_LAST &&
18095bec5421Sopenharmony_ci+          png_ptr->row_buf[0] == png_ptr->row_buf[row_info_in.rowbytes + 1])
18105bec5421Sopenharmony_ci+      {
18115bec5421Sopenharmony_ci+         png_read_two_rows(png_ptr, rows, i, row_info);
18125bec5421Sopenharmony_ci+         i++; /* png_read_two_rows consumed rows i and i+1 */
18135bec5421Sopenharmony_ci+         continue;
18145bec5421Sopenharmony_ci+      }
18155bec5421Sopenharmony_ci+      if (png_ptr->row_buf[0] > PNG_FILTER_VALUE_NONE)
18165bec5421Sopenharmony_ci+      {
18175bec5421Sopenharmony_ci+         if (png_ptr->row_buf[0] < PNG_FILTER_VALUE_LAST)
18185bec5421Sopenharmony_ci+            png_read_filter_row(png_ptr, &row_info, png_ptr->row_buf + 1,
18195bec5421Sopenharmony_ci+               png_ptr->prev_row + 1, png_ptr->row_buf[0]);
18205bec5421Sopenharmony_ci+         else
18215bec5421Sopenharmony_ci+            png_debug1(1, "bad adaptive filter value %d", png_ptr->row_buf[0]);
18225bec5421Sopenharmony_ci+      }
18235bec5421Sopenharmony_ci+
18245bec5421Sopenharmony_ci+      memcpy(png_ptr->prev_row, png_ptr->row_buf, row_info_in.rowbytes + 1);
18255bec5421Sopenharmony_ci+
18265bec5421Sopenharmony_ci+#ifdef PNG_MNG_FEATURES_SUPPORTED
18275bec5421Sopenharmony_ci+      if ((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) != 0 &&
18285bec5421Sopenharmony_ci+         (png_ptr->filter_type == PNG_INTRAPIXEL_DIFFERENCING))
18295bec5421Sopenharmony_ci+      {
18305bec5421Sopenharmony_ci+         /* Intrapixel differencing */
18315bec5421Sopenharmony_ci+         png_do_read_intrapixel(&row_info, png_ptr->row_buf + 1);
18325bec5421Sopenharmony_ci+      }
18335bec5421Sopenharmony_ci+#endif
18345bec5421Sopenharmony_ci+
18355bec5421Sopenharmony_ci+#ifdef PNG_READ_TRANSFORMS_SUPPORTED
18365bec5421Sopenharmony_ci+      if (png_ptr->transformations
18375bec5421Sopenharmony_ci+#        ifdef PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED
18385bec5421Sopenharmony_ci+            || png_ptr->num_palette_max >= 0
18395bec5421Sopenharmony_ci+#        endif
18405bec5421Sopenharmony_ci+         )
18415bec5421Sopenharmony_ci+         png_do_read_transformations(png_ptr, &row_info);
18425bec5421Sopenharmony_ci+#endif
18435bec5421Sopenharmony_ci+
18445bec5421Sopenharmony_ci+      /* The transformed pixel depth should match the depth now in row_info. */
18455bec5421Sopenharmony_ci+      if (png_ptr->transformed_pixel_depth == 0)
18465bec5421Sopenharmony_ci+      {
18475bec5421Sopenharmony_ci+         png_ptr->transformed_pixel_depth = row_info.pixel_depth;
18485bec5421Sopenharmony_ci+         if (row_info.pixel_depth > png_ptr->maximum_pixel_depth)
18495bec5421Sopenharmony_ci+            png_error(png_ptr, "sequential row overflow");
18505bec5421Sopenharmony_ci+      }
18515bec5421Sopenharmony_ci+
18525bec5421Sopenharmony_ci+      else if (png_ptr->transformed_pixel_depth != row_info.pixel_depth)
18535bec5421Sopenharmony_ci+         png_error(png_ptr, "internal sequential row size calculation error");
18545bec5421Sopenharmony_ci+
18555bec5421Sopenharmony_ci+      if (rows[i] != NULL)
18565bec5421Sopenharmony_ci+         png_combine_row(png_ptr, rows[i], -1);
18575bec5421Sopenharmony_ci+
18585bec5421Sopenharmony_ci+      png_read_finish_row(png_ptr);
18595bec5421Sopenharmony_ci+
18605bec5421Sopenharmony_ci+      if (png_ptr->read_row_fn != NULL)
18615bec5421Sopenharmony_ci+         (*(png_ptr->read_row_fn))(png_ptr, png_ptr->row_number, png_ptr->pass);
18625bec5421Sopenharmony_ci+
18635bec5421Sopenharmony_ci+      png_ptr->row_buf = png_ptr->row_buf + row_bytes + 1;
18645bec5421Sopenharmony_ci+   }
18655bec5421Sopenharmony_ci+   png_ptr->row_buf = temp_row; /* restore the buffer start that the loop advanced row by row */
18665bec5421Sopenharmony_ci+}
18675bec5421Sopenharmony_ci+
18685bec5421Sopenharmony_ci+static void png_warn_check(png_structrp png_ptr)
18695bec5421Sopenharmony_ci+{
18705bec5421Sopenharmony_ci+#ifdef PNG_WARNINGS_SUPPORTED
18715bec5421Sopenharmony_ci+   /* Warn once per transform that is set in png_ptr->transformations but whose PNG_READ_*_SUPPORTED macro is not defined in this build */
18725bec5421Sopenharmony_ci+#if defined(PNG_WRITE_INVERT_SUPPORTED) && !defined(PNG_READ_INVERT_SUPPORTED)
18735bec5421Sopenharmony_ci+   if ((png_ptr->transformations & PNG_INVERT_MONO) != 0)
18745bec5421Sopenharmony_ci+      png_warning(png_ptr, "PNG_READ_INVERT_SUPPORTED is not defined");
18755bec5421Sopenharmony_ci+#endif
18765bec5421Sopenharmony_ci+
18775bec5421Sopenharmony_ci+#if defined(PNG_WRITE_FILLER_SUPPORTED) && !defined(PNG_READ_FILLER_SUPPORTED)
18785bec5421Sopenharmony_ci+   if ((png_ptr->transformations & PNG_FILLER) != 0)
18795bec5421Sopenharmony_ci+      png_warning(png_ptr, "PNG_READ_FILLER_SUPPORTED is not defined");
18805bec5421Sopenharmony_ci+#endif
18815bec5421Sopenharmony_ci+
18825bec5421Sopenharmony_ci+#if defined(PNG_WRITE_PACKSWAP_SUPPORTED) && \
18835bec5421Sopenharmony_ci+    !defined(PNG_READ_PACKSWAP_SUPPORTED)
18845bec5421Sopenharmony_ci+   if ((png_ptr->transformations & PNG_PACKSWAP) != 0)
18855bec5421Sopenharmony_ci+      png_warning(png_ptr, "PNG_READ_PACKSWAP_SUPPORTED is not defined");
18865bec5421Sopenharmony_ci+#endif
18875bec5421Sopenharmony_ci+
18885bec5421Sopenharmony_ci+#if defined(PNG_WRITE_PACK_SUPPORTED) && !defined(PNG_READ_PACK_SUPPORTED)
18895bec5421Sopenharmony_ci+   if ((png_ptr->transformations & PNG_PACK) != 0)
18905bec5421Sopenharmony_ci+      png_warning(png_ptr, "PNG_READ_PACK_SUPPORTED is not defined");
18915bec5421Sopenharmony_ci+#endif
18925bec5421Sopenharmony_ci+
18935bec5421Sopenharmony_ci+#if defined(PNG_WRITE_SHIFT_SUPPORTED) && !defined(PNG_READ_SHIFT_SUPPORTED)
18945bec5421Sopenharmony_ci+   if ((png_ptr->transformations & PNG_SHIFT) != 0)
18955bec5421Sopenharmony_ci+      png_warning(png_ptr, "PNG_READ_SHIFT_SUPPORTED is not defined");
18965bec5421Sopenharmony_ci+#endif
18975bec5421Sopenharmony_ci+
18985bec5421Sopenharmony_ci+#if defined(PNG_WRITE_BGR_SUPPORTED) && !defined(PNG_READ_BGR_SUPPORTED)
18995bec5421Sopenharmony_ci+   if ((png_ptr->transformations & PNG_BGR) != 0)
19005bec5421Sopenharmony_ci+      png_warning(png_ptr, "PNG_READ_BGR_SUPPORTED is not defined");
19015bec5421Sopenharmony_ci+#endif
19025bec5421Sopenharmony_ci+
19035bec5421Sopenharmony_ci+#if defined(PNG_WRITE_SWAP_SUPPORTED) && !defined(PNG_READ_SWAP_SUPPORTED)
19045bec5421Sopenharmony_ci+   if ((png_ptr->transformations & PNG_SWAP_BYTES) != 0)
19055bec5421Sopenharmony_ci+      png_warning(png_ptr, "PNG_READ_SWAP_SUPPORTED is not defined");
19065bec5421Sopenharmony_ci+#endif
19075bec5421Sopenharmony_ci+#endif /* WARNINGS */
19085bec5421Sopenharmony_ci+}
19095bec5421Sopenharmony_ci+#endif // PNG_MULTY_LINE_ENABLE
19105bec5421Sopenharmony_ci+
19115bec5421Sopenharmony_ci /* Read the entire image.  If the image has an alpha channel or a tRNS
19125bec5421Sopenharmony_ci  * chunk, and you have called png_handle_alpha()[*], you will need to
19135bec5421Sopenharmony_ci  * initialize the image to the current image that PNG will be overlaying.
19145bec5421Sopenharmony_ci@@ -745,13 +968,45 @@ png_read_image(png_structrp png_ptr, png_bytepp image)
19155bec5421Sopenharmony_ci 
19165bec5421Sopenharmony_ci    image_height=png_ptr->height;
19175bec5421Sopenharmony_ci 
19185bec5421Sopenharmony_ci-   for (j = 0; j < pass; j++)
19195bec5421Sopenharmony_ci-   {
19205bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
19215bec5421Sopenharmony_ci+   // OH ISSUE: png optimize
19225bec5421Sopenharmony_ci+   if (png_ptr->interlaced == 0 && png_ptr->bit_depth == 8 &&
19235bec5421Sopenharmony_ci+       (png_ptr->transformations & PNG_CHECK) == 0) {
19245bec5421Sopenharmony_ci+      if ((png_ptr->flags & PNG_FLAG_ROW_INIT) == 0)
19255bec5421Sopenharmony_ci+         png_read_start_row(png_ptr);
19265bec5421Sopenharmony_ci+
19275bec5421Sopenharmony_ci+      png_warn_check(png_ptr);
19285bec5421Sopenharmony_ci+      png_row_info row_info;
19295bec5421Sopenharmony_ci+      row_info.width = png_ptr->iwidth;
19305bec5421Sopenharmony_ci+      row_info.color_type = png_ptr->color_type;
19315bec5421Sopenharmony_ci+      row_info.bit_depth = png_ptr->bit_depth;
19325bec5421Sopenharmony_ci+      row_info.channels = png_ptr->channels;
19335bec5421Sopenharmony_ci+      row_info.pixel_depth = png_ptr->pixel_depth;
19345bec5421Sopenharmony_ci+      row_info.rowbytes = png_ptr->rowbytes;
19355bec5421Sopenharmony_ci+
19365bec5421Sopenharmony_ci       rp = image;
19375bec5421Sopenharmony_ci-      for (i = 0; i < image_height; i++)
19385bec5421Sopenharmony_ci+      int row_num = PNG_INFLATE_ROWS;
19395bec5421Sopenharmony_ci+      for (i = 0; i < image_height; i += PNG_INFLATE_ROWS)
19405bec5421Sopenharmony_ci       {
19415bec5421Sopenharmony_ci-         png_read_row(png_ptr, *rp, NULL);
19425bec5421Sopenharmony_ci-         rp++;
19435bec5421Sopenharmony_ci+         if (image_height - i < PNG_INFLATE_ROWS)
19445bec5421Sopenharmony_ci+         {
19455bec5421Sopenharmony_ci+            row_num = image_height - i;
19465bec5421Sopenharmony_ci+         }
19475bec5421Sopenharmony_ci+         png_read_muilty_rows(png_ptr, rp, row_num, row_info);
19485bec5421Sopenharmony_ci+         rp += row_num;
19495bec5421Sopenharmony_ci+      }
19505bec5421Sopenharmony_ci+   }
19515bec5421Sopenharmony_ci+   else
19525bec5421Sopenharmony_ci+#endif
19535bec5421Sopenharmony_ci+   {
19545bec5421Sopenharmony_ci+      for (j = 0; j < pass; j++)
19555bec5421Sopenharmony_ci+      {
19565bec5421Sopenharmony_ci+         rp = image;
19575bec5421Sopenharmony_ci+         for (i = 0; i < image_height; i++)
19585bec5421Sopenharmony_ci+         {
19595bec5421Sopenharmony_ci+            png_read_row(png_ptr, *rp, NULL);
19605bec5421Sopenharmony_ci+            rp++;
19615bec5421Sopenharmony_ci+         }
19625bec5421Sopenharmony_ci       }
19635bec5421Sopenharmony_ci    }
19645bec5421Sopenharmony_ci }
19655bec5421Sopenharmony_cidiff --git a/pngrutil.c b/pngrutil.c
19665bec5421Sopenharmony_ciindex 9ac8ec11f..f9c65927d 100644
19675bec5421Sopenharmony_ci--- a/pngrutil.c
19685bec5421Sopenharmony_ci+++ b/pngrutil.c
19695bec5421Sopenharmony_ci@@ -4134,7 +4134,12 @@ png_read_filter_row(png_structrp pp, png_row_infop row_info, png_bytep row,
19705bec5421Sopenharmony_ci     * PNG_FILTER_OPTIMIZATIONS to a function that overrides the generic
19715bec5421Sopenharmony_ci     * implementations.  See png_init_filter_functions above.
19725bec5421Sopenharmony_ci     */
19735bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
19745bec5421Sopenharmony_ci+   // OH ISSUE: png optimize
19755bec5421Sopenharmony_ci+   if (filter > PNG_FILTER_VALUE_NONE && filter < PNG_FILTER_VALUE_LAST_X2)
19765bec5421Sopenharmony_ci+#else
19775bec5421Sopenharmony_ci    if (filter > PNG_FILTER_VALUE_NONE && filter < PNG_FILTER_VALUE_LAST)
19785bec5421Sopenharmony_ci+#endif
19795bec5421Sopenharmony_ci    {
19805bec5421Sopenharmony_ci       if (pp->read_filter[0] == NULL)
19815bec5421Sopenharmony_ci          png_init_filter_functions(pp);
19825bec5421Sopenharmony_ci@@ -4606,7 +4611,24 @@ defined(PNG_USER_TRANSFORM_PTR_SUPPORTED)
19835bec5421Sopenharmony_ci              row_bytes + 48);
19845bec5421Sopenharmony_ci 
19855bec5421Sopenharmony_ci       else
19865bec5421Sopenharmony_ci+      {
19875bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
19885bec5421Sopenharmony_ci+         // OH ISSUE: png optimize
19895bec5421Sopenharmony_ci+         png_uint_32 row_num = 1;
19905bec5421Sopenharmony_ci+         if (png_ptr->bit_depth == 8 &&
19915bec5421Sopenharmony_ci+             (png_ptr->transformations & PNG_CHECK) == 0)
19925bec5421Sopenharmony_ci+         {
19935bec5421Sopenharmony_ci+            row_num = png_ptr->height < PNG_INFLATE_ROWS ?
19945bec5421Sopenharmony_ci+               png_ptr->height : PNG_INFLATE_ROWS;
19955bec5421Sopenharmony_ci+         }
19965bec5421Sopenharmony_ci+         png_ptr->big_row_buf = (png_bytep)png_malloc(
19975bec5421Sopenharmony_ci+            png_ptr, row_bytes * row_num + 48);
19985bec5421Sopenharmony_ci+         if (png_ptr->big_row_buf == NULL)
19995bec5421Sopenharmony_ci+            png_error(png_ptr, "png_malloc failed");
20005bec5421Sopenharmony_ci+#else
20015bec5421Sopenharmony_ci          png_ptr->big_row_buf = (png_bytep)png_malloc(png_ptr, row_bytes + 48);
20025bec5421Sopenharmony_ci+#endif
20035bec5421Sopenharmony_ci+      }
20045bec5421Sopenharmony_ci 
20055bec5421Sopenharmony_ci       png_ptr->big_prev_row = (png_bytep)png_malloc(png_ptr, row_bytes + 48);
20065bec5421Sopenharmony_ci 
20075bec5421Sopenharmony_cidiff --git a/pngstruct.h b/pngstruct.h
20085bec5421Sopenharmony_ciindex e591d94d5..7c3846475 100644
20095bec5421Sopenharmony_ci--- a/pngstruct.h
20105bec5421Sopenharmony_ci+++ b/pngstruct.h
20115bec5421Sopenharmony_ci@@ -140,6 +140,14 @@ typedef const png_colorspace * PNG_RESTRICT png_const_colorspacerp;
20125bec5421Sopenharmony_ci #define PNG_COLORSPACE_CANCEL(flags)        (0xffff ^ (flags))
20135bec5421Sopenharmony_ci #endif /* COLORSPACE || GAMMA */
20145bec5421Sopenharmony_ci 
20155bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
20165bec5421Sopenharmony_ci+// OH ISSUE: png optimize - extra filter codes (base filter value + 4) used to index the x2 NEON entries of read_filter[]
20175bec5421Sopenharmony_ci+#define PNG_FILTER_VALUE_UP_X2      (6) // PNG_FILTER_VALUE_UP + 4
20185bec5421Sopenharmony_ci+#define PNG_FILTER_VALUE_AVG_X2     (7) // PNG_FILTER_VALUE_AVG + 4
20195bec5421Sopenharmony_ci+#define PNG_FILTER_VALUE_PAETH_X2   (8) // PNG_FILTER_VALUE_PAETH + 4
20205bec5421Sopenharmony_ci+#define PNG_FILTER_VALUE_LAST_X2    (9) // PNG_FILTER_VALUE_LAST + 4
20215bec5421Sopenharmony_ci+#endif
20225bec5421Sopenharmony_ci+
20235bec5421Sopenharmony_ci struct png_struct_def
20245bec5421Sopenharmony_ci {
20255bec5421Sopenharmony_ci #ifdef PNG_SETJMP_SUPPORTED
20265bec5421Sopenharmony_ci@@ -467,8 +475,14 @@ struct png_struct_def
20275bec5421Sopenharmony_ci    png_bytep big_prev_row;
20285bec5421Sopenharmony_ci 
20295bec5421Sopenharmony_ci /* New member added in libpng-1.5.7 */
20305bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
20315bec5421Sopenharmony_ci+   // OH ISSUE: png optimize
20325bec5421Sopenharmony_ci+   void (*read_filter[PNG_FILTER_VALUE_LAST_X2 - 1])(png_row_infop row_info,
20335bec5421Sopenharmony_ci+      png_bytep row, png_const_bytep prev_row);
20345bec5421Sopenharmony_ci+#else
20355bec5421Sopenharmony_ci    void (*read_filter[PNG_FILTER_VALUE_LAST-1])(png_row_infop row_info,
20365bec5421Sopenharmony_ci       png_bytep row, png_const_bytep prev_row);
20375bec5421Sopenharmony_ci+#endif
20385bec5421Sopenharmony_ci 
20395bec5421Sopenharmony_ci #ifdef PNG_READ_SUPPORTED
20405bec5421Sopenharmony_ci #if defined(PNG_COLORSPACE_SUPPORTED) || defined(PNG_GAMMA_SUPPORTED)
20415bec5421Sopenharmony_cidiff --git a/pngtrans.c b/pngtrans.c
20425bec5421Sopenharmony_ciindex 1100f46eb..9addf3423 100644
20435bec5421Sopenharmony_ci--- a/pngtrans.c
20445bec5421Sopenharmony_ci+++ b/pngtrans.c
20455bec5421Sopenharmony_ci@@ -13,6 +13,19 @@
20465bec5421Sopenharmony_ci 
20475bec5421Sopenharmony_ci #include "pngpriv.h"
20485bec5421Sopenharmony_ci 
20495bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
20505bec5421Sopenharmony_ci+#  if defined(_MSC_VER) && !defined(__clang__) && defined(_M_ARM64)
20515bec5421Sopenharmony_ci+#    include <arm64_neon.h>
20525bec5421Sopenharmony_ci+#  else
20535bec5421Sopenharmony_ci+#    include <arm_neon.h>
20545bec5421Sopenharmony_ci+#  endif
20555bec5421Sopenharmony_ci+#  define STEP_GRAY (16)
20565bec5421Sopenharmony_ci+#  define STEP_GA (32)
20575bec5421Sopenharmony_ci+#  define STEP_RGB (48)
20585bec5421Sopenharmony_ci+#  define STEP_RGBA (64)
20595bec5421Sopenharmony_ci+#  define INDEX2 (2)
20605bec5421Sopenharmony_ci+#endif
20615bec5421Sopenharmony_ci+
20625bec5421Sopenharmony_ci #if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
20635bec5421Sopenharmony_ci 
20645bec5421Sopenharmony_ci #if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED)
20655bec5421Sopenharmony_ci@@ -269,13 +282,19 @@ png_do_invert(png_row_infop row_info, png_bytep row)
20665bec5421Sopenharmony_ci    if (row_info->color_type == PNG_COLOR_TYPE_GRAY)
20675bec5421Sopenharmony_ci    {
20685bec5421Sopenharmony_ci       png_bytep rp = row;
20695bec5421Sopenharmony_ci-      size_t i;
20705bec5421Sopenharmony_ci-      size_t istop = row_info->rowbytes;
20715bec5421Sopenharmony_ci-
20725bec5421Sopenharmony_ci-      for (i = 0; i < istop; i++)
20735bec5421Sopenharmony_ci+      png_bytep rp_stop = row + row_info->rowbytes;
20745bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
20755bec5421Sopenharmony_ci+      png_bytep rp_stop_neon = rp_stop - STEP_GRAY;
20765bec5421Sopenharmony_ci+      for (; rp < rp_stop_neon; rp += STEP_GRAY)
20775bec5421Sopenharmony_ci+      {
20785bec5421Sopenharmony_ci+         uint8x16_t gray = vld1q_u8(rp);
20795bec5421Sopenharmony_ci+         gray = ~gray;
20805bec5421Sopenharmony_ci+         vst1q_u8(rp, gray);
20815bec5421Sopenharmony_ci+      }
20825bec5421Sopenharmony_ci+#endif
20835bec5421Sopenharmony_ci+      for (; rp < rp_stop; rp++)
20845bec5421Sopenharmony_ci       {
20855bec5421Sopenharmony_ci          *rp = (png_byte)(~(*rp));
20865bec5421Sopenharmony_ci-         rp++;
20875bec5421Sopenharmony_ci       }
20885bec5421Sopenharmony_ci    }
20895bec5421Sopenharmony_ci 
20905bec5421Sopenharmony_ci@@ -283,13 +302,19 @@ png_do_invert(png_row_infop row_info, png_bytep row)
20915bec5421Sopenharmony_ci       row_info->bit_depth == 8)
20925bec5421Sopenharmony_ci    {
20935bec5421Sopenharmony_ci       png_bytep rp = row;
20945bec5421Sopenharmony_ci-      size_t i;
20955bec5421Sopenharmony_ci-      size_t istop = row_info->rowbytes;
20965bec5421Sopenharmony_ci-
20975bec5421Sopenharmony_ci-      for (i = 0; i < istop; i += 2)
20985bec5421Sopenharmony_ci+      png_bytep rp_stop = row + row_info->rowbytes;
20995bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
21005bec5421Sopenharmony_ci+      png_bytep rp_stop_neon = rp_stop - STEP_GA;
21015bec5421Sopenharmony_ci+      for (; rp < rp_stop_neon; rp += STEP_GA)
21025bec5421Sopenharmony_ci+      {
21035bec5421Sopenharmony_ci+         uint8x16x2_t gray_alpha = vld2q_u8(rp);
21045bec5421Sopenharmony_ci+         gray_alpha.val[0] = ~gray_alpha.val[0];
21055bec5421Sopenharmony_ci+         vst2q_u8(rp, gray_alpha);
21065bec5421Sopenharmony_ci+      }
21075bec5421Sopenharmony_ci+#endif
21085bec5421Sopenharmony_ci+      for (; rp < rp_stop; rp += 2)
21095bec5421Sopenharmony_ci       {
21105bec5421Sopenharmony_ci          *rp = (png_byte)(~(*rp));
21115bec5421Sopenharmony_ci-         rp += 2;
21125bec5421Sopenharmony_ci       }
21135bec5421Sopenharmony_ci    }
21145bec5421Sopenharmony_ci 
21155bec5421Sopenharmony_ci@@ -298,14 +323,21 @@ png_do_invert(png_row_infop row_info, png_bytep row)
21165bec5421Sopenharmony_ci       row_info->bit_depth == 16)
21175bec5421Sopenharmony_ci    {
21185bec5421Sopenharmony_ci       png_bytep rp = row;
21195bec5421Sopenharmony_ci-      size_t i;
21205bec5421Sopenharmony_ci-      size_t istop = row_info->rowbytes;
21215bec5421Sopenharmony_ci-
21225bec5421Sopenharmony_ci-      for (i = 0; i < istop; i += 4)
21235bec5421Sopenharmony_ci+      png_bytep rp_stop = row + row_info->rowbytes;
21245bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
21255bec5421Sopenharmony_ci+      png_bytep rp_stop_neon = rp_stop - STEP_RGBA;
21265bec5421Sopenharmony_ci+      for (; rp < rp_stop_neon; rp += STEP_RGBA)
21275bec5421Sopenharmony_ci+      {
21285bec5421Sopenharmony_ci+         uint8x16x4_t gray_alpha = vld4q_u8(rp);
21295bec5421Sopenharmony_ci+         gray_alpha.val[0] = ~gray_alpha.val[0];
21305bec5421Sopenharmony_ci+         gray_alpha.val[1] = ~gray_alpha.val[1];
21315bec5421Sopenharmony_ci+         vst4q_u8(rp, gray_alpha);
21325bec5421Sopenharmony_ci+      }
21335bec5421Sopenharmony_ci+#endif
21345bec5421Sopenharmony_ci+      for (; rp < rp_stop; rp += 4)
21355bec5421Sopenharmony_ci       {
21365bec5421Sopenharmony_ci          *rp = (png_byte)(~(*rp));
21375bec5421Sopenharmony_ci          *(rp + 1) = (png_byte)(~(*(rp + 1)));
21385bec5421Sopenharmony_ci-         rp += 4;
21395bec5421Sopenharmony_ci       }
21405bec5421Sopenharmony_ci    }
21415bec5421Sopenharmony_ci #endif
21425bec5421Sopenharmony_ci@@ -323,10 +355,19 @@ png_do_swap(png_row_infop row_info, png_bytep row)
21435bec5421Sopenharmony_ci    if (row_info->bit_depth == 16)
21445bec5421Sopenharmony_ci    {
21455bec5421Sopenharmony_ci       png_bytep rp = row;
21465bec5421Sopenharmony_ci-      png_uint_32 i;
21475bec5421Sopenharmony_ci-      png_uint_32 istop= row_info->width * row_info->channels;
21485bec5421Sopenharmony_ci-
21495bec5421Sopenharmony_ci-      for (i = 0; i < istop; i++, rp += 2)
21505bec5421Sopenharmony_ci+      png_bytep rp_stop = row + row_info->rowbytes;
21515bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
21525bec5421Sopenharmony_ci+      png_bytep rp_stop_neon = rp_stop - STEP_GA;
21535bec5421Sopenharmony_ci+      for (; rp < rp_stop_neon; rp += STEP_GA)
21545bec5421Sopenharmony_ci+      {
21555bec5421Sopenharmony_ci+         uint8x16x2_t gray = vld2q_u8(rp);
21565bec5421Sopenharmony_ci+         uint8x16_t tmp = gray.val[0];
21575bec5421Sopenharmony_ci+         gray.val[0] = gray.val[1];
21585bec5421Sopenharmony_ci+         gray.val[1] = tmp;
21595bec5421Sopenharmony_ci+         vst2q_u8(rp, gray);
21605bec5421Sopenharmony_ci+      }
21615bec5421Sopenharmony_ci+#endif
21625bec5421Sopenharmony_ci+      for (; rp < rp_stop; rp += 2)
21635bec5421Sopenharmony_ci       {
21645bec5421Sopenharmony_ci #ifdef PNG_BUILTIN_BSWAP16_SUPPORTED
21655bec5421Sopenharmony_ci          /* Feature added to libpng-1.6.11 for testing purposes, not
21665bec5421Sopenharmony_ci@@ -622,15 +663,24 @@ png_do_bgr(png_row_infop row_info, png_bytep row)
21675bec5421Sopenharmony_ci 
21685bec5421Sopenharmony_ci    if ((row_info->color_type & PNG_COLOR_MASK_COLOR) != 0)
21695bec5421Sopenharmony_ci    {
21705bec5421Sopenharmony_ci-      png_uint_32 row_width = row_info->width;
21715bec5421Sopenharmony_ci       if (row_info->bit_depth == 8)
21725bec5421Sopenharmony_ci       {
21735bec5421Sopenharmony_ci          if (row_info->color_type == PNG_COLOR_TYPE_RGB)
21745bec5421Sopenharmony_ci          {
21755bec5421Sopenharmony_ci-            png_bytep rp;
21765bec5421Sopenharmony_ci-            png_uint_32 i;
21775bec5421Sopenharmony_ci-
21785bec5421Sopenharmony_ci-            for (i = 0, rp = row; i < row_width; i++, rp += 3)
21795bec5421Sopenharmony_ci+            png_bytep rp = row;
21805bec5421Sopenharmony_ci+            png_bytep rp_stop = row + row_info->rowbytes;
21815bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
21825bec5421Sopenharmony_ci+            png_bytep rp_stop_neon = rp_stop - STEP_RGB;
21835bec5421Sopenharmony_ci+            for (; rp < rp_stop_neon; rp += STEP_RGB)
21845bec5421Sopenharmony_ci+            {
21855bec5421Sopenharmony_ci+               uint8x16x3_t bgr = vld3q_u8(rp);
21865bec5421Sopenharmony_ci+               uint8x16_t tmp = bgr.val[INDEX2];
21875bec5421Sopenharmony_ci+               bgr.val[INDEX2] = bgr.val[0];
21885bec5421Sopenharmony_ci+               bgr.val[0] = tmp;
21895bec5421Sopenharmony_ci+               vst3q_u8(rp, bgr);
21905bec5421Sopenharmony_ci+            }
21915bec5421Sopenharmony_ci+#endif
21925bec5421Sopenharmony_ci+            for (; rp < rp_stop; rp += 3)
21935bec5421Sopenharmony_ci             {
21945bec5421Sopenharmony_ci                png_byte save = *rp;
21955bec5421Sopenharmony_ci                *rp = *(rp + 2);
21965bec5421Sopenharmony_ci@@ -640,10 +690,20 @@ png_do_bgr(png_row_infop row_info, png_bytep row)
21975bec5421Sopenharmony_ci 
21985bec5421Sopenharmony_ci          else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
21995bec5421Sopenharmony_ci          {
22005bec5421Sopenharmony_ci-            png_bytep rp;
22015bec5421Sopenharmony_ci-            png_uint_32 i;
22025bec5421Sopenharmony_ci-
22035bec5421Sopenharmony_ci-            for (i = 0, rp = row; i < row_width; i++, rp += 4)
22045bec5421Sopenharmony_ci+            png_bytep rp = row;
22055bec5421Sopenharmony_ci+            png_bytep rp_stop = row + row_info->rowbytes;
22065bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
22075bec5421Sopenharmony_ci+            png_bytep rp_stop_neon = rp_stop - STEP_RGBA;
22085bec5421Sopenharmony_ci+            for (; rp < rp_stop_neon; rp += STEP_RGBA)
22095bec5421Sopenharmony_ci+            {
22105bec5421Sopenharmony_ci+               uint8x16x4_t bgra = vld4q_u8(rp);
22115bec5421Sopenharmony_ci+               uint8x16_t tmp = bgra.val[INDEX2];
22125bec5421Sopenharmony_ci+               bgra.val[INDEX2] = bgra.val[0];
22135bec5421Sopenharmony_ci+               bgra.val[0] = tmp;
22145bec5421Sopenharmony_ci+               vst4q_u8(rp, bgra);
22155bec5421Sopenharmony_ci+            }
22165bec5421Sopenharmony_ci+#endif
22175bec5421Sopenharmony_ci+            for (; rp < rp_stop; rp += 4)
22185bec5421Sopenharmony_ci             {
22195bec5421Sopenharmony_ci                png_byte save = *rp;
22205bec5421Sopenharmony_ci                *rp = *(rp + 2);
22215bec5421Sopenharmony_ci@@ -657,10 +717,20 @@ png_do_bgr(png_row_infop row_info, png_bytep row)
22225bec5421Sopenharmony_ci       {
22235bec5421Sopenharmony_ci          if (row_info->color_type == PNG_COLOR_TYPE_RGB)
22245bec5421Sopenharmony_ci          {
22255bec5421Sopenharmony_ci-            png_bytep rp;
22265bec5421Sopenharmony_ci-            png_uint_32 i;
22275bec5421Sopenharmony_ci-
22285bec5421Sopenharmony_ci-            for (i = 0, rp = row; i < row_width; i++, rp += 6)
22295bec5421Sopenharmony_ci+            png_bytep rp = row;
22305bec5421Sopenharmony_ci+            png_bytep rp_stop = row + row_info->rowbytes;
22315bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
22325bec5421Sopenharmony_ci+            png_bytep rp_stop_neon = rp_stop - STEP_RGB;
22335bec5421Sopenharmony_ci+            for (; rp < rp_stop_neon; rp += STEP_RGB)
22345bec5421Sopenharmony_ci+            {
22355bec5421Sopenharmony_ci+               uint16x8x3_t bgr = vld3q_u16((unsigned short *)rp);
22365bec5421Sopenharmony_ci+               uint16x8_t tmp = bgr.val[INDEX2];
22375bec5421Sopenharmony_ci+               bgr.val[INDEX2] = bgr.val[0];
22385bec5421Sopenharmony_ci+               bgr.val[0] = tmp;
22395bec5421Sopenharmony_ci+               vst3q_u16((unsigned short *)rp, bgr);
22405bec5421Sopenharmony_ci+            }
22415bec5421Sopenharmony_ci+#endif
22425bec5421Sopenharmony_ci+            for (; rp < rp_stop; rp += 6)
22435bec5421Sopenharmony_ci             {
22445bec5421Sopenharmony_ci                png_byte save = *rp;
22455bec5421Sopenharmony_ci                *rp = *(rp + 4);
22465bec5421Sopenharmony_ci@@ -673,10 +743,20 @@ png_do_bgr(png_row_infop row_info, png_bytep row)
22475bec5421Sopenharmony_ci 
22485bec5421Sopenharmony_ci          else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
22495bec5421Sopenharmony_ci          {
22505bec5421Sopenharmony_ci-            png_bytep rp;
22515bec5421Sopenharmony_ci-            png_uint_32 i;
22525bec5421Sopenharmony_ci-
22535bec5421Sopenharmony_ci-            for (i = 0, rp = row; i < row_width; i++, rp += 8)
22545bec5421Sopenharmony_ci+            png_bytep rp = row;
22555bec5421Sopenharmony_ci+            png_bytep rp_stop = row + row_info->rowbytes;
22565bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE
22575bec5421Sopenharmony_ci+            png_bytep rp_stop_neon = rp_stop - STEP_RGBA;
22585bec5421Sopenharmony_ci+            for (; rp < rp_stop_neon; rp += STEP_RGBA)
22595bec5421Sopenharmony_ci+            {
22605bec5421Sopenharmony_ci+               uint16x8x4_t bgra = vld4q_u16((unsigned short *)rp);
22615bec5421Sopenharmony_ci+               uint16x8_t tmp = bgra.val[INDEX2];
22625bec5421Sopenharmony_ci+               bgra.val[INDEX2] = bgra.val[0];
22635bec5421Sopenharmony_ci+               bgra.val[0] = tmp;
22645bec5421Sopenharmony_ci+               vst4q_u16((unsigned short *)rp, bgra);
22655bec5421Sopenharmony_ci+            }
22665bec5421Sopenharmony_ci+#endif
22675bec5421Sopenharmony_ci+            for (; rp < rp_stop; rp += 8)
22685bec5421Sopenharmony_ci             {
22695bec5421Sopenharmony_ci                png_byte save = *rp;
22705bec5421Sopenharmony_ci                *rp = *(rp + 4);
2271