15bec5421Sopenharmony_cidiff --git a/arm/arm_init.c b/arm/arm_init.c 25bec5421Sopenharmony_ciindex 3a89998ab..05aa2c0d9 100644 35bec5421Sopenharmony_ci--- a/arm/arm_init.c 45bec5421Sopenharmony_ci+++ b/arm/arm_init.c 55bec5421Sopenharmony_ci@@ -113,13 +113,23 @@ png_init_filter_functions_neon(png_structp pp, unsigned int bpp) 65bec5421Sopenharmony_ci * initialization function.) 75bec5421Sopenharmony_ci */ 85bec5421Sopenharmony_ci pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon; 95bec5421Sopenharmony_ci- 105bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 115bec5421Sopenharmony_ci+ // OH ISSUE: png optimize 125bec5421Sopenharmony_ci+ pp->read_filter[PNG_FILTER_VALUE_UP_X2-1] = png_read_filter_row_up_x2_neon; 135bec5421Sopenharmony_ci+#endif 145bec5421Sopenharmony_ci if (bpp == 3) 155bec5421Sopenharmony_ci { 165bec5421Sopenharmony_ci pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon; 175bec5421Sopenharmony_ci pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon; 185bec5421Sopenharmony_ci pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = 195bec5421Sopenharmony_ci png_read_filter_row_paeth3_neon; 205bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 215bec5421Sopenharmony_ci+ // OH ISSUE: png optimize 225bec5421Sopenharmony_ci+ pp->read_filter[PNG_FILTER_VALUE_AVG_X2-1] = 235bec5421Sopenharmony_ci+ png_read_filter_row_avg3_x2_neon; 245bec5421Sopenharmony_ci+ pp->read_filter[PNG_FILTER_VALUE_PAETH_X2-1] = 255bec5421Sopenharmony_ci+ png_read_filter_row_paeth3_x2_neon; 265bec5421Sopenharmony_ci+#endif 275bec5421Sopenharmony_ci } 285bec5421Sopenharmony_ci 295bec5421Sopenharmony_ci else if (bpp == 4) 305bec5421Sopenharmony_ci@@ -128,6 +138,13 @@ png_init_filter_functions_neon(png_structp pp, unsigned int bpp) 315bec5421Sopenharmony_ci pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon; 325bec5421Sopenharmony_ci pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = 335bec5421Sopenharmony_ci 
png_read_filter_row_paeth4_neon; 345bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 355bec5421Sopenharmony_ci+ // OH ISSUE: png optimize 365bec5421Sopenharmony_ci+ pp->read_filter[PNG_FILTER_VALUE_AVG_X2-1] = 375bec5421Sopenharmony_ci+ png_read_filter_row_avg4_x2_neon; 385bec5421Sopenharmony_ci+ pp->read_filter[PNG_FILTER_VALUE_PAETH_X2-1] = 395bec5421Sopenharmony_ci+ png_read_filter_row_paeth4_x2_neon; 405bec5421Sopenharmony_ci+#endif 415bec5421Sopenharmony_ci } 425bec5421Sopenharmony_ci } 435bec5421Sopenharmony_ci #endif /* PNG_ARM_NEON_OPT > 0 */ 445bec5421Sopenharmony_cidiff --git a/arm/filter_neon_intrinsics.c b/arm/filter_neon_intrinsics.c 455bec5421Sopenharmony_ciindex 4466d48b2..4ff810a19 100644 465bec5421Sopenharmony_ci--- a/arm/filter_neon_intrinsics.c 475bec5421Sopenharmony_ci+++ b/arm/filter_neon_intrinsics.c 485bec5421Sopenharmony_ci@@ -47,6 +47,7 @@ 495bec5421Sopenharmony_ci 505bec5421Sopenharmony_ci #if PNG_ARM_NEON_OPT > 0 515bec5421Sopenharmony_ci 525bec5421Sopenharmony_ci+#ifndef PNG_MULTY_LINE_ENABLE 535bec5421Sopenharmony_ci void 545bec5421Sopenharmony_ci png_read_filter_row_up_neon(png_row_infop row_info, png_bytep row, 555bec5421Sopenharmony_ci png_const_bytep prev_row) 565bec5421Sopenharmony_ci@@ -396,7 +397,1351 @@ png_read_filter_row_paeth4_neon(png_row_infop row_info, png_bytep row, 575bec5421Sopenharmony_ci vst4_lane_u32(png_ptr(uint32_t,rp), vdest_val, 0); 585bec5421Sopenharmony_ci } 595bec5421Sopenharmony_ci } 605bec5421Sopenharmony_ci+#else 615bec5421Sopenharmony_ci+// OH ISSUE: png optimize 625bec5421Sopenharmony_ci+// according to definition: row_info->rowbytes = row_width * row_info->channels, 635bec5421Sopenharmony_ci+// the input rowbytes must be 3 or 4 times the channel size, so: 645bec5421Sopenharmony_ci+// for RGB neon process 12 bytes at once,the tail must be 3,6,9; 655bec5421Sopenharmony_ci+// for RGBA neon process 16 or 8 bytes at once,the tail must be 4; 665bec5421Sopenharmony_ci+// filter operators are internal 
function, row_info and row ensure non empty outside. 675bec5421Sopenharmony_ci+#define STEP_RGB (12) // 3 channel RGB process 12 bytes at once 685bec5421Sopenharmony_ci+#define TAIL_RGB3 (9) // tail 3 pixels have 9 bytes 695bec5421Sopenharmony_ci+#define TAIL_RGB2 (6) // tail 2 pixels have 6 bytes 705bec5421Sopenharmony_ci+#define TAIL_RGB1 (3) // tail 1 pixel has 3 bytes 715bec5421Sopenharmony_ci+#define STEP_RGBA (16) // RGBA neon process 16 bytes at once 725bec5421Sopenharmony_ci+#define STEP_RGBA_HALF (8) // RGBA neon process 8 bytes at once 735bec5421Sopenharmony_ci+#define TAIL_RGBA (4) // tail 1 pixel has 4 bytes 745bec5421Sopenharmony_ci+#define IND3 (3) // index 3 755bec5421Sopenharmony_ci+#define IND2 (2) // index 2 765bec5421Sopenharmony_ci+#define OFFSET3 (3) // RGB offset 3 bytes 775bec5421Sopenharmony_ci+#define OFFSET6 (6) // RGB offset 6 bytes 785bec5421Sopenharmony_ci+void png_read_filter_row_up_neon(png_row_infop row_info, png_bytep row, 795bec5421Sopenharmony_ci+ png_const_bytep prev_row) 805bec5421Sopenharmony_ci+{ 815bec5421Sopenharmony_ci+ png_bytep rp = row; 825bec5421Sopenharmony_ci+ png_const_bytep pp = prev_row; 835bec5421Sopenharmony_ci+ int count = row_info->rowbytes; 845bec5421Sopenharmony_ci+ 855bec5421Sopenharmony_ci+ png_debug(1, "in png_read_filter_row_up_neon"); 865bec5421Sopenharmony_ci+ 875bec5421Sopenharmony_ci+ uint8x16_t qrp, qpp; 885bec5421Sopenharmony_ci+ while (count >= STEP_RGBA) { 895bec5421Sopenharmony_ci+ qrp = vld1q_u8(rp); 905bec5421Sopenharmony_ci+ qpp = vld1q_u8(pp); 915bec5421Sopenharmony_ci+ qrp = vaddq_u8(qrp, qpp); 925bec5421Sopenharmony_ci+ vst1q_u8(rp, qrp); 935bec5421Sopenharmony_ci+ rp += STEP_RGBA; 945bec5421Sopenharmony_ci+ pp += STEP_RGBA; 955bec5421Sopenharmony_ci+ count -= STEP_RGBA; 965bec5421Sopenharmony_ci+ } 975bec5421Sopenharmony_ci+ 985bec5421Sopenharmony_ci+ if (count >= STEP_RGBA_HALF) { 995bec5421Sopenharmony_ci+ uint8x8_t qrp1, qpp1; 1005bec5421Sopenharmony_ci+ qrp1 = vld1_u8(rp); 
1015bec5421Sopenharmony_ci+ qpp1 = vld1_u8(pp); 1025bec5421Sopenharmony_ci+ qrp1 = vadd_u8(qrp1, qpp1); 1035bec5421Sopenharmony_ci+ vst1_u8(rp, qrp1); 1045bec5421Sopenharmony_ci+ rp += STEP_RGBA_HALF; 1055bec5421Sopenharmony_ci+ pp += STEP_RGBA_HALF; 1065bec5421Sopenharmony_ci+ count -= STEP_RGBA_HALF; 1075bec5421Sopenharmony_ci+ } 1085bec5421Sopenharmony_ci+ 1095bec5421Sopenharmony_ci+ for (int i = 0; i < count; i++) { 1105bec5421Sopenharmony_ci+ *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff); 1115bec5421Sopenharmony_ci+ rp++; 1125bec5421Sopenharmony_ci+ } 1135bec5421Sopenharmony_ci+} 1145bec5421Sopenharmony_ci+ 1155bec5421Sopenharmony_ci+void png_read_filter_row_up_x2_neon(png_row_infop row_info, png_bytep row, 1165bec5421Sopenharmony_ci+ png_const_bytep prev_row) 1175bec5421Sopenharmony_ci+{ 1185bec5421Sopenharmony_ci+ png_bytep rp = row; 1195bec5421Sopenharmony_ci+ png_const_bytep pp = prev_row; 1205bec5421Sopenharmony_ci+ int count = row_info->rowbytes; 1215bec5421Sopenharmony_ci+ png_bytep np = row + row_info->rowbytes + 1; 1225bec5421Sopenharmony_ci+ 1235bec5421Sopenharmony_ci+ png_debug(1, "in png_read_filter_row_up_x2_neon"); 1245bec5421Sopenharmony_ci+ 1255bec5421Sopenharmony_ci+ uint8x16_t qrp, qpp, qnp; 1265bec5421Sopenharmony_ci+ while (count >= STEP_RGBA) { 1275bec5421Sopenharmony_ci+ qrp = vld1q_u8(rp); 1285bec5421Sopenharmony_ci+ qpp = vld1q_u8(pp); 1295bec5421Sopenharmony_ci+ qnp = vld1q_u8(np); 1305bec5421Sopenharmony_ci+ qrp = vaddq_u8(qrp, qpp); 1315bec5421Sopenharmony_ci+ qnp = vaddq_u8(qnp, qrp); 1325bec5421Sopenharmony_ci+ vst1q_u8(rp, qrp); 1335bec5421Sopenharmony_ci+ vst1q_u8(np, qnp); 1345bec5421Sopenharmony_ci+ rp += STEP_RGBA; 1355bec5421Sopenharmony_ci+ pp += STEP_RGBA; 1365bec5421Sopenharmony_ci+ np += STEP_RGBA; 1375bec5421Sopenharmony_ci+ count -= STEP_RGBA; 1385bec5421Sopenharmony_ci+ } 1395bec5421Sopenharmony_ci+ 1405bec5421Sopenharmony_ci+ if (count >= STEP_RGBA_HALF) { 1415bec5421Sopenharmony_ci+ uint8x8_t qrp1, qpp1, qnp1; 
1425bec5421Sopenharmony_ci+ qrp1 = vld1_u8(rp); 1435bec5421Sopenharmony_ci+ qpp1 = vld1_u8(pp); 1445bec5421Sopenharmony_ci+ qnp1 = vld1_u8(np); 1455bec5421Sopenharmony_ci+ qrp1 = vadd_u8(qrp1, qpp1); 1465bec5421Sopenharmony_ci+ qnp1 = vadd_u8(qnp1, qrp1); 1475bec5421Sopenharmony_ci+ vst1_u8(rp, qrp1); 1485bec5421Sopenharmony_ci+ vst1_u8(np, qnp1); 1495bec5421Sopenharmony_ci+ rp += STEP_RGBA_HALF; 1505bec5421Sopenharmony_ci+ pp += STEP_RGBA_HALF; 1515bec5421Sopenharmony_ci+ np += STEP_RGBA_HALF; 1525bec5421Sopenharmony_ci+ count -= STEP_RGBA_HALF; 1535bec5421Sopenharmony_ci+ } 1545bec5421Sopenharmony_ci+ 1555bec5421Sopenharmony_ci+ for (int i = 0; i < count; i++) { 1565bec5421Sopenharmony_ci+ *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff); 1575bec5421Sopenharmony_ci+ *np = (png_byte)(((int)(*np) + (int)(*rp++)) & 0xff); 1585bec5421Sopenharmony_ci+ np++; 1595bec5421Sopenharmony_ci+ } 1605bec5421Sopenharmony_ci+} 1615bec5421Sopenharmony_ci+ 1625bec5421Sopenharmony_ci+void png_read_filter_row_sub3_neon(png_row_infop row_info, png_bytep row, 1635bec5421Sopenharmony_ci+ png_const_bytep prev_row) 1645bec5421Sopenharmony_ci+{ 1655bec5421Sopenharmony_ci+ png_bytep rp = row; 1665bec5421Sopenharmony_ci+ png_bytep rp_stop = row + row_info->rowbytes; 1675bec5421Sopenharmony_ci+ 1685bec5421Sopenharmony_ci+ uint8x16_t vtmp = vld1q_u8(rp); 1695bec5421Sopenharmony_ci+ uint8x8x2_t *vrpt = png_ptr(uint8x8x2_t, &vtmp); 1705bec5421Sopenharmony_ci+ uint8x8x2_t vrp = *vrpt; 1715bec5421Sopenharmony_ci+ 1725bec5421Sopenharmony_ci+ uint8x8x4_t vdest; 1735bec5421Sopenharmony_ci+ vdest.val[IND3] = vdup_n_u8(0); 1745bec5421Sopenharmony_ci+ 1755bec5421Sopenharmony_ci+ uint8x8_t vtmp1, vtmp2; 1765bec5421Sopenharmony_ci+ uint32x2_t *temp_pointer; 1775bec5421Sopenharmony_ci+ 1785bec5421Sopenharmony_ci+ png_debug(1, "in png_read_filter_row_sub3_neon"); 1795bec5421Sopenharmony_ci+ 1805bec5421Sopenharmony_ci+ size_t tail_bytes = row_info->rowbytes % STEP_RGB; 1815bec5421Sopenharmony_ci+ 
png_byte last_byte = *rp_stop; 1825bec5421Sopenharmony_ci+ png_bytep rp_stop_new = rp_stop - tail_bytes; 1835bec5421Sopenharmony_ci+ for (; rp < rp_stop_new;) 1845bec5421Sopenharmony_ci+ { 1855bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3); 1865bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[IND3], vrp.val[0]); 1875bec5421Sopenharmony_ci+ vtmp2 = vext_u8(vrp.val[0], vrp.val[1], OFFSET6); 1885bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[0], vtmp1); 1895bec5421Sopenharmony_ci+ 1905bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1); 1915bec5421Sopenharmony_ci+ vdest.val[IND2] = vadd_u8(vdest.val[1], vtmp2); 1925bec5421Sopenharmony_ci+ vdest.val[IND3] = vadd_u8(vdest.val[IND2], vtmp1); 1935bec5421Sopenharmony_ci+ 1945bec5421Sopenharmony_ci+ vtmp = vld1q_u8(rp + STEP_RGB); 1955bec5421Sopenharmony_ci+ vrpt = png_ptr(uint8x8x2_t, &vtmp); 1965bec5421Sopenharmony_ci+ vrp = *vrpt; 1975bec5421Sopenharmony_ci+ 1985bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); 1995bec5421Sopenharmony_ci+ rp += OFFSET3; 2005bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0); 2015bec5421Sopenharmony_ci+ rp += OFFSET3; 2025bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND2]), 0); 2035bec5421Sopenharmony_ci+ rp += OFFSET3; 2045bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND3]), 0); 2055bec5421Sopenharmony_ci+ rp += OFFSET3; 2065bec5421Sopenharmony_ci+ } 2075bec5421Sopenharmony_ci+ 2085bec5421Sopenharmony_ci+ if (tail_bytes == TAIL_RGB1) { 2095bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[IND3], vrp.val[0]); 2105bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); 2115bec5421Sopenharmony_ci+ } else if (tail_bytes == TAIL_RGB2) { 2125bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[0], 
vrp.val[1], OFFSET3); 2135bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[IND3], vrp.val[0]); 2145bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[0], vtmp1); 2155bec5421Sopenharmony_ci+ 2165bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); 2175bec5421Sopenharmony_ci+ rp += OFFSET3; 2185bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0); 2195bec5421Sopenharmony_ci+ } else if (tail_bytes == TAIL_RGB3) { 2205bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3); 2215bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[IND3], vrp.val[0]); 2225bec5421Sopenharmony_ci+ vtmp2 = vext_u8(vrp.val[0], vrp.val[1], OFFSET6); 2235bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[0], vtmp1); 2245bec5421Sopenharmony_ci+ vdest.val[IND2] = vadd_u8(vdest.val[1], vtmp2); 2255bec5421Sopenharmony_ci+ 2265bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); 2275bec5421Sopenharmony_ci+ rp += OFFSET3; 2285bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0); 2295bec5421Sopenharmony_ci+ rp += OFFSET3; 2305bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND2]), 0); 2315bec5421Sopenharmony_ci+ } 2325bec5421Sopenharmony_ci+ *rp_stop = last_byte; 2335bec5421Sopenharmony_ci+ 2345bec5421Sopenharmony_ci+ PNG_UNUSED(prev_row) 2355bec5421Sopenharmony_ci+} 2365bec5421Sopenharmony_ci+ 2375bec5421Sopenharmony_ci+void png_read_filter_row_sub4_neon(png_row_infop row_info, png_bytep row, 2385bec5421Sopenharmony_ci+ png_const_bytep prev_row) 2395bec5421Sopenharmony_ci+{ 2405bec5421Sopenharmony_ci+ png_bytep rp = row; 2415bec5421Sopenharmony_ci+ int count = row_info->rowbytes; 2425bec5421Sopenharmony_ci+ 2435bec5421Sopenharmony_ci+ uint8x8x4_t vdest; 2445bec5421Sopenharmony_ci+ vdest.val[IND3] = vdup_n_u8(0); 
2455bec5421Sopenharmony_ci+ 2465bec5421Sopenharmony_ci+ png_debug(1, "in png_read_filter_row_sub4_neon"); 2475bec5421Sopenharmony_ci+ 2485bec5421Sopenharmony_ci+ uint32x2x4_t vtmp; 2495bec5421Sopenharmony_ci+ uint8x8x4_t *vrpt; 2505bec5421Sopenharmony_ci+ uint8x8x4_t vrp; 2515bec5421Sopenharmony_ci+ uint32x2x4_t vdest_val; 2525bec5421Sopenharmony_ci+ while (count >= STEP_RGBA) { 2535bec5421Sopenharmony_ci+ uint32x2x4_t *temp_pointer; 2545bec5421Sopenharmony_ci+ vtmp = vld4_u32(png_ptr(uint32_t, rp)); 2555bec5421Sopenharmony_ci+ vrpt = png_ptr(uint8x8x4_t, &vtmp); 2565bec5421Sopenharmony_ci+ vrp = *vrpt; 2575bec5421Sopenharmony_ci+ 2585bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[IND3], vrp.val[0]); 2595bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[0], vrp.val[1]); 2605bec5421Sopenharmony_ci+ vdest.val[IND2] = vadd_u8(vdest.val[1], vrp.val[IND2]); 2615bec5421Sopenharmony_ci+ vdest.val[IND3] = vadd_u8(vdest.val[IND2], vrp.val[IND3]); 2625bec5421Sopenharmony_ci+ 2635bec5421Sopenharmony_ci+ vdest_val = png_ldr(uint32x2x4_t, &vdest); 2645bec5421Sopenharmony_ci+ vst4_lane_u32(png_ptr(uint32_t, rp), vdest_val, 0); 2655bec5421Sopenharmony_ci+ 2665bec5421Sopenharmony_ci+ rp += STEP_RGBA; 2675bec5421Sopenharmony_ci+ count -= STEP_RGBA; 2685bec5421Sopenharmony_ci+ } 2695bec5421Sopenharmony_ci+ 2705bec5421Sopenharmony_ci+ if (count >= STEP_RGBA_HALF) { 2715bec5421Sopenharmony_ci+ uint32x2x2_t vtmp1 = vld2_u32(png_ptr(uint32_t, rp)); 2725bec5421Sopenharmony_ci+ uint8x8x2_t *vrpt1 = png_ptr(uint8x8x2_t, &vtmp1); 2735bec5421Sopenharmony_ci+ uint8x8x2_t vrp1 = *vrpt1; 2745bec5421Sopenharmony_ci+ uint32x2x2_t *temp_pointer; 2755bec5421Sopenharmony_ci+ uint32x2x2_t vdest_val1; 2765bec5421Sopenharmony_ci+ 2775bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[IND3], vrp1.val[0]); 2785bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[0], vrp1.val[1]); 2795bec5421Sopenharmony_ci+ vdest.val[IND3] = vdest.val[1]; 2805bec5421Sopenharmony_ci+ vdest_val1 
= png_ldr(uint32x2x2_t, &vdest); 2815bec5421Sopenharmony_ci+ vst2_lane_u32(png_ptr(uint32_t, rp), vdest_val1, 0); 2825bec5421Sopenharmony_ci+ 2835bec5421Sopenharmony_ci+ rp += STEP_RGBA_HALF; 2845bec5421Sopenharmony_ci+ count -= STEP_RGBA_HALF; 2855bec5421Sopenharmony_ci+ } 2865bec5421Sopenharmony_ci+ 2875bec5421Sopenharmony_ci+ if (count == 0) { 2885bec5421Sopenharmony_ci+ return; 2895bec5421Sopenharmony_ci+ } 2905bec5421Sopenharmony_ci+ 2915bec5421Sopenharmony_ci+ uint32x2_t vtmp2 = vld1_u32(png_ptr(uint32_t, rp)); 2925bec5421Sopenharmony_ci+ uint8x8_t *vrpt2 = png_ptr(uint8x8_t, &vtmp2); 2935bec5421Sopenharmony_ci+ uint8x8_t vrp2 = *vrpt2; 2945bec5421Sopenharmony_ci+ uint32x2_t *temp_pointer; 2955bec5421Sopenharmony_ci+ uint32x2_t vdest_val2; 2965bec5421Sopenharmony_ci+ 2975bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[IND3], vrp2); 2985bec5421Sopenharmony_ci+ vdest_val2 = png_ldr(uint32x2_t, &vdest); 2995bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), vdest_val2, 0); 3005bec5421Sopenharmony_ci+ 3015bec5421Sopenharmony_ci+ PNG_UNUSED(prev_row) 3025bec5421Sopenharmony_ci+} 3035bec5421Sopenharmony_ci+ 3045bec5421Sopenharmony_ci+void png_read_filter_row_avg3_neon(png_row_infop row_info, png_bytep row, 3055bec5421Sopenharmony_ci+ png_const_bytep prev_row) 3065bec5421Sopenharmony_ci+{ 3075bec5421Sopenharmony_ci+ png_bytep rp = row; 3085bec5421Sopenharmony_ci+ png_const_bytep pp = prev_row; 3095bec5421Sopenharmony_ci+ png_bytep rp_stop = row + row_info->rowbytes; 3105bec5421Sopenharmony_ci+ 3115bec5421Sopenharmony_ci+ uint8x16_t vtmp; 3125bec5421Sopenharmony_ci+ uint8x8x2_t *vrpt; 3135bec5421Sopenharmony_ci+ uint8x8x2_t vrp; 3145bec5421Sopenharmony_ci+ uint8x8x4_t vdest; 3155bec5421Sopenharmony_ci+ vdest.val[IND3] = vdup_n_u8(0); 3165bec5421Sopenharmony_ci+ 3175bec5421Sopenharmony_ci+ vtmp = vld1q_u8(rp); 3185bec5421Sopenharmony_ci+ vrpt = png_ptr(uint8x8x2_t, &vtmp); 3195bec5421Sopenharmony_ci+ vrp = *vrpt; 3205bec5421Sopenharmony_ci+ 
3215bec5421Sopenharmony_ci+ png_debug(1, "in png_read_filter_row_avg3_neon"); 3225bec5421Sopenharmony_ci+ 3235bec5421Sopenharmony_ci+ uint8x8_t vtmp1, vtmp2, vtmp3; 3245bec5421Sopenharmony_ci+ uint8x8x2_t *vppt; 3255bec5421Sopenharmony_ci+ uint8x8x2_t vpp; 3265bec5421Sopenharmony_ci+ uint32x2_t *temp_pointer; 3275bec5421Sopenharmony_ci+ 3285bec5421Sopenharmony_ci+ size_t tail_bytes = row_info->rowbytes % STEP_RGB; 3295bec5421Sopenharmony_ci+ png_byte last_byte = *rp_stop; 3305bec5421Sopenharmony_ci+ png_bytep rp_stop_new = rp_stop - tail_bytes; 3315bec5421Sopenharmony_ci+ for (; rp < rp_stop_new; pp += STEP_RGB) 3325bec5421Sopenharmony_ci+ { 3335bec5421Sopenharmony_ci+ vtmp = vld1q_u8(pp); 3345bec5421Sopenharmony_ci+ vppt = png_ptr(uint8x8x2_t, &vtmp); 3355bec5421Sopenharmony_ci+ vpp = *vppt; 3365bec5421Sopenharmony_ci+ 3375bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3); 3385bec5421Sopenharmony_ci+ vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp.val[0]); 3395bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); 3405bec5421Sopenharmony_ci+ 3415bec5421Sopenharmony_ci+ vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3); 3425bec5421Sopenharmony_ci+ vtmp3 = vext_u8(vrp.val[0], vrp.val[1], OFFSET6); 3435bec5421Sopenharmony_ci+ vdest.val[1] = vhadd_u8(vdest.val[0], vtmp2); 3445bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[1], vtmp1); 3455bec5421Sopenharmony_ci+ 3465bec5421Sopenharmony_ci+ vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET6); 3475bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1); 3485bec5421Sopenharmony_ci+ 3495bec5421Sopenharmony_ci+ vtmp = vld1q_u8(rp + STEP_RGB); 3505bec5421Sopenharmony_ci+ vrpt = png_ptr(uint8x8x2_t, &vtmp); 3515bec5421Sopenharmony_ci+ vrp = *vrpt; 3525bec5421Sopenharmony_ci+ 3535bec5421Sopenharmony_ci+ vdest.val[IND2] = vhadd_u8(vdest.val[1], vtmp2); 3545bec5421Sopenharmony_ci+ vdest.val[IND2] = vadd_u8(vdest.val[IND2], vtmp3); 3555bec5421Sopenharmony_ci+ 
3565bec5421Sopenharmony_ci+ vtmp2 = vext_u8(vpp.val[1], vpp.val[1], 1); 3575bec5421Sopenharmony_ci+ 3585bec5421Sopenharmony_ci+ vdest.val[IND3] = vhadd_u8(vdest.val[IND2], vtmp2); 3595bec5421Sopenharmony_ci+ vdest.val[IND3] = vadd_u8(vdest.val[IND3], vtmp1); 3605bec5421Sopenharmony_ci+ 3615bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); 3625bec5421Sopenharmony_ci+ rp += OFFSET3; 3635bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0); 3645bec5421Sopenharmony_ci+ rp += OFFSET3; 3655bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND2]), 0); 3665bec5421Sopenharmony_ci+ rp += OFFSET3; 3675bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND3]), 0); 3685bec5421Sopenharmony_ci+ rp += OFFSET3; 3695bec5421Sopenharmony_ci+ } 3705bec5421Sopenharmony_ci+ 3715bec5421Sopenharmony_ci+ vtmp = vld1q_u8(pp); 3725bec5421Sopenharmony_ci+ vppt = png_ptr(uint8x8x2_t, &vtmp); 3735bec5421Sopenharmony_ci+ vpp = *vppt; 3745bec5421Sopenharmony_ci+ 3755bec5421Sopenharmony_ci+ if (tail_bytes == TAIL_RGB1) { 3765bec5421Sopenharmony_ci+ vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp.val[0]); 3775bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); 3785bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); 3795bec5421Sopenharmony_ci+ } else if (tail_bytes == TAIL_RGB2) { 3805bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3); 3815bec5421Sopenharmony_ci+ vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp.val[0]); 3825bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); 3835bec5421Sopenharmony_ci+ 3845bec5421Sopenharmony_ci+ vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3); 3855bec5421Sopenharmony_ci+ vdest.val[1] = vhadd_u8(vdest.val[0], vtmp2); 3865bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[1], 
vtmp1); 3875bec5421Sopenharmony_ci+ 3885bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); 3895bec5421Sopenharmony_ci+ rp += OFFSET3; 3905bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0); 3915bec5421Sopenharmony_ci+ } else if (tail_bytes == TAIL_RGB3) { 3925bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3); 3935bec5421Sopenharmony_ci+ vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp.val[0]); 3945bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); 3955bec5421Sopenharmony_ci+ 3965bec5421Sopenharmony_ci+ vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3); 3975bec5421Sopenharmony_ci+ vtmp3 = vext_u8(vrp.val[0], vrp.val[1], OFFSET6); 3985bec5421Sopenharmony_ci+ vdest.val[1] = vhadd_u8(vdest.val[0], vtmp2); 3995bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[1], vtmp1); 4005bec5421Sopenharmony_ci+ 4015bec5421Sopenharmony_ci+ vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET6); 4025bec5421Sopenharmony_ci+ 4035bec5421Sopenharmony_ci+ vdest.val[IND2] = vhadd_u8(vdest.val[1], vtmp2); 4045bec5421Sopenharmony_ci+ vdest.val[IND2] = vadd_u8(vdest.val[IND2], vtmp3); 4055bec5421Sopenharmony_ci+ 4065bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); 4075bec5421Sopenharmony_ci+ rp += OFFSET3; 4085bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0); 4095bec5421Sopenharmony_ci+ rp += OFFSET3; 4105bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND2]), 0); 4115bec5421Sopenharmony_ci+ } 4125bec5421Sopenharmony_ci+ *rp_stop = last_byte; 4135bec5421Sopenharmony_ci+} 4145bec5421Sopenharmony_ci+ 4155bec5421Sopenharmony_ci+void png_read_filter_row_avg3_x2_neon(png_row_infop row_info, png_bytep row, 4165bec5421Sopenharmony_ci+ png_const_bytep prev_row) 4175bec5421Sopenharmony_ci+{ 4185bec5421Sopenharmony_ci+ 
png_bytep rp = row; 4195bec5421Sopenharmony_ci+ png_const_bytep pp = prev_row; 4205bec5421Sopenharmony_ci+ png_bytep rp_stop = row + row_info->rowbytes; 4215bec5421Sopenharmony_ci+ png_bytep np = rp_stop + 1; 4225bec5421Sopenharmony_ci+ 4235bec5421Sopenharmony_ci+ uint8x16_t vtmp; 4245bec5421Sopenharmony_ci+ uint8x8x2_t *vrpt; 4255bec5421Sopenharmony_ci+ uint8x8x2_t vrp; 4265bec5421Sopenharmony_ci+ uint8x8x4_t vdest; 4275bec5421Sopenharmony_ci+ vdest.val[IND3] = vdup_n_u8(0); 4285bec5421Sopenharmony_ci+ 4295bec5421Sopenharmony_ci+ vtmp = vld1q_u8(rp); 4305bec5421Sopenharmony_ci+ vrpt = png_ptr(uint8x8x2_t, &vtmp); 4315bec5421Sopenharmony_ci+ vrp = *vrpt; 4325bec5421Sopenharmony_ci+ 4335bec5421Sopenharmony_ci+ uint8x8x2_t *vnpt; 4345bec5421Sopenharmony_ci+ uint8x8x2_t vnp; 4355bec5421Sopenharmony_ci+ uint8x8x4_t vdestN; 4365bec5421Sopenharmony_ci+ vdestN.val[IND3] = vdup_n_u8(0); 4375bec5421Sopenharmony_ci+ 4385bec5421Sopenharmony_ci+ vtmp = vld1q_u8(np); 4395bec5421Sopenharmony_ci+ vnpt = png_ptr(uint8x8x2_t, &vtmp); 4405bec5421Sopenharmony_ci+ vnp = *vnpt; 4415bec5421Sopenharmony_ci+ 4425bec5421Sopenharmony_ci+ png_debug(1, "in png_read_filter_row_x2_avg3_neon"); 4435bec5421Sopenharmony_ci+ 4445bec5421Sopenharmony_ci+ uint8x8_t vtmp1, vtmp2, vtmp3; 4455bec5421Sopenharmony_ci+ uint8x8x2_t *vppt; 4465bec5421Sopenharmony_ci+ uint8x8x2_t vpp; 4475bec5421Sopenharmony_ci+ uint32x2_t *temp_pointer; 4485bec5421Sopenharmony_ci+ 4495bec5421Sopenharmony_ci+ size_t tail_bytes = row_info->rowbytes % STEP_RGB; 4505bec5421Sopenharmony_ci+ png_byte last_byte = *rp_stop; 4515bec5421Sopenharmony_ci+ png_byte last_byte_next = *(rp_stop + row_info->rowbytes + 1); 4525bec5421Sopenharmony_ci+ png_bytep rp_stop_new = rp_stop - tail_bytes; 4535bec5421Sopenharmony_ci+ for (; rp < rp_stop_new; pp += STEP_RGB) 4545bec5421Sopenharmony_ci+ { 4555bec5421Sopenharmony_ci+ vtmp = vld1q_u8(pp); 4565bec5421Sopenharmony_ci+ vppt = png_ptr(uint8x8x2_t, &vtmp); 4575bec5421Sopenharmony_ci+ vpp = *vppt; 
4585bec5421Sopenharmony_ci+ 4595bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3); 4605bec5421Sopenharmony_ci+ vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp.val[0]); 4615bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); 4625bec5421Sopenharmony_ci+ 4635bec5421Sopenharmony_ci+ vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3); 4645bec5421Sopenharmony_ci+ vtmp3 = vext_u8(vrp.val[0], vrp.val[1], OFFSET6); 4655bec5421Sopenharmony_ci+ vdest.val[1] = vhadd_u8(vdest.val[0], vtmp2); 4665bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[1], vtmp1); 4675bec5421Sopenharmony_ci+ 4685bec5421Sopenharmony_ci+ vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET6); 4695bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1); 4705bec5421Sopenharmony_ci+ 4715bec5421Sopenharmony_ci+ vtmp = vld1q_u8(rp + STEP_RGB); 4725bec5421Sopenharmony_ci+ vrpt = png_ptr(uint8x8x2_t, &vtmp); 4735bec5421Sopenharmony_ci+ vrp = *vrpt; 4745bec5421Sopenharmony_ci+ 4755bec5421Sopenharmony_ci+ vdest.val[IND2] = vhadd_u8(vdest.val[1], vtmp2); 4765bec5421Sopenharmony_ci+ vdest.val[IND2] = vadd_u8(vdest.val[IND2], vtmp3); 4775bec5421Sopenharmony_ci+ 4785bec5421Sopenharmony_ci+ vtmp2 = vext_u8(vpp.val[1], vpp.val[1], 1); 4795bec5421Sopenharmony_ci+ 4805bec5421Sopenharmony_ci+ vdest.val[IND3] = vhadd_u8(vdest.val[IND2], vtmp2); 4815bec5421Sopenharmony_ci+ vdest.val[IND3] = vadd_u8(vdest.val[IND3], vtmp1); 4825bec5421Sopenharmony_ci+ 4835bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); 4845bec5421Sopenharmony_ci+ rp += OFFSET3; 4855bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0); 4865bec5421Sopenharmony_ci+ rp += OFFSET3; 4875bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND2]), 0); 4885bec5421Sopenharmony_ci+ rp += OFFSET3; 4895bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, 
&vdest.val[IND3]), 0); 4905bec5421Sopenharmony_ci+ rp += OFFSET3; 4915bec5421Sopenharmony_ci+ 4925bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vnp.val[0], vnp.val[1], OFFSET3); 4935bec5421Sopenharmony_ci+ vdestN.val[0] = vhadd_u8(vdestN.val[IND3], vdest.val[0]); 4945bec5421Sopenharmony_ci+ vdestN.val[0] = vadd_u8(vdestN.val[0], vnp.val[0]); 4955bec5421Sopenharmony_ci+ 4965bec5421Sopenharmony_ci+ vtmp3 = vext_u8(vnp.val[0], vnp.val[1], OFFSET6); 4975bec5421Sopenharmony_ci+ vdestN.val[1] = vhadd_u8(vdestN.val[0], vdest.val[1]); 4985bec5421Sopenharmony_ci+ vdestN.val[1] = vadd_u8(vdestN.val[1], vtmp1); 4995bec5421Sopenharmony_ci+ 5005bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vnp.val[1], vnp.val[1], 1); 5015bec5421Sopenharmony_ci+ 5025bec5421Sopenharmony_ci+ vtmp = vld1q_u8(np + STEP_RGB); 5035bec5421Sopenharmony_ci+ vnpt = png_ptr(uint8x8x2_t, &vtmp); 5045bec5421Sopenharmony_ci+ vnp = *vnpt; 5055bec5421Sopenharmony_ci+ 5065bec5421Sopenharmony_ci+ vdestN.val[IND2] = vhadd_u8(vdestN.val[1], vdest.val[IND2]); 5075bec5421Sopenharmony_ci+ vdestN.val[IND2] = vadd_u8(vdestN.val[IND2], vtmp3); 5085bec5421Sopenharmony_ci+ 5095bec5421Sopenharmony_ci+ vdestN.val[IND3] = vhadd_u8(vdestN.val[IND2], vdest.val[IND3]); 5105bec5421Sopenharmony_ci+ vdestN.val[IND3] = vadd_u8(vdestN.val[IND3], vtmp1); 5115bec5421Sopenharmony_ci+ 5125bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[0]), 0); 5135bec5421Sopenharmony_ci+ np += OFFSET3; 5145bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[1]), 0); 5155bec5421Sopenharmony_ci+ np += OFFSET3; 5165bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[IND2]), 0); 5175bec5421Sopenharmony_ci+ np += OFFSET3; 5185bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[IND3]), 0); 5195bec5421Sopenharmony_ci+ np += OFFSET3; 5205bec5421Sopenharmony_ci+ } 5215bec5421Sopenharmony_ci+ 
5225bec5421Sopenharmony_ci+ vtmp = vld1q_u8(pp); 5235bec5421Sopenharmony_ci+ vppt = png_ptr(uint8x8x2_t, &vtmp); 5245bec5421Sopenharmony_ci+ vpp = *vppt; 5255bec5421Sopenharmony_ci+ 5265bec5421Sopenharmony_ci+ if (tail_bytes == TAIL_RGB1) { 5275bec5421Sopenharmony_ci+ vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp.val[0]); 5285bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); 5295bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); 5305bec5421Sopenharmony_ci+ 5315bec5421Sopenharmony_ci+ vdestN.val[0] = vhadd_u8(vdestN.val[IND3], vdest.val[0]); 5325bec5421Sopenharmony_ci+ vdestN.val[0] = vadd_u8(vdestN.val[0], vnp.val[0]); 5335bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[0]), 0); 5345bec5421Sopenharmony_ci+ } else if (tail_bytes == TAIL_RGB2) { 5355bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3); 5365bec5421Sopenharmony_ci+ vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp.val[0]); 5375bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); 5385bec5421Sopenharmony_ci+ 5395bec5421Sopenharmony_ci+ vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3); 5405bec5421Sopenharmony_ci+ vdest.val[1] = vhadd_u8(vdest.val[0], vtmp2); 5415bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[1], vtmp1); 5425bec5421Sopenharmony_ci+ 5435bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); 5445bec5421Sopenharmony_ci+ rp += OFFSET3; 5455bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0); 5465bec5421Sopenharmony_ci+ 5475bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vnp.val[0], vnp.val[1], OFFSET3); 5485bec5421Sopenharmony_ci+ vdestN.val[0] = vhadd_u8(vdestN.val[IND3], vdest.val[0]); 5495bec5421Sopenharmony_ci+ vdestN.val[0] = vadd_u8(vdestN.val[0], vnp.val[0]); 5505bec5421Sopenharmony_ci+ 5515bec5421Sopenharmony_ci+ vdestN.val[1] = 
vhadd_u8(vdestN.val[0], vdest.val[1]); 5525bec5421Sopenharmony_ci+ vdestN.val[1] = vadd_u8(vdestN.val[1], vtmp1); 5535bec5421Sopenharmony_ci+ 5545bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[0]), 0); 5555bec5421Sopenharmony_ci+ np += OFFSET3; 5565bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[1]), 0); 5575bec5421Sopenharmony_ci+ } else if (tail_bytes == TAIL_RGB3) { 5585bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3); 5595bec5421Sopenharmony_ci+ vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp.val[0]); 5605bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); 5615bec5421Sopenharmony_ci+ 5625bec5421Sopenharmony_ci+ vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3); 5635bec5421Sopenharmony_ci+ vtmp3 = vext_u8(vrp.val[0], vrp.val[1], OFFSET6); 5645bec5421Sopenharmony_ci+ vdest.val[1] = vhadd_u8(vdest.val[0], vtmp2); 5655bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[1], vtmp1); 5665bec5421Sopenharmony_ci+ 5675bec5421Sopenharmony_ci+ vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET6); 5685bec5421Sopenharmony_ci+ 5695bec5421Sopenharmony_ci+ vdest.val[IND2] = vhadd_u8(vdest.val[1], vtmp2); 5705bec5421Sopenharmony_ci+ vdest.val[IND2] = vadd_u8(vdest.val[IND2], vtmp3); 5715bec5421Sopenharmony_ci+ 5725bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); 5735bec5421Sopenharmony_ci+ rp += OFFSET3; 5745bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0); 5755bec5421Sopenharmony_ci+ rp += OFFSET3; 5765bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND2]), 0); 5775bec5421Sopenharmony_ci+ 5785bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vnp.val[0], vnp.val[1], OFFSET3); 5795bec5421Sopenharmony_ci+ vdestN.val[0] = vhadd_u8(vdestN.val[IND3], vdest.val[0]); 5805bec5421Sopenharmony_ci+ vdestN.val[0] = 
vadd_u8(vdestN.val[0], vnp.val[0]); 5815bec5421Sopenharmony_ci+ 5825bec5421Sopenharmony_ci+ vtmp3 = vext_u8(vnp.val[0], vnp.val[1], OFFSET6); 5835bec5421Sopenharmony_ci+ vdestN.val[1] = vhadd_u8(vdestN.val[0], vdest.val[1]); 5845bec5421Sopenharmony_ci+ vdestN.val[1] = vadd_u8(vdestN.val[1], vtmp1); 5855bec5421Sopenharmony_ci+ 5865bec5421Sopenharmony_ci+ vdestN.val[IND2] = vhadd_u8(vdestN.val[1], vdest.val[IND2]); 5875bec5421Sopenharmony_ci+ vdestN.val[IND2] = vadd_u8(vdestN.val[IND2], vtmp3); 5885bec5421Sopenharmony_ci+ 5895bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[0]), 0); 5905bec5421Sopenharmony_ci+ np += OFFSET3; 5915bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[1]), 0); 5925bec5421Sopenharmony_ci+ np += OFFSET3; 5935bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[IND2]), 0); 5945bec5421Sopenharmony_ci+ } 5955bec5421Sopenharmony_ci+ *rp_stop = last_byte; 5965bec5421Sopenharmony_ci+ *(rp_stop + row_info->rowbytes + 1) = last_byte_next; 5975bec5421Sopenharmony_ci+} 5985bec5421Sopenharmony_ci+ 5995bec5421Sopenharmony_ci+void png_read_filter_row_avg4_neon(png_row_infop row_info, png_bytep row, 6005bec5421Sopenharmony_ci+ png_const_bytep prev_row) 6015bec5421Sopenharmony_ci+{ 6025bec5421Sopenharmony_ci+ png_bytep rp = row; 6035bec5421Sopenharmony_ci+ png_const_bytep pp = prev_row; 6045bec5421Sopenharmony_ci+ int count = row_info->rowbytes; 6055bec5421Sopenharmony_ci+ 6065bec5421Sopenharmony_ci+ uint8x8x4_t vdest; 6075bec5421Sopenharmony_ci+ vdest.val[IND3] = vdup_n_u8(0); 6085bec5421Sopenharmony_ci+ 6095bec5421Sopenharmony_ci+ png_debug(1, "in png_read_filter_row_avg4_neon"); 6105bec5421Sopenharmony_ci+ 6115bec5421Sopenharmony_ci+ uint32x2x4_t vtmp; 6125bec5421Sopenharmony_ci+ uint8x8x4_t *vrpt, *vppt; 6135bec5421Sopenharmony_ci+ uint8x8x4_t vrp, vpp; 6145bec5421Sopenharmony_ci+ uint32x2x4_t vdest_val; 
6155bec5421Sopenharmony_ci+ while (count >= STEP_RGBA) { 6165bec5421Sopenharmony_ci+ uint32x2x4_t *temp_pointer; 6175bec5421Sopenharmony_ci+ vtmp = vld4_u32(png_ptr(uint32_t, rp)); 6185bec5421Sopenharmony_ci+ vrpt = png_ptr(uint8x8x4_t, &vtmp); 6195bec5421Sopenharmony_ci+ vrp = *vrpt; 6205bec5421Sopenharmony_ci+ vtmp = vld4_u32(png_ptrc(uint32_t, pp)); 6215bec5421Sopenharmony_ci+ vppt = png_ptr(uint8x8x4_t, &vtmp); 6225bec5421Sopenharmony_ci+ vpp = *vppt; 6235bec5421Sopenharmony_ci+ 6245bec5421Sopenharmony_ci+ vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp.val[0]); 6255bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); 6265bec5421Sopenharmony_ci+ vdest.val[1] = vhadd_u8(vdest.val[0], vpp.val[1]); 6275bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[1], vrp.val[1]); 6285bec5421Sopenharmony_ci+ vdest.val[IND2] = vhadd_u8(vdest.val[1], vpp.val[IND2]); 6295bec5421Sopenharmony_ci+ vdest.val[IND2] = vadd_u8(vdest.val[IND2], vrp.val[IND2]); 6305bec5421Sopenharmony_ci+ vdest.val[IND3] = vhadd_u8(vdest.val[IND2], vpp.val[IND3]); 6315bec5421Sopenharmony_ci+ vdest.val[IND3] = vadd_u8(vdest.val[IND3], vrp.val[IND3]); 6325bec5421Sopenharmony_ci+ 6335bec5421Sopenharmony_ci+ vdest_val = png_ldr(uint32x2x4_t, &vdest); 6345bec5421Sopenharmony_ci+ vst4_lane_u32(png_ptr(uint32_t, rp), vdest_val, 0); 6355bec5421Sopenharmony_ci+ 6365bec5421Sopenharmony_ci+ rp += STEP_RGBA; 6375bec5421Sopenharmony_ci+ pp += STEP_RGBA; 6385bec5421Sopenharmony_ci+ count -= STEP_RGBA; 6395bec5421Sopenharmony_ci+ } 6405bec5421Sopenharmony_ci+ 6415bec5421Sopenharmony_ci+ if (count >= STEP_RGBA_HALF) { 6425bec5421Sopenharmony_ci+ uint32x2x2_t vtmp1; 6435bec5421Sopenharmony_ci+ uint8x8x2_t *vrpt1, *vppt1; 6445bec5421Sopenharmony_ci+ uint8x8x2_t vrp1, vpp1; 6455bec5421Sopenharmony_ci+ uint32x2x2_t *temp_pointer; 6465bec5421Sopenharmony_ci+ uint32x2x2_t vdest_val1; 6475bec5421Sopenharmony_ci+ 6485bec5421Sopenharmony_ci+ vtmp1 = vld2_u32(png_ptr(uint32_t, rp)); 
6495bec5421Sopenharmony_ci+ vrpt1 = png_ptr(uint8x8x2_t, &vtmp1); 6505bec5421Sopenharmony_ci+ vrp1 = *vrpt1; 6515bec5421Sopenharmony_ci+ vtmp1 = vld2_u32(png_ptrc(uint32_t, pp)); 6525bec5421Sopenharmony_ci+ vppt1 = png_ptr(uint8x8x2_t, &vtmp1); 6535bec5421Sopenharmony_ci+ vpp1 = *vppt1; 6545bec5421Sopenharmony_ci+ 6555bec5421Sopenharmony_ci+ vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp1.val[0]); 6565bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp1.val[0]); 6575bec5421Sopenharmony_ci+ vdest.val[1] = vhadd_u8(vdest.val[0], vpp1.val[1]); 6585bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[1], vrp1.val[1]); 6595bec5421Sopenharmony_ci+ vdest.val[IND3] = vdest.val[1]; 6605bec5421Sopenharmony_ci+ vdest_val1 = png_ldr(uint32x2x2_t, &vdest); 6615bec5421Sopenharmony_ci+ vst2_lane_u32(png_ptr(uint32_t, rp), vdest_val1, 0); 6625bec5421Sopenharmony_ci+ 6635bec5421Sopenharmony_ci+ rp += STEP_RGBA_HALF; 6645bec5421Sopenharmony_ci+ pp += STEP_RGBA_HALF; 6655bec5421Sopenharmony_ci+ count -= STEP_RGBA_HALF; 6665bec5421Sopenharmony_ci+ } 6675bec5421Sopenharmony_ci+ 6685bec5421Sopenharmony_ci+ if (count == 0) { 6695bec5421Sopenharmony_ci+ return; 6705bec5421Sopenharmony_ci+ } 6715bec5421Sopenharmony_ci+ 6725bec5421Sopenharmony_ci+ uint32x2_t vtmp2; 6735bec5421Sopenharmony_ci+ uint8x8_t *vrpt2, *vppt2; 6745bec5421Sopenharmony_ci+ uint8x8_t vrp2, vpp2; 6755bec5421Sopenharmony_ci+ uint32x2_t *temp_pointer; 6765bec5421Sopenharmony_ci+ uint32x2_t vdest_val2; 6775bec5421Sopenharmony_ci+ 6785bec5421Sopenharmony_ci+ vtmp2 = vld1_u32(png_ptr(uint32_t, rp)); 6795bec5421Sopenharmony_ci+ vrpt2 = png_ptr(uint8x8_t, &vtmp2); 6805bec5421Sopenharmony_ci+ vrp2 = *vrpt2; 6815bec5421Sopenharmony_ci+ vtmp2 = vld1_u32(png_ptrc(uint32_t, pp)); 6825bec5421Sopenharmony_ci+ vppt2 = png_ptr(uint8x8_t, &vtmp2); 6835bec5421Sopenharmony_ci+ vpp2 = *vppt2; 6845bec5421Sopenharmony_ci+ 6855bec5421Sopenharmony_ci+ vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp2); 6865bec5421Sopenharmony_ci+ 
vdest.val[0] = vadd_u8(vdest.val[0], vrp2); 6875bec5421Sopenharmony_ci 6885bec5421Sopenharmony_ci+ vdest_val2 = png_ldr(uint32x2_t, &vdest); 6895bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), vdest_val2, 0); 6905bec5421Sopenharmony_ci+} 6915bec5421Sopenharmony_ci+ 6925bec5421Sopenharmony_ci+void png_read_filter_row_avg4_x2_neon(png_row_infop row_info, png_bytep row, 6935bec5421Sopenharmony_ci+ png_const_bytep prev_row) 6945bec5421Sopenharmony_ci+{ 6955bec5421Sopenharmony_ci+ png_bytep rp = row; 6965bec5421Sopenharmony_ci+ png_const_bytep pp = prev_row; 6975bec5421Sopenharmony_ci+ int count = row_info->rowbytes; 6985bec5421Sopenharmony_ci+ png_bytep np = row + count + 1; 6995bec5421Sopenharmony_ci+ 7005bec5421Sopenharmony_ci+ uint8x8x4_t vdest; 7015bec5421Sopenharmony_ci+ vdest.val[IND3] = vdup_n_u8(0); 7025bec5421Sopenharmony_ci+ 7035bec5421Sopenharmony_ci+ png_debug(1, "in png_read_filter_row_avg4_x2_neon"); 7045bec5421Sopenharmony_ci+ 7055bec5421Sopenharmony_ci+ uint32x2x4_t vtmp; 7065bec5421Sopenharmony_ci+ uint8x8x4_t *vrpt, *vppt; 7075bec5421Sopenharmony_ci+ uint8x8x4_t vrp, vpp; 7085bec5421Sopenharmony_ci+ uint32x2x4_t vdest_val; 7095bec5421Sopenharmony_ci+ 7105bec5421Sopenharmony_ci+ uint8x8x4_t *vnpt; 7115bec5421Sopenharmony_ci+ uint8x8x4_t vnp; 7125bec5421Sopenharmony_ci+ uint8x8x4_t vdestN; 7135bec5421Sopenharmony_ci+ vdestN.val[IND3] = vdup_n_u8(0); 7145bec5421Sopenharmony_ci+ 7155bec5421Sopenharmony_ci+ while (count >= STEP_RGBA) { 7165bec5421Sopenharmony_ci+ uint32x2x4_t *temp_pointer; 7175bec5421Sopenharmony_ci+ vtmp = vld4_u32(png_ptr(uint32_t, rp)); 7185bec5421Sopenharmony_ci+ vrpt = png_ptr(uint8x8x4_t, &vtmp); 7195bec5421Sopenharmony_ci+ vrp = *vrpt; 7205bec5421Sopenharmony_ci+ vtmp = vld4_u32(png_ptrc(uint32_t, pp)); 7215bec5421Sopenharmony_ci+ vppt = png_ptr(uint8x8x4_t, &vtmp); 7225bec5421Sopenharmony_ci+ vpp = *vppt; 7235bec5421Sopenharmony_ci+ vtmp = vld4_u32(png_ptrc(uint32_t, np)); 7245bec5421Sopenharmony_ci+ vnpt = 
png_ptr(uint8x8x4_t, &vtmp); 7255bec5421Sopenharmony_ci+ vnp = *vnpt; 7265bec5421Sopenharmony_ci+ 7275bec5421Sopenharmony_ci+ vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp.val[0]); 7285bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); 7295bec5421Sopenharmony_ci+ vdest.val[1] = vhadd_u8(vdest.val[0], vpp.val[1]); 7305bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[1], vrp.val[1]); 7315bec5421Sopenharmony_ci+ vdest.val[IND2] = vhadd_u8(vdest.val[1], vpp.val[IND2]); 7325bec5421Sopenharmony_ci+ vdest.val[IND2] = vadd_u8(vdest.val[IND2], vrp.val[IND2]); 7335bec5421Sopenharmony_ci+ vdest.val[IND3] = vhadd_u8(vdest.val[IND2], vpp.val[IND3]); 7345bec5421Sopenharmony_ci+ vdest.val[IND3] = vadd_u8(vdest.val[IND3], vrp.val[IND3]); 7355bec5421Sopenharmony_ci+ 7365bec5421Sopenharmony_ci+ vdest_val = png_ldr(uint32x2x4_t, &vdest); 7375bec5421Sopenharmony_ci+ vst4_lane_u32(png_ptr(uint32_t, rp), vdest_val, 0); 7385bec5421Sopenharmony_ci+ 7395bec5421Sopenharmony_ci+ vdestN.val[0] = vhadd_u8(vdestN.val[IND3], vdest.val[0]); 7405bec5421Sopenharmony_ci+ vdestN.val[0] = vadd_u8(vdestN.val[0], vnp.val[0]); 7415bec5421Sopenharmony_ci+ vdestN.val[1] = vhadd_u8(vdestN.val[0], vdest.val[1]); 7425bec5421Sopenharmony_ci+ vdestN.val[1] = vadd_u8(vdestN.val[1], vnp.val[1]); 7435bec5421Sopenharmony_ci+ vdestN.val[IND2] = vhadd_u8(vdestN.val[1], vdest.val[IND2]); 7445bec5421Sopenharmony_ci+ vdestN.val[IND2] = vadd_u8(vdestN.val[IND2], vnp.val[IND2]); 7455bec5421Sopenharmony_ci+ vdestN.val[IND3] = vhadd_u8(vdestN.val[IND2], vdest.val[IND3]); 7465bec5421Sopenharmony_ci+ vdestN.val[IND3] = vadd_u8(vdestN.val[IND3], vnp.val[IND3]); 7475bec5421Sopenharmony_ci+ 7485bec5421Sopenharmony_ci+ vdest_val = png_ldr(uint32x2x4_t, &vdestN); 7495bec5421Sopenharmony_ci+ vst4_lane_u32(png_ptr(uint32_t, np), vdest_val, 0); 7505bec5421Sopenharmony_ci+ 7515bec5421Sopenharmony_ci+ rp += STEP_RGBA; 7525bec5421Sopenharmony_ci+ pp += STEP_RGBA; 7535bec5421Sopenharmony_ci+ np += STEP_RGBA; 
7545bec5421Sopenharmony_ci+ count -= STEP_RGBA; 7555bec5421Sopenharmony_ci+ } 7565bec5421Sopenharmony_ci+ 7575bec5421Sopenharmony_ci+ if (count >= STEP_RGBA_HALF) { 7585bec5421Sopenharmony_ci+ uint32x2x2_t vtmp1; 7595bec5421Sopenharmony_ci+ uint8x8x2_t *vrpt1, *vppt1, *vnpt1; 7605bec5421Sopenharmony_ci+ uint8x8x2_t vrp1, vpp1, vnp1; 7615bec5421Sopenharmony_ci+ uint32x2x2_t *temp_pointer; 7625bec5421Sopenharmony_ci+ uint32x2x2_t vdest_val1; 7635bec5421Sopenharmony_ci+ 7645bec5421Sopenharmony_ci+ vtmp1 = vld2_u32(png_ptr(uint32_t, rp)); 7655bec5421Sopenharmony_ci+ vrpt1 = png_ptr(uint8x8x2_t, &vtmp1); 7665bec5421Sopenharmony_ci+ vrp1 = *vrpt1; 7675bec5421Sopenharmony_ci+ vtmp1 = vld2_u32(png_ptrc(uint32_t, pp)); 7685bec5421Sopenharmony_ci+ vppt1 = png_ptr(uint8x8x2_t, &vtmp1); 7695bec5421Sopenharmony_ci+ vpp1 = *vppt1; 7705bec5421Sopenharmony_ci+ vtmp1 = vld2_u32(png_ptrc(uint32_t, np)); 7715bec5421Sopenharmony_ci+ vnpt1 = png_ptr(uint8x8x2_t, &vtmp1); 7725bec5421Sopenharmony_ci+ vnp1 = *vnpt1; 7735bec5421Sopenharmony_ci+ 7745bec5421Sopenharmony_ci+ vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp1.val[0]); 7755bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp1.val[0]); 7765bec5421Sopenharmony_ci+ vdest.val[1] = vhadd_u8(vdest.val[0], vpp1.val[1]); 7775bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[1], vrp1.val[1]); 7785bec5421Sopenharmony_ci+ vdest.val[IND3] = vdest.val[1]; 7795bec5421Sopenharmony_ci+ vdest_val1 = png_ldr(uint32x2x2_t, &vdest); 7805bec5421Sopenharmony_ci+ vst2_lane_u32(png_ptr(uint32_t, rp), vdest_val1, 0); 7815bec5421Sopenharmony_ci+ 7825bec5421Sopenharmony_ci+ vdestN.val[0] = vhadd_u8(vdestN.val[IND3], vdest.val[0]); 7835bec5421Sopenharmony_ci+ vdestN.val[0] = vadd_u8(vdestN.val[0], vnp1.val[0]); 7845bec5421Sopenharmony_ci+ vdestN.val[1] = vhadd_u8(vdestN.val[0], vdest.val[1]); 7855bec5421Sopenharmony_ci+ vdestN.val[1] = vadd_u8(vdestN.val[1], vnp1.val[1]); 7865bec5421Sopenharmony_ci+ vdestN.val[IND3] = vdestN.val[1]; 
7875bec5421Sopenharmony_ci+ vdest_val1 = png_ldr(uint32x2x2_t, &vdestN); 7885bec5421Sopenharmony_ci+ vst2_lane_u32(png_ptr(uint32_t, np), vdest_val1, 0); 7895bec5421Sopenharmony_ci+ 7905bec5421Sopenharmony_ci+ rp += STEP_RGBA_HALF; 7915bec5421Sopenharmony_ci+ pp += STEP_RGBA_HALF; 7925bec5421Sopenharmony_ci+ np += STEP_RGBA_HALF; 7935bec5421Sopenharmony_ci+ count -= STEP_RGBA_HALF; 7945bec5421Sopenharmony_ci+ } 7955bec5421Sopenharmony_ci+ 7965bec5421Sopenharmony_ci+ if (count == 0) { 7975bec5421Sopenharmony_ci+ return; 7985bec5421Sopenharmony_ci+ } 7995bec5421Sopenharmony_ci+ 8005bec5421Sopenharmony_ci+ uint32x2_t vtmp2; 8015bec5421Sopenharmony_ci+ uint8x8_t *vrpt2, *vppt2, *vnpt2; 8025bec5421Sopenharmony_ci+ uint8x8_t vrp2, vpp2, vnp2; 8035bec5421Sopenharmony_ci+ uint32x2_t *temp_pointer; 8045bec5421Sopenharmony_ci+ uint32x2_t vdest_val2; 8055bec5421Sopenharmony_ci+ 8065bec5421Sopenharmony_ci+ vtmp2 = vld1_u32(png_ptr(uint32_t, rp)); 8075bec5421Sopenharmony_ci+ vrpt2 = png_ptr(uint8x8_t, &vtmp2); 8085bec5421Sopenharmony_ci+ vrp2 = *vrpt2; 8095bec5421Sopenharmony_ci+ vtmp2 = vld1_u32(png_ptrc(uint32_t, pp)); 8105bec5421Sopenharmony_ci+ vppt2 = png_ptr(uint8x8_t, &vtmp2); 8115bec5421Sopenharmony_ci+ vpp2 = *vppt2; 8125bec5421Sopenharmony_ci+ vtmp2 = vld1_u32(png_ptrc(uint32_t, np)); 8135bec5421Sopenharmony_ci+ vnpt2 = png_ptr(uint8x8_t, &vtmp2); 8145bec5421Sopenharmony_ci+ vnp2 = *vnpt2; 8155bec5421Sopenharmony_ci+ 8165bec5421Sopenharmony_ci+ vdest.val[0] = vhadd_u8(vdest.val[IND3], vpp2); 8175bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp2); 8185bec5421Sopenharmony_ci+ 8195bec5421Sopenharmony_ci+ vdest_val2 = png_ldr(uint32x2_t, &vdest); 8205bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), vdest_val2, 0); 8215bec5421Sopenharmony_ci+ 8225bec5421Sopenharmony_ci+ vdestN.val[0] = vhadd_u8(vdestN.val[IND3], vdest.val[0]); 8235bec5421Sopenharmony_ci+ vdestN.val[0] = vadd_u8(vdestN.val[0], vnp2); 8245bec5421Sopenharmony_ci+ 
8255bec5421Sopenharmony_ci+ vdest_val2 = png_ldr(uint32x2_t, &vdestN); 8265bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, np), vdest_val2, 0); 8275bec5421Sopenharmony_ci+} 8285bec5421Sopenharmony_ci+ 8295bec5421Sopenharmony_ci+static uint8x8_t paeth(uint8x8_t a, uint8x8_t b, uint8x8_t c) 8305bec5421Sopenharmony_ci+{ 8315bec5421Sopenharmony_ci+ uint8x8_t d, e; 8325bec5421Sopenharmony_ci+ uint16x8_t p1, pa, pb, pc; 8335bec5421Sopenharmony_ci+ 8345bec5421Sopenharmony_ci+ p1 = vaddl_u8(a, b); /* a + b */ 8355bec5421Sopenharmony_ci+ pc = vaddl_u8(c, c); /* c * 2 */ 8365bec5421Sopenharmony_ci+ pa = vabdl_u8(b, c); /* pa */ 8375bec5421Sopenharmony_ci+ pb = vabdl_u8(a, c); /* pb */ 8385bec5421Sopenharmony_ci+ pc = vabdq_u16(p1, pc); /* pc */ 8395bec5421Sopenharmony_ci+ 8405bec5421Sopenharmony_ci+ p1 = vcleq_u16(pa, pb); /* pa <= pb */ 8415bec5421Sopenharmony_ci+ pa = vcleq_u16(pa, pc); /* pa <= pc */ 8425bec5421Sopenharmony_ci+ pb = vcleq_u16(pb, pc); /* pb <= pc */ 8435bec5421Sopenharmony_ci+ 8445bec5421Sopenharmony_ci+ p1 = vandq_u16(p1, pa); /* pa <= pb && pa <= pc */ 8455bec5421Sopenharmony_ci+ 8465bec5421Sopenharmony_ci+ d = vmovn_u16(pb); 8475bec5421Sopenharmony_ci+ e = vmovn_u16(p1); 8485bec5421Sopenharmony_ci+ 8495bec5421Sopenharmony_ci+ d = vbsl_u8(d, b, c); 8505bec5421Sopenharmony_ci+ e = vbsl_u8(e, a, d); 8515bec5421Sopenharmony_ci+ 8525bec5421Sopenharmony_ci+ return e; 8535bec5421Sopenharmony_ci+} 8545bec5421Sopenharmony_ci+ 8555bec5421Sopenharmony_ci+void png_read_filter_row_paeth3_neon(png_row_infop row_info, png_bytep row, 8565bec5421Sopenharmony_ci+ png_const_bytep prev_row) 8575bec5421Sopenharmony_ci+{ 8585bec5421Sopenharmony_ci+ png_bytep rp = row; 8595bec5421Sopenharmony_ci+ png_const_bytep pp = prev_row; 8605bec5421Sopenharmony_ci+ png_bytep rp_stop = row + row_info->rowbytes; 8615bec5421Sopenharmony_ci+ 8625bec5421Sopenharmony_ci+ uint8x16_t vtmp; 8635bec5421Sopenharmony_ci+ uint8x8x2_t *vrpt; 8645bec5421Sopenharmony_ci+ uint8x8x2_t vrp; 
8655bec5421Sopenharmony_ci+ uint8x8_t vlast = vdup_n_u8(0); 8665bec5421Sopenharmony_ci+ uint8x8x4_t vdest; 8675bec5421Sopenharmony_ci+ vdest.val[IND3] = vdup_n_u8(0); 8685bec5421Sopenharmony_ci+ 8695bec5421Sopenharmony_ci+ vtmp = vld1q_u8(rp); 8705bec5421Sopenharmony_ci+ vrpt = png_ptr(uint8x8x2_t, &vtmp); 8715bec5421Sopenharmony_ci+ vrp = *vrpt; 8725bec5421Sopenharmony_ci+ 8735bec5421Sopenharmony_ci+ uint8x8x2_t *vppt; 8745bec5421Sopenharmony_ci+ uint8x8x2_t vpp; 8755bec5421Sopenharmony_ci+ uint8x8_t vtmp1, vtmp2, vtmp3; 8765bec5421Sopenharmony_ci+ uint32x2_t *temp_pointer; 8775bec5421Sopenharmony_ci+ 8785bec5421Sopenharmony_ci+ png_debug(1, "in png_read_filter_row_paeth3_neon"); 8795bec5421Sopenharmony_ci+ 8805bec5421Sopenharmony_ci+ size_t tail_bytes = row_info->rowbytes % STEP_RGB; 8815bec5421Sopenharmony_ci+ png_byte last_byte = *rp_stop; 8825bec5421Sopenharmony_ci+ png_bytep rp_stop_new = rp_stop - tail_bytes; 8835bec5421Sopenharmony_ci+ for (; rp < rp_stop_new; pp += STEP_RGB) 8845bec5421Sopenharmony_ci+ { 8855bec5421Sopenharmony_ci+ vtmp = vld1q_u8(pp); 8865bec5421Sopenharmony_ci+ vppt = png_ptr(uint8x8x2_t, &vtmp); 8875bec5421Sopenharmony_ci+ vpp = *vppt; 8885bec5421Sopenharmony_ci+ 8895bec5421Sopenharmony_ci+ vdest.val[0] = paeth(vdest.val[IND3], vpp.val[0], vlast); 8905bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); 8915bec5421Sopenharmony_ci+ 8925bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3); 8935bec5421Sopenharmony_ci+ vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3); 8945bec5421Sopenharmony_ci+ vdest.val[1] = paeth(vdest.val[0], vtmp2, vpp.val[0]); 8955bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[1], vtmp1); 8965bec5421Sopenharmony_ci+ 8975bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET6); 8985bec5421Sopenharmony_ci+ vtmp3 = vext_u8(vpp.val[0], vpp.val[1], OFFSET6); 8995bec5421Sopenharmony_ci+ vdest.val[IND2] = paeth(vdest.val[1], vtmp3, vtmp2); 
9005bec5421Sopenharmony_ci+ vdest.val[IND2] = vadd_u8(vdest.val[IND2], vtmp1); 9015bec5421Sopenharmony_ci+ 9025bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1); 9035bec5421Sopenharmony_ci+ vtmp2 = vext_u8(vpp.val[1], vpp.val[1], 1); 9045bec5421Sopenharmony_ci+ 9055bec5421Sopenharmony_ci+ vtmp = vld1q_u8(rp + STEP_RGB); 9065bec5421Sopenharmony_ci+ vrpt = png_ptr(uint8x8x2_t, &vtmp); 9075bec5421Sopenharmony_ci+ vrp = *vrpt; 9085bec5421Sopenharmony_ci+ 9095bec5421Sopenharmony_ci+ vdest.val[IND3] = paeth(vdest.val[IND2], vtmp2, vtmp3); 9105bec5421Sopenharmony_ci+ vdest.val[IND3] = vadd_u8(vdest.val[IND3], vtmp1); 9115bec5421Sopenharmony_ci+ 9125bec5421Sopenharmony_ci+ vlast = vtmp2; 9135bec5421Sopenharmony_ci+ 9145bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); 9155bec5421Sopenharmony_ci+ rp += OFFSET3; 9165bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0); 9175bec5421Sopenharmony_ci+ rp += OFFSET3; 9185bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND2]), 0); 9195bec5421Sopenharmony_ci+ rp += OFFSET3; 9205bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND3]), 0); 9215bec5421Sopenharmony_ci+ rp += OFFSET3; 9225bec5421Sopenharmony_ci+ } 9235bec5421Sopenharmony_ci+ 9245bec5421Sopenharmony_ci+ vtmp = vld1q_u8(pp); 9255bec5421Sopenharmony_ci+ vppt = png_ptr(uint8x8x2_t, &vtmp); 9265bec5421Sopenharmony_ci+ vpp = *vppt; 9275bec5421Sopenharmony_ci+ 9285bec5421Sopenharmony_ci+ if (tail_bytes == TAIL_RGB1) { 9295bec5421Sopenharmony_ci+ vdest.val[0] = paeth(vdest.val[IND3], vpp.val[0], vlast); 9305bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); 9315bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); 9325bec5421Sopenharmony_ci+ } else if (tail_bytes == TAIL_RGB2) { 9335bec5421Sopenharmony_ci+ 
vdest.val[0] = paeth(vdest.val[IND3], vpp.val[0], vlast); 9345bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); 9355bec5421Sopenharmony_ci+ 9365bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3); 9375bec5421Sopenharmony_ci+ vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3); 9385bec5421Sopenharmony_ci+ vdest.val[1] = paeth(vdest.val[0], vtmp2, vpp.val[0]); 9395bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[1], vtmp1); 9405bec5421Sopenharmony_ci+ 9415bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); 9425bec5421Sopenharmony_ci+ rp += OFFSET3; 9435bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0); 9445bec5421Sopenharmony_ci+ } else if (tail_bytes == TAIL_RGB3) { 9455bec5421Sopenharmony_ci+ vdest.val[0] = paeth(vdest.val[IND3], vpp.val[0], vlast); 9465bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); 9475bec5421Sopenharmony_ci+ 9485bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3); 9495bec5421Sopenharmony_ci+ vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3); 9505bec5421Sopenharmony_ci+ vdest.val[1] = paeth(vdest.val[0], vtmp2, vpp.val[0]); 9515bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[1], vtmp1); 9525bec5421Sopenharmony_ci+ 9535bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET6); 9545bec5421Sopenharmony_ci+ vtmp3 = vext_u8(vpp.val[0], vpp.val[1], OFFSET6); 9555bec5421Sopenharmony_ci+ vdest.val[IND2] = paeth(vdest.val[1], vtmp3, vtmp2); 9565bec5421Sopenharmony_ci+ vdest.val[IND2] = vadd_u8(vdest.val[IND2], vtmp1); 9575bec5421Sopenharmony_ci+ 9585bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); 9595bec5421Sopenharmony_ci+ rp += OFFSET3; 9605bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0); 9615bec5421Sopenharmony_ci+ rp += OFFSET3; 
9625bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND2]), 0); 9635bec5421Sopenharmony_ci+ } 9645bec5421Sopenharmony_ci+ *rp_stop = last_byte; 9655bec5421Sopenharmony_ci+} 9665bec5421Sopenharmony_ci+ 9675bec5421Sopenharmony_ci+void png_read_filter_row_paeth3_x2_neon(png_row_infop row_info, png_bytep row, 9685bec5421Sopenharmony_ci+ png_const_bytep prev_row) 9695bec5421Sopenharmony_ci+{ 9705bec5421Sopenharmony_ci+ png_bytep rp = row; 9715bec5421Sopenharmony_ci+ png_const_bytep pp = prev_row; 9725bec5421Sopenharmony_ci+ png_bytep rp_stop = row + row_info->rowbytes; 9735bec5421Sopenharmony_ci+ png_bytep np = rp_stop + 1; 9745bec5421Sopenharmony_ci+ 9755bec5421Sopenharmony_ci+ uint8x16_t vtmp; 9765bec5421Sopenharmony_ci+ uint8x8x2_t *vrpt; 9775bec5421Sopenharmony_ci+ uint8x8x2_t vrp; 9785bec5421Sopenharmony_ci+ uint8x8_t vlast = vdup_n_u8(0); 9795bec5421Sopenharmony_ci+ uint8x8x4_t vdest; 9805bec5421Sopenharmony_ci+ vdest.val[IND3] = vdup_n_u8(0); 9815bec5421Sopenharmony_ci+ 9825bec5421Sopenharmony_ci+ vtmp = vld1q_u8(rp); 9835bec5421Sopenharmony_ci+ vrpt = png_ptr(uint8x8x2_t, &vtmp); 9845bec5421Sopenharmony_ci+ vrp = *vrpt; 9855bec5421Sopenharmony_ci+ 9865bec5421Sopenharmony_ci+ uint8x8x2_t *vppt; 9875bec5421Sopenharmony_ci+ uint8x8x2_t vpp; 9885bec5421Sopenharmony_ci+ uint8x8_t vtmp1, vtmp2, vtmp3; 9895bec5421Sopenharmony_ci+ uint32x2_t *temp_pointer; 9905bec5421Sopenharmony_ci+ 9915bec5421Sopenharmony_ci+ uint8x8x2_t *vnpt; 9925bec5421Sopenharmony_ci+ uint8x8x2_t vnp; 9935bec5421Sopenharmony_ci+ uint8x8_t vlastN = vdup_n_u8(0); 9945bec5421Sopenharmony_ci+ uint8x8x4_t vdestN; 9955bec5421Sopenharmony_ci+ vdestN.val[IND3] = vdup_n_u8(0); 9965bec5421Sopenharmony_ci+ 9975bec5421Sopenharmony_ci+ vtmp = vld1q_u8(np); 9985bec5421Sopenharmony_ci+ vnpt = png_ptr(uint8x8x2_t, &vtmp); 9995bec5421Sopenharmony_ci+ vnp = *vnpt; 10005bec5421Sopenharmony_ci+ 10015bec5421Sopenharmony_ci+ png_debug(1, "in 
png_read_filter_row_paeth3_x2_neon"); 10025bec5421Sopenharmony_ci+ 10035bec5421Sopenharmony_ci+ size_t tail_bytes = row_info->rowbytes % STEP_RGB; 10045bec5421Sopenharmony_ci+ png_byte last_byte = *rp_stop; 10055bec5421Sopenharmony_ci+ png_byte last_byte_next = *(rp_stop + row_info->rowbytes + 1); 10065bec5421Sopenharmony_ci+ png_bytep rp_stop_new = rp_stop - tail_bytes; 10075bec5421Sopenharmony_ci+ 10085bec5421Sopenharmony_ci+ for (; rp < rp_stop_new; pp += STEP_RGB) 10095bec5421Sopenharmony_ci+ { 10105bec5421Sopenharmony_ci+ vtmp = vld1q_u8(pp); 10115bec5421Sopenharmony_ci+ vppt = png_ptr(uint8x8x2_t, &vtmp); 10125bec5421Sopenharmony_ci+ vpp = *vppt; 10135bec5421Sopenharmony_ci+ 10145bec5421Sopenharmony_ci+ vdest.val[0] = paeth(vdest.val[IND3], vpp.val[0], vlast); 10155bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); 10165bec5421Sopenharmony_ci+ 10175bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3); 10185bec5421Sopenharmony_ci+ vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3); 10195bec5421Sopenharmony_ci+ vdest.val[1] = paeth(vdest.val[0], vtmp2, vpp.val[0]); 10205bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[1], vtmp1); 10215bec5421Sopenharmony_ci+ 10225bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET6); 10235bec5421Sopenharmony_ci+ vtmp3 = vext_u8(vpp.val[0], vpp.val[1], OFFSET6); 10245bec5421Sopenharmony_ci+ vdest.val[IND2] = paeth(vdest.val[1], vtmp3, vtmp2); 10255bec5421Sopenharmony_ci+ vdest.val[IND2] = vadd_u8(vdest.val[IND2], vtmp1); 10265bec5421Sopenharmony_ci+ 10275bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1); 10285bec5421Sopenharmony_ci+ vtmp2 = vext_u8(vpp.val[1], vpp.val[1], 1); 10295bec5421Sopenharmony_ci+ 10305bec5421Sopenharmony_ci+ vtmp = vld1q_u8(rp + STEP_RGB); 10315bec5421Sopenharmony_ci+ vrpt = png_ptr(uint8x8x2_t, &vtmp); 10325bec5421Sopenharmony_ci+ vrp = *vrpt; 10335bec5421Sopenharmony_ci+ 10345bec5421Sopenharmony_ci+ vdest.val[IND3] = 
paeth(vdest.val[IND2], vtmp2, vtmp3); 10355bec5421Sopenharmony_ci+ vdest.val[IND3] = vadd_u8(vdest.val[IND3], vtmp1); 10365bec5421Sopenharmony_ci+ 10375bec5421Sopenharmony_ci+ vlast = vtmp2; 10385bec5421Sopenharmony_ci+ 10395bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); 10405bec5421Sopenharmony_ci+ rp += OFFSET3; 10415bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0); 10425bec5421Sopenharmony_ci+ rp += OFFSET3; 10435bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND2]), 0); 10445bec5421Sopenharmony_ci+ rp += OFFSET3; 10455bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND3]), 0); 10465bec5421Sopenharmony_ci+ rp += OFFSET3; 10475bec5421Sopenharmony_ci+ 10485bec5421Sopenharmony_ci+ vdestN.val[0] = paeth(vdestN.val[IND3], vdest.val[0], vlastN); 10495bec5421Sopenharmony_ci+ vdestN.val[0] = vadd_u8(vdestN.val[0], vnp.val[0]); 10505bec5421Sopenharmony_ci+ 10515bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vnp.val[0], vnp.val[1], OFFSET3); 10525bec5421Sopenharmony_ci+ vdestN.val[1] = paeth(vdestN.val[0], vdest.val[1], vdest.val[0]); 10535bec5421Sopenharmony_ci+ vdestN.val[1] = vadd_u8(vdestN.val[1], vtmp1); 10545bec5421Sopenharmony_ci+ 10555bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vnp.val[0], vnp.val[1], OFFSET6); 10565bec5421Sopenharmony_ci+ vdestN.val[IND2] = paeth(vdestN.val[1], vdest.val[IND2], vdest.val[1]); 10575bec5421Sopenharmony_ci+ vdestN.val[IND2] = vadd_u8(vdestN.val[IND2], vtmp1); 10585bec5421Sopenharmony_ci+ 10595bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vnp.val[1], vnp.val[1], 1); 10605bec5421Sopenharmony_ci+ 10615bec5421Sopenharmony_ci+ vtmp = vld1q_u8(np + STEP_RGB); 10625bec5421Sopenharmony_ci+ vnpt = png_ptr(uint8x8x2_t, &vtmp); 10635bec5421Sopenharmony_ci+ vnp = *vnpt; 10645bec5421Sopenharmony_ci+ 10655bec5421Sopenharmony_ci+ vdestN.val[IND3] = paeth(vdestN.val[IND2], 
vdest.val[IND3], vdest.val[IND2]); 10665bec5421Sopenharmony_ci+ vdestN.val[IND3] = vadd_u8(vdestN.val[IND3], vtmp1); 10675bec5421Sopenharmony_ci+ 10685bec5421Sopenharmony_ci+ vlastN = vdest.val[IND3]; 10695bec5421Sopenharmony_ci+ 10705bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[0]), 0); 10715bec5421Sopenharmony_ci+ np += OFFSET3; 10725bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[1]), 0); 10735bec5421Sopenharmony_ci+ np += OFFSET3; 10745bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[IND2]), 0); 10755bec5421Sopenharmony_ci+ np += OFFSET3; 10765bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[IND3]), 0); 10775bec5421Sopenharmony_ci+ np += OFFSET3; 10785bec5421Sopenharmony_ci+ } 10795bec5421Sopenharmony_ci+ 10805bec5421Sopenharmony_ci+ vtmp = vld1q_u8(pp); 10815bec5421Sopenharmony_ci+ vppt = png_ptr(uint8x8x2_t, &vtmp); 10825bec5421Sopenharmony_ci+ vpp = *vppt; 10835bec5421Sopenharmony_ci+ 10845bec5421Sopenharmony_ci+ if (tail_bytes == TAIL_RGB1) { 10855bec5421Sopenharmony_ci+ vdest.val[0] = paeth(vdest.val[IND3], vpp.val[0], vlast); 10865bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); 10875bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); 10885bec5421Sopenharmony_ci+ 10895bec5421Sopenharmony_ci+ vdestN.val[0] = paeth(vdestN.val[IND3], vdest.val[0], vlastN); 10905bec5421Sopenharmony_ci+ vdestN.val[0] = vadd_u8(vdestN.val[0], vnp.val[0]); 10915bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[0]), 0); 10925bec5421Sopenharmony_ci+ } else if (tail_bytes == TAIL_RGB2) { 10935bec5421Sopenharmony_ci+ vdest.val[0] = paeth(vdest.val[IND3], vpp.val[0], vlast); 10945bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); 10955bec5421Sopenharmony_ci+ 
10965bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3); 10975bec5421Sopenharmony_ci+ vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3); 10985bec5421Sopenharmony_ci+ vdest.val[1] = paeth(vdest.val[0], vtmp2, vpp.val[0]); 10995bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[1], vtmp1); 11005bec5421Sopenharmony_ci+ 11015bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); 11025bec5421Sopenharmony_ci+ rp += OFFSET3; 11035bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0); 11045bec5421Sopenharmony_ci+ 11055bec5421Sopenharmony_ci+ vdestN.val[0] = paeth(vdestN.val[IND3], vdest.val[0], vlastN); 11065bec5421Sopenharmony_ci+ vdestN.val[0] = vadd_u8(vdestN.val[0], vnp.val[0]); 11075bec5421Sopenharmony_ci+ 11085bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vnp.val[0], vnp.val[1], OFFSET3); 11095bec5421Sopenharmony_ci+ vdestN.val[1] = paeth(vdestN.val[0], vdest.val[1], vdest.val[0]); 11105bec5421Sopenharmony_ci+ vdestN.val[1] = vadd_u8(vdestN.val[1], vtmp1); 11115bec5421Sopenharmony_ci+ 11125bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[0]), 0); 11135bec5421Sopenharmony_ci+ np += OFFSET3; 11145bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[1]), 0); 11155bec5421Sopenharmony_ci+ } else if (tail_bytes == TAIL_RGB3) { 11165bec5421Sopenharmony_ci+ vdest.val[0] = paeth(vdest.val[IND3], vpp.val[0], vlast); 11175bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); 11185bec5421Sopenharmony_ci+ 11195bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET3); 11205bec5421Sopenharmony_ci+ vtmp2 = vext_u8(vpp.val[0], vpp.val[1], OFFSET3); 11215bec5421Sopenharmony_ci+ vdest.val[1] = paeth(vdest.val[0], vtmp2, vpp.val[0]); 11225bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[1], vtmp1); 11235bec5421Sopenharmony_ci+ 
11245bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vrp.val[0], vrp.val[1], OFFSET6); 11255bec5421Sopenharmony_ci+ vtmp3 = vext_u8(vpp.val[0], vpp.val[1], OFFSET6); 11265bec5421Sopenharmony_ci+ vdest.val[IND2] = paeth(vdest.val[1], vtmp3, vtmp2); 11275bec5421Sopenharmony_ci+ vdest.val[IND2] = vadd_u8(vdest.val[IND2], vtmp1); 11285bec5421Sopenharmony_ci+ 11295bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[0]), 0); 11305bec5421Sopenharmony_ci+ rp += OFFSET3; 11315bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[1]), 0); 11325bec5421Sopenharmony_ci+ rp += OFFSET3; 11335bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), png_ldr(uint32x2_t, &vdest.val[IND2]), 0); 11345bec5421Sopenharmony_ci+ 11355bec5421Sopenharmony_ci+ vdestN.val[0] = paeth(vdestN.val[IND3], vdest.val[0], vlastN); 11365bec5421Sopenharmony_ci+ vdestN.val[0] = vadd_u8(vdestN.val[0], vnp.val[0]); 11375bec5421Sopenharmony_ci+ 11385bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vnp.val[0], vnp.val[1], OFFSET3); 11395bec5421Sopenharmony_ci+ vdestN.val[1] = paeth(vdestN.val[0], vdest.val[1], vdest.val[0]); 11405bec5421Sopenharmony_ci+ vdestN.val[1] = vadd_u8(vdestN.val[1], vtmp1); 11415bec5421Sopenharmony_ci+ 11425bec5421Sopenharmony_ci+ vtmp1 = vext_u8(vnp.val[0], vnp.val[1], OFFSET6); 11435bec5421Sopenharmony_ci+ vdestN.val[IND2] = paeth(vdestN.val[1], vdest.val[IND2], vdest.val[1]); 11445bec5421Sopenharmony_ci+ vdestN.val[IND2] = vadd_u8(vdestN.val[IND2], vtmp1); 11455bec5421Sopenharmony_ci+ 11465bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[0]), 0); 11475bec5421Sopenharmony_ci+ np += OFFSET3; 11485bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[1]), 0); 11495bec5421Sopenharmony_ci+ np += OFFSET3; 11505bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, np), png_ldr(uint32x2_t, &vdestN.val[IND2]), 0); 11515bec5421Sopenharmony_ci+ 
} 11525bec5421Sopenharmony_ci+ *rp_stop = last_byte; 11535bec5421Sopenharmony_ci+ *(rp_stop + row_info->rowbytes + 1) = last_byte_next; 11545bec5421Sopenharmony_ci+} 11555bec5421Sopenharmony_ci+ 11565bec5421Sopenharmony_ci+void png_read_filter_row_paeth4_neon(png_row_infop row_info, png_bytep row, 11575bec5421Sopenharmony_ci+ png_const_bytep prev_row) 11585bec5421Sopenharmony_ci+{ 11595bec5421Sopenharmony_ci+ png_bytep rp = row; 11605bec5421Sopenharmony_ci+ int count = row_info->rowbytes; 11615bec5421Sopenharmony_ci+ png_const_bytep pp = prev_row; 11625bec5421Sopenharmony_ci+ 11635bec5421Sopenharmony_ci+ uint8x8_t vlast = vdup_n_u8(0); 11645bec5421Sopenharmony_ci+ uint8x8x4_t vdest; 11655bec5421Sopenharmony_ci+ vdest.val[IND3] = vdup_n_u8(0); 11665bec5421Sopenharmony_ci+ 11675bec5421Sopenharmony_ci+ png_debug(1, "in png_read_filter_row_paeth4_neon"); 11685bec5421Sopenharmony_ci+ 11695bec5421Sopenharmony_ci+ uint32x2x4_t vtmp; 11705bec5421Sopenharmony_ci+ uint8x8x4_t *vrpt, *vppt; 11715bec5421Sopenharmony_ci+ uint8x8x4_t vrp, vpp; 11725bec5421Sopenharmony_ci+ uint32x2x4_t vdest_val; 11735bec5421Sopenharmony_ci+ while (count >= STEP_RGBA) { 11745bec5421Sopenharmony_ci+ uint32x2x4_t *temp_pointer; 11755bec5421Sopenharmony_ci+ vtmp = vld4_u32(png_ptr(uint32_t, rp)); 11765bec5421Sopenharmony_ci+ vrpt = png_ptr(uint8x8x4_t, &vtmp); 11775bec5421Sopenharmony_ci+ vrp = *vrpt; 11785bec5421Sopenharmony_ci+ vtmp = vld4_u32(png_ptrc(uint32_t, pp)); 11795bec5421Sopenharmony_ci+ vppt = png_ptr(uint8x8x4_t, &vtmp); 11805bec5421Sopenharmony_ci+ vpp = *vppt; 11815bec5421Sopenharmony_ci+ 11825bec5421Sopenharmony_ci+ vdest.val[0] = paeth(vdest.val[IND3], vpp.val[0], vlast); 11835bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); 11845bec5421Sopenharmony_ci+ vdest.val[1] = paeth(vdest.val[0], vpp.val[1], vpp.val[0]); 11855bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[1], vrp.val[1]); 11865bec5421Sopenharmony_ci+ vdest.val[IND2] = paeth(vdest.val[1], 
vpp.val[IND2], vpp.val[1]); 11875bec5421Sopenharmony_ci+ vdest.val[IND2] = vadd_u8(vdest.val[IND2], vrp.val[IND2]); 11885bec5421Sopenharmony_ci+ vdest.val[IND3] = paeth(vdest.val[IND2], vpp.val[IND3], vpp.val[IND2]); 11895bec5421Sopenharmony_ci+ vdest.val[IND3] = vadd_u8(vdest.val[IND3], vrp.val[IND3]); 11905bec5421Sopenharmony_ci+ 11915bec5421Sopenharmony_ci+ vlast = vpp.val[IND3]; 11925bec5421Sopenharmony_ci+ 11935bec5421Sopenharmony_ci+ vdest_val = png_ldr(uint32x2x4_t, &vdest); 11945bec5421Sopenharmony_ci+ vst4_lane_u32(png_ptr(uint32_t, rp), vdest_val, 0); 11955bec5421Sopenharmony_ci+ 11965bec5421Sopenharmony_ci+ rp += STEP_RGBA; 11975bec5421Sopenharmony_ci+ pp += STEP_RGBA; 11985bec5421Sopenharmony_ci+ count -= STEP_RGBA; 11995bec5421Sopenharmony_ci+ } 12005bec5421Sopenharmony_ci+ 12015bec5421Sopenharmony_ci+ if (count >= STEP_RGBA_HALF) { 12025bec5421Sopenharmony_ci+ uint32x2x2_t vtmp1; 12035bec5421Sopenharmony_ci+ uint8x8x2_t *vrpt1, *vppt1; 12045bec5421Sopenharmony_ci+ uint8x8x2_t vrp1, vpp1; 12055bec5421Sopenharmony_ci+ uint32x2x2_t *temp_pointer; 12065bec5421Sopenharmony_ci+ uint32x2x2_t vdest_val1; 12075bec5421Sopenharmony_ci+ 12085bec5421Sopenharmony_ci+ vtmp1 = vld2_u32(png_ptr(uint32_t, rp)); 12095bec5421Sopenharmony_ci+ vrpt1 = png_ptr(uint8x8x2_t, &vtmp1); 12105bec5421Sopenharmony_ci+ vrp1 = *vrpt1; 12115bec5421Sopenharmony_ci+ vtmp1 = vld2_u32(png_ptrc(uint32_t, pp)); 12125bec5421Sopenharmony_ci+ vppt1 = png_ptr(uint8x8x2_t, &vtmp1); 12135bec5421Sopenharmony_ci+ vpp1 = *vppt1; 12145bec5421Sopenharmony_ci+ 12155bec5421Sopenharmony_ci+ vdest.val[0] = paeth(vdest.val[IND3], vpp1.val[0], vlast); 12165bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp1.val[0]); 12175bec5421Sopenharmony_ci+ vdest.val[1] = paeth(vdest.val[0], vpp1.val[1], vpp1.val[0]); 12185bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[1], vrp1.val[1]); 12195bec5421Sopenharmony_ci+ vlast = vpp1.val[1]; 12205bec5421Sopenharmony_ci+ 12215bec5421Sopenharmony_ci+ 
vdest_val1 = png_ldr(uint32x2x2_t, &vdest); 12225bec5421Sopenharmony_ci+ vst2_lane_u32(png_ptr(uint32_t, rp), vdest_val1, 0); 12235bec5421Sopenharmony_ci+ vdest.val[IND3] = vdest.val[1]; 12245bec5421Sopenharmony_ci+ 12255bec5421Sopenharmony_ci+ rp += STEP_RGBA_HALF; 12265bec5421Sopenharmony_ci+ pp += STEP_RGBA_HALF; 12275bec5421Sopenharmony_ci+ count -= STEP_RGBA_HALF; 12285bec5421Sopenharmony_ci+ } 12295bec5421Sopenharmony_ci+ 12305bec5421Sopenharmony_ci+ if (count == 0) { 12315bec5421Sopenharmony_ci+ return; 12325bec5421Sopenharmony_ci+ } 12335bec5421Sopenharmony_ci+ 12345bec5421Sopenharmony_ci+ uint32x2_t vtmp2; 12355bec5421Sopenharmony_ci+ uint8x8_t *vrpt2, *vppt2; 12365bec5421Sopenharmony_ci+ uint8x8_t vrp2, vpp2; 12375bec5421Sopenharmony_ci+ uint32x2_t *temp_pointer; 12385bec5421Sopenharmony_ci+ uint32x2_t vdest_val2; 12395bec5421Sopenharmony_ci+ 12405bec5421Sopenharmony_ci+ vtmp2 = vld1_u32(png_ptr(uint32_t, rp)); 12415bec5421Sopenharmony_ci+ vrpt2 = png_ptr(uint8x8_t, &vtmp2); 12425bec5421Sopenharmony_ci+ vrp2 = *vrpt2; 12435bec5421Sopenharmony_ci+ vtmp2 = vld1_u32(png_ptrc(uint32_t, pp)); 12445bec5421Sopenharmony_ci+ vppt2 = png_ptr(uint8x8_t, &vtmp2); 12455bec5421Sopenharmony_ci+ vpp2 = *vppt2; 12465bec5421Sopenharmony_ci+ 12475bec5421Sopenharmony_ci+ vdest.val[0] = paeth(vdest.val[IND3], vpp2, vlast); 12485bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp2); 12495bec5421Sopenharmony_ci+ 12505bec5421Sopenharmony_ci+ vdest_val2 = png_ldr(uint32x2_t, &vdest); 12515bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), vdest_val2, 0); 12525bec5421Sopenharmony_ci+} 12535bec5421Sopenharmony_ci+ 12545bec5421Sopenharmony_ci+void png_read_filter_row_paeth4_x2_neon(png_row_infop row_info, png_bytep row, 12555bec5421Sopenharmony_ci+ png_const_bytep prev_row) 12565bec5421Sopenharmony_ci+{ 12575bec5421Sopenharmony_ci+ png_bytep rp = row; 12585bec5421Sopenharmony_ci+ int count = row_info->rowbytes; 12595bec5421Sopenharmony_ci+ png_const_bytep pp = 
prev_row; 12605bec5421Sopenharmony_ci+ png_bytep np = row + row_info->rowbytes + 1; 12615bec5421Sopenharmony_ci+ 12625bec5421Sopenharmony_ci+ uint8x8_t vlast = vdup_n_u8(0); 12635bec5421Sopenharmony_ci+ uint8x8x4_t vdest; 12645bec5421Sopenharmony_ci+ vdest.val[IND3] = vdup_n_u8(0); 12655bec5421Sopenharmony_ci+ 12665bec5421Sopenharmony_ci+ png_debug(1, "in png_read_filter_row_paeth4_x2_neon"); 12675bec5421Sopenharmony_ci+ 12685bec5421Sopenharmony_ci+ uint32x2x4_t vtmp; 12695bec5421Sopenharmony_ci+ uint8x8x4_t *vrpt, *vppt; 12705bec5421Sopenharmony_ci+ uint8x8x4_t vrp, vpp; 12715bec5421Sopenharmony_ci+ uint32x2x4_t vdest_val; 12725bec5421Sopenharmony_ci+ 12735bec5421Sopenharmony_ci+ uint8x8x4_t *vnpt; 12745bec5421Sopenharmony_ci+ uint8x8x4_t vnp; 12755bec5421Sopenharmony_ci+ uint8x8_t vlastN = vdup_n_u8(0); 12765bec5421Sopenharmony_ci+ uint8x8x4_t vdestN; 12775bec5421Sopenharmony_ci+ vdestN.val[IND3] = vdup_n_u8(0); 12785bec5421Sopenharmony_ci+ 12795bec5421Sopenharmony_ci+ while (count >= STEP_RGBA) { 12805bec5421Sopenharmony_ci+ uint32x2x4_t *temp_pointer; 12815bec5421Sopenharmony_ci+ vtmp = vld4_u32(png_ptr(uint32_t, rp)); 12825bec5421Sopenharmony_ci+ vrpt = png_ptr(uint8x8x4_t, &vtmp); 12835bec5421Sopenharmony_ci+ vrp = *vrpt; 12845bec5421Sopenharmony_ci+ vtmp = vld4_u32(png_ptrc(uint32_t, pp)); 12855bec5421Sopenharmony_ci+ vppt = png_ptr(uint8x8x4_t, &vtmp); 12865bec5421Sopenharmony_ci+ vpp = *vppt; 12875bec5421Sopenharmony_ci+ vtmp = vld4_u32(png_ptrc(uint32_t, np)); 12885bec5421Sopenharmony_ci+ vnpt = png_ptr(uint8x8x4_t, &vtmp); 12895bec5421Sopenharmony_ci+ vnp = *vnpt; 12905bec5421Sopenharmony_ci+ 12915bec5421Sopenharmony_ci+ vdest.val[0] = paeth(vdest.val[IND3], vpp.val[0], vlast); 12925bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]); 12935bec5421Sopenharmony_ci+ vdest.val[1] = paeth(vdest.val[0], vpp.val[1], vpp.val[0]); 12945bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[1], vrp.val[1]); 12955bec5421Sopenharmony_ci+ 
vdest.val[IND2] = paeth(vdest.val[1], vpp.val[IND2], vpp.val[1]); 12965bec5421Sopenharmony_ci+ vdest.val[IND2] = vadd_u8(vdest.val[IND2], vrp.val[IND2]); 12975bec5421Sopenharmony_ci+ vdest.val[IND3] = paeth(vdest.val[IND2], vpp.val[IND3], vpp.val[IND2]); 12985bec5421Sopenharmony_ci+ vdest.val[IND3] = vadd_u8(vdest.val[IND3], vrp.val[IND3]); 12995bec5421Sopenharmony_ci+ 13005bec5421Sopenharmony_ci+ vlast = vpp.val[IND3]; 13015bec5421Sopenharmony_ci+ 13025bec5421Sopenharmony_ci+ vdest_val = png_ldr(uint32x2x4_t, &vdest); 13035bec5421Sopenharmony_ci+ vst4_lane_u32(png_ptr(uint32_t, rp), vdest_val, 0); 13045bec5421Sopenharmony_ci+ 13055bec5421Sopenharmony_ci+ vdestN.val[0] = paeth(vdestN.val[IND3], vdest.val[0], vlastN); 13065bec5421Sopenharmony_ci+ vdestN.val[0] = vadd_u8(vdestN.val[0], vnp.val[0]); 13075bec5421Sopenharmony_ci+ vdestN.val[1] = paeth(vdestN.val[0], vdest.val[1], vdest.val[0]); 13085bec5421Sopenharmony_ci+ vdestN.val[1] = vadd_u8(vdestN.val[1], vnp.val[1]); 13095bec5421Sopenharmony_ci+ vdestN.val[IND2] = paeth(vdestN.val[1], vdest.val[IND2], vdest.val[1]); 13105bec5421Sopenharmony_ci+ vdestN.val[IND2] = vadd_u8(vdestN.val[IND2], vnp.val[IND2]); 13115bec5421Sopenharmony_ci+ vdestN.val[IND3] = paeth(vdestN.val[IND2], vdest.val[IND3], vdest.val[IND2]); 13125bec5421Sopenharmony_ci+ vdestN.val[IND3] = vadd_u8(vdestN.val[IND3], vnp.val[IND3]); 13135bec5421Sopenharmony_ci+ 13145bec5421Sopenharmony_ci+ vlastN = vdest.val[IND3]; 13155bec5421Sopenharmony_ci+ 13165bec5421Sopenharmony_ci+ vdest_val = png_ldr(uint32x2x4_t, &vdestN); 13175bec5421Sopenharmony_ci+ vst4_lane_u32(png_ptr(uint32_t, np), vdest_val, 0); 13185bec5421Sopenharmony_ci+ 13195bec5421Sopenharmony_ci+ rp += STEP_RGBA; 13205bec5421Sopenharmony_ci+ pp += STEP_RGBA; 13215bec5421Sopenharmony_ci+ np += STEP_RGBA; 13225bec5421Sopenharmony_ci+ count -= STEP_RGBA; 13235bec5421Sopenharmony_ci+ } 13245bec5421Sopenharmony_ci+ 13255bec5421Sopenharmony_ci+ if (count >= STEP_RGBA_HALF) { 
13265bec5421Sopenharmony_ci+ uint32x2x2_t vtmp1; 13275bec5421Sopenharmony_ci+ uint8x8x2_t *vrpt1, *vppt1, *vnpt1; 13285bec5421Sopenharmony_ci+ uint8x8x2_t vrp1, vpp1, vnp1; 13295bec5421Sopenharmony_ci+ uint32x2x2_t *temp_pointer; 13305bec5421Sopenharmony_ci+ uint32x2x2_t vdest_val1; 13315bec5421Sopenharmony_ci+ 13325bec5421Sopenharmony_ci+ vtmp1 = vld2_u32(png_ptr(uint32_t, rp)); 13335bec5421Sopenharmony_ci+ vrpt1 = png_ptr(uint8x8x2_t, &vtmp1); 13345bec5421Sopenharmony_ci+ vrp1 = *vrpt1; 13355bec5421Sopenharmony_ci+ vtmp1 = vld2_u32(png_ptrc(uint32_t, pp)); 13365bec5421Sopenharmony_ci+ vppt1 = png_ptr(uint8x8x2_t, &vtmp1); 13375bec5421Sopenharmony_ci+ vpp1 = *vppt1; 13385bec5421Sopenharmony_ci+ vtmp1 = vld2_u32(png_ptrc(uint32_t, np)); 13395bec5421Sopenharmony_ci+ vnpt1 = png_ptr(uint8x8x2_t, &vtmp1); 13405bec5421Sopenharmony_ci+ vnp1 = *vnpt1; 13415bec5421Sopenharmony_ci+ 13425bec5421Sopenharmony_ci+ vdest.val[0] = paeth(vdest.val[IND3], vpp1.val[0], vlast); 13435bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp1.val[0]); 13445bec5421Sopenharmony_ci+ vdest.val[1] = paeth(vdest.val[0], vpp1.val[1], vpp1.val[0]); 13455bec5421Sopenharmony_ci+ vdest.val[1] = vadd_u8(vdest.val[1], vrp1.val[1]); 13465bec5421Sopenharmony_ci+ 13475bec5421Sopenharmony_ci+ vlast = vpp1.val[1]; 13485bec5421Sopenharmony_ci+ 13495bec5421Sopenharmony_ci+ vdest_val1 = png_ldr(uint32x2x2_t, &vdest); 13505bec5421Sopenharmony_ci+ vst2_lane_u32(png_ptr(uint32_t, rp), vdest_val1, 0); 13515bec5421Sopenharmony_ci+ 13525bec5421Sopenharmony_ci+ vdest.val[IND3] = vdest.val[1]; 13535bec5421Sopenharmony_ci+ 13545bec5421Sopenharmony_ci+ vdestN.val[0] = paeth(vdestN.val[IND3], vdest.val[0], vlastN); 13555bec5421Sopenharmony_ci+ vdestN.val[0] = vadd_u8(vdestN.val[0], vnp1.val[0]); 13565bec5421Sopenharmony_ci+ vdestN.val[1] = paeth(vdestN.val[0], vdest.val[1], vdest.val[0]); 13575bec5421Sopenharmony_ci+ vdestN.val[1] = vadd_u8(vdestN.val[1], vnp1.val[1]); 13585bec5421Sopenharmony_ci+ 
13595bec5421Sopenharmony_ci+ vlastN = vdest.val[1]; 13605bec5421Sopenharmony_ci+ 13615bec5421Sopenharmony_ci+ vdest_val1 = png_ldr(uint32x2x2_t, &vdestN); 13625bec5421Sopenharmony_ci+ vst2_lane_u32(png_ptr(uint32_t, np), vdest_val1, 0); 13635bec5421Sopenharmony_ci+ 13645bec5421Sopenharmony_ci+ vdestN.val[IND3] = vdestN.val[1]; 13655bec5421Sopenharmony_ci+ 13665bec5421Sopenharmony_ci+ rp += STEP_RGBA_HALF; 13675bec5421Sopenharmony_ci+ pp += STEP_RGBA_HALF; 13685bec5421Sopenharmony_ci+ np += STEP_RGBA_HALF; 13695bec5421Sopenharmony_ci+ count -= STEP_RGBA_HALF; 13705bec5421Sopenharmony_ci+ } 13715bec5421Sopenharmony_ci+ 13725bec5421Sopenharmony_ci+ if (count == 0) { 13735bec5421Sopenharmony_ci+ return; 13745bec5421Sopenharmony_ci+ } 13755bec5421Sopenharmony_ci+ 13765bec5421Sopenharmony_ci+ uint32x2_t vtmp2; 13775bec5421Sopenharmony_ci+ uint8x8_t *vrpt2, *vppt2, *vnpt2; 13785bec5421Sopenharmony_ci+ uint8x8_t vrp2, vpp2, vnp2; 13795bec5421Sopenharmony_ci+ uint32x2_t *temp_pointer; 13805bec5421Sopenharmony_ci+ uint32x2_t vdest_val2; 13815bec5421Sopenharmony_ci+ 13825bec5421Sopenharmony_ci+ vtmp2 = vld1_u32(png_ptr(uint32_t, rp)); 13835bec5421Sopenharmony_ci+ vrpt2 = png_ptr(uint8x8_t, &vtmp2); 13845bec5421Sopenharmony_ci+ vrp2 = *vrpt2; 13855bec5421Sopenharmony_ci+ vtmp2 = vld1_u32(png_ptrc(uint32_t, pp)); 13865bec5421Sopenharmony_ci+ vppt2 = png_ptr(uint8x8_t, &vtmp2); 13875bec5421Sopenharmony_ci+ vpp2 = *vppt2; 13885bec5421Sopenharmony_ci+ vtmp2 = vld1_u32(png_ptrc(uint32_t, np)); 13895bec5421Sopenharmony_ci+ vnpt2 = png_ptr(uint8x8_t, &vtmp2); 13905bec5421Sopenharmony_ci+ vnp2 = *vnpt2; 13915bec5421Sopenharmony_ci+ 13925bec5421Sopenharmony_ci+ vdest.val[0] = paeth(vdest.val[IND3], vpp2, vlast); 13935bec5421Sopenharmony_ci+ vdest.val[0] = vadd_u8(vdest.val[0], vrp2); 13945bec5421Sopenharmony_ci+ 13955bec5421Sopenharmony_ci+ vdest_val2 = png_ldr(uint32x2_t, &vdest); 13965bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, rp), vdest_val2, 0); 
13975bec5421Sopenharmony_ci+ 13985bec5421Sopenharmony_ci+ vdestN.val[0] = paeth(vdestN.val[IND3], vdest.val[0], vlastN); 13995bec5421Sopenharmony_ci+ vdestN.val[0] = vadd_u8(vdestN.val[0], vnp2); 14005bec5421Sopenharmony_ci+ 14015bec5421Sopenharmony_ci+ vdest_val2 = png_ldr(uint32x2_t, &vdestN); 14025bec5421Sopenharmony_ci+ vst1_lane_u32(png_ptr(uint32_t, np), vdest_val2, 0); 14035bec5421Sopenharmony_ci+} 14045bec5421Sopenharmony_ci+#endif /* PNG_MULTY_LINE_ENABLE */ 14055bec5421Sopenharmony_ci #endif /* PNG_ARM_NEON_OPT > 0 */ 14065bec5421Sopenharmony_ci #endif /* PNG_ARM_NEON_IMPLEMENTATION == 1 (intrinsics) */ 14075bec5421Sopenharmony_ci #endif /* READ */ 14085bec5421Sopenharmony_cidiff --git a/pngpread.c b/pngpread.c 14095bec5421Sopenharmony_ciindex e283627b7..bb12f61ea 100644 14105bec5421Sopenharmony_ci--- a/pngpread.c 14115bec5421Sopenharmony_ci+++ b/pngpread.c 14125bec5421Sopenharmony_ci@@ -264,9 +264,22 @@ png_push_read_chunk(png_structrp png_ptr, png_inforp info_ptr) 14135bec5421Sopenharmony_ci png_ptr->idat_size = png_ptr->push_length; 14145bec5421Sopenharmony_ci png_ptr->process_mode = PNG_READ_IDAT_MODE; 14155bec5421Sopenharmony_ci png_push_have_info(png_ptr, info_ptr); 14165bec5421Sopenharmony_ci- png_ptr->zstream.avail_out = 14175bec5421Sopenharmony_ci- (uInt) PNG_ROWBYTES(png_ptr->pixel_depth, 14185bec5421Sopenharmony_ci- png_ptr->iwidth) + 1; 14195bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 14205bec5421Sopenharmony_ci+ // OH ISSUE: png optimize 14215bec5421Sopenharmony_ci+ if (png_ptr->interlaced == 0 && png_ptr->bit_depth == 8 && 14225bec5421Sopenharmony_ci+ (png_ptr->transformations & PNG_CHECK) == 0) { 14235bec5421Sopenharmony_ci+ int rest = png_ptr->num_rows - png_ptr->row_number; 14245bec5421Sopenharmony_ci+ int row_num = rest < PNG_INFLATE_ROWS ? 
rest : PNG_INFLATE_ROWS; 14255bec5421Sopenharmony_ci+ png_ptr->zstream.avail_out = (uInt)(PNG_ROWBYTES(png_ptr->pixel_depth, 14265bec5421Sopenharmony_ci+ png_ptr->iwidth) + 1) * row_num; 14275bec5421Sopenharmony_ci+ } 14285bec5421Sopenharmony_ci+ else 14295bec5421Sopenharmony_ci+#endif 14305bec5421Sopenharmony_ci+ { 14315bec5421Sopenharmony_ci+ png_ptr->zstream.avail_out = 14325bec5421Sopenharmony_ci+ (uInt) PNG_ROWBYTES(png_ptr->pixel_depth, 14335bec5421Sopenharmony_ci+ png_ptr->iwidth) + 1; 14345bec5421Sopenharmony_ci+ } 14355bec5421Sopenharmony_ci png_ptr->zstream.next_out = png_ptr->row_buf; 14365bec5421Sopenharmony_ci return; 14375bec5421Sopenharmony_ci } 14385bec5421Sopenharmony_ci@@ -623,6 +636,92 @@ png_push_read_IDAT(png_structrp png_ptr) 14395bec5421Sopenharmony_ci } 14405bec5421Sopenharmony_ci } 14415bec5421Sopenharmony_ci 14425bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 14435bec5421Sopenharmony_ci+// OH ISSUE: png optimize 14445bec5421Sopenharmony_ci+static void png_push_process_row_x2(png_structrp png_ptr, 14455bec5421Sopenharmony_ci+ png_row_info row_info_in) 14465bec5421Sopenharmony_ci+{ 14475bec5421Sopenharmony_ci+ png_debug(1, "in png_push_process_row_x2"); 14485bec5421Sopenharmony_ci+ png_row_info row_info = row_info_in; 14495bec5421Sopenharmony_ci+ png_read_filter_row(png_ptr, &row_info, png_ptr->row_buf + 1, 14505bec5421Sopenharmony_ci+ png_ptr->prev_row + 1, png_ptr->row_buf[0] + 4); 14515bec5421Sopenharmony_ci+ 14525bec5421Sopenharmony_ci+#ifdef PNG_READ_TRANSFORMS_SUPPORTED 14535bec5421Sopenharmony_ci+ if (png_ptr->transformations != 0) 14545bec5421Sopenharmony_ci+ png_do_read_transformations(png_ptr, &row_info); 14555bec5421Sopenharmony_ci+#endif 14565bec5421Sopenharmony_ci+ 14575bec5421Sopenharmony_ci+ if (png_ptr->transformed_pixel_depth == 0) 14585bec5421Sopenharmony_ci+ { 14595bec5421Sopenharmony_ci+ png_ptr->transformed_pixel_depth = row_info.pixel_depth; 14605bec5421Sopenharmony_ci+ if (row_info.pixel_depth > 
png_ptr->maximum_pixel_depth) 14615bec5421Sopenharmony_ci+ png_error(png_ptr, "progressive row overflow"); 14625bec5421Sopenharmony_ci+ } 14635bec5421Sopenharmony_ci+ 14645bec5421Sopenharmony_ci+ png_push_have_row(png_ptr, png_ptr->row_buf + 1); 14655bec5421Sopenharmony_ci+ png_read_push_finish_row(png_ptr); 14665bec5421Sopenharmony_ci+ 14675bec5421Sopenharmony_ci+ png_ptr->row_buf = png_ptr->row_buf + png_ptr->rowbytes + 1; 14685bec5421Sopenharmony_ci+ 14695bec5421Sopenharmony_ci+ // do it again 14705bec5421Sopenharmony_ci+ if (png_ptr->transformations != 0) 14715bec5421Sopenharmony_ci+ { 14725bec5421Sopenharmony_ci+ memcpy(png_ptr->prev_row, png_ptr->row_buf, row_info.rowbytes + 1); 14735bec5421Sopenharmony_ci+ } 14745bec5421Sopenharmony_ci+ else 14755bec5421Sopenharmony_ci+ { 14765bec5421Sopenharmony_ci+ png_ptr->prev_row = png_ptr->row_buf; 14775bec5421Sopenharmony_ci+ } 14785bec5421Sopenharmony_ci+#ifdef PNG_READ_TRANSFORMS_SUPPORTED 14795bec5421Sopenharmony_ci+ if (png_ptr->transformations != 0) 14805bec5421Sopenharmony_ci+ png_do_read_transformations(png_ptr, &row_info); 14815bec5421Sopenharmony_ci+#endif 14825bec5421Sopenharmony_ci+ 14835bec5421Sopenharmony_ci+ png_push_have_row(png_ptr, png_ptr->row_buf + 1); 14845bec5421Sopenharmony_ci+ png_read_push_finish_row(png_ptr); 14855bec5421Sopenharmony_ci+} 14865bec5421Sopenharmony_ci+ 14875bec5421Sopenharmony_ci+static void png_push_process_multi_rows(png_structrp png_ptr, int row_num) 14885bec5421Sopenharmony_ci+{ 14895bec5421Sopenharmony_ci+ png_debug(1, "in png_push_process_multi_rows"); 14905bec5421Sopenharmony_ci+ uInt row_bytes = png_ptr->rowbytes + 1; 14915bec5421Sopenharmony_ci+ 14925bec5421Sopenharmony_ci+ png_row_info row_info; 14935bec5421Sopenharmony_ci+ row_info.width = png_ptr->iwidth; 14945bec5421Sopenharmony_ci+ row_info.color_type = png_ptr->color_type; 14955bec5421Sopenharmony_ci+ row_info.bit_depth = png_ptr->bit_depth; 14965bec5421Sopenharmony_ci+ row_info.channels = png_ptr->channels; 
14975bec5421Sopenharmony_ci+ row_info.pixel_depth = png_ptr->pixel_depth; 14985bec5421Sopenharmony_ci+ row_info.rowbytes = png_ptr->rowbytes; 14995bec5421Sopenharmony_ci+ 15005bec5421Sopenharmony_ci+ png_bytep temp_row = png_ptr->row_buf; 15015bec5421Sopenharmony_ci+ png_bytep temp_prev_row = png_ptr->prev_row; 15025bec5421Sopenharmony_ci+ 15035bec5421Sopenharmony_ci+ for (int i = 0; i < row_num; i++) { 15045bec5421Sopenharmony_ci+ // check if the x2_filter is effective: only supports channels 3 or 4 15055bec5421Sopenharmony_ci+ if ((png_ptr->channels == 3 || png_ptr->channels == 4) && 15065bec5421Sopenharmony_ci+ i < row_num -1 && png_ptr->row_buf[0] > PNG_FILTER_VALUE_SUB && 15075bec5421Sopenharmony_ci+ png_ptr->row_buf[0] < PNG_FILTER_VALUE_LAST && 15085bec5421Sopenharmony_ci+ png_ptr->row_buf[0] == png_ptr->row_buf[row_bytes]) 15095bec5421Sopenharmony_ci+ { 15105bec5421Sopenharmony_ci+ png_push_process_row_x2(png_ptr, row_info); 15115bec5421Sopenharmony_ci+ png_ptr->row_buf = png_ptr->row_buf + row_bytes; 15125bec5421Sopenharmony_ci+ i++; 15135bec5421Sopenharmony_ci+ continue; 15145bec5421Sopenharmony_ci+ } 15155bec5421Sopenharmony_ci+ png_push_process_row(png_ptr); 15165bec5421Sopenharmony_ci+ png_ptr->row_buf = png_ptr->row_buf + row_bytes; 15175bec5421Sopenharmony_ci+ } 15185bec5421Sopenharmony_ci+ 15195bec5421Sopenharmony_ci+ if (png_ptr->transformations == 0 && png_ptr->interlaced == 0) 15205bec5421Sopenharmony_ci+ { 15215bec5421Sopenharmony_ci+ png_ptr->prev_row = temp_prev_row; 15225bec5421Sopenharmony_ci+ memcpy(png_ptr->prev_row, png_ptr->row_buf - row_bytes, row_bytes); 15235bec5421Sopenharmony_ci+ } 15245bec5421Sopenharmony_ci+ png_ptr->row_buf = temp_row; 15255bec5421Sopenharmony_ci+} 15265bec5421Sopenharmony_ci+#endif 15275bec5421Sopenharmony_ci+ 15285bec5421Sopenharmony_ci void /* PRIVATE */ 15295bec5421Sopenharmony_ci png_process_IDAT_data(png_structrp png_ptr, png_bytep buffer, 15305bec5421Sopenharmony_ci size_t buffer_length) 
15315bec5421Sopenharmony_ci@@ -639,6 +738,17 @@ png_process_IDAT_data(png_structrp png_ptr, png_bytep buffer, 15325bec5421Sopenharmony_ci /* TODO: WARNING: TRUNCATION ERROR: DANGER WILL ROBINSON: */ 15335bec5421Sopenharmony_ci png_ptr->zstream.avail_in = (uInt)buffer_length; 15345bec5421Sopenharmony_ci 15355bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 15365bec5421Sopenharmony_ci+ // OH ISSUE: png optimize 15375bec5421Sopenharmony_ci+ int row_num = 1; 15385bec5421Sopenharmony_ci+ if (png_ptr->interlaced == 0 && png_ptr->bit_depth == 8 && 15395bec5421Sopenharmony_ci+ (png_ptr->transformations & PNG_CHECK) == 0) 15405bec5421Sopenharmony_ci+ { 15415bec5421Sopenharmony_ci+ int rest = png_ptr->num_rows - png_ptr->row_number; 15425bec5421Sopenharmony_ci+ row_num = rest < PNG_INFLATE_ROWS ? rest : PNG_INFLATE_ROWS; 15435bec5421Sopenharmony_ci+ } 15445bec5421Sopenharmony_ci+#endif 15455bec5421Sopenharmony_ci+ 15465bec5421Sopenharmony_ci /* Keep going until the decompressed data is all processed 15475bec5421Sopenharmony_ci * or the stream marked as finished. 15485bec5421Sopenharmony_ci */ 15495bec5421Sopenharmony_ci@@ -655,9 +765,20 @@ png_process_IDAT_data(png_structrp png_ptr, png_bytep buffer, 15505bec5421Sopenharmony_ci if (!(png_ptr->zstream.avail_out > 0)) 15515bec5421Sopenharmony_ci { 15525bec5421Sopenharmony_ci /* TODO: WARNING: TRUNCATION ERROR: DANGER WILL ROBINSON: */ 15535bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 15545bec5421Sopenharmony_ci+ // OH ISSUE: png optimize 15555bec5421Sopenharmony_ci+ if (png_ptr->interlaced == 0 && png_ptr->bit_depth == 8 && 15565bec5421Sopenharmony_ci+ (png_ptr->transformations & PNG_CHECK) == 0) 15575bec5421Sopenharmony_ci+ { 15585bec5421Sopenharmony_ci+ int rest = png_ptr->num_rows - png_ptr->row_number; 15595bec5421Sopenharmony_ci+ row_num = rest < PNG_INFLATE_ROWS ? 
rest : PNG_INFLATE_ROWS; 15605bec5421Sopenharmony_ci+ } 15615bec5421Sopenharmony_ci+ png_ptr->zstream.avail_out = (uInt)(PNG_ROWBYTES(png_ptr->pixel_depth, 15625bec5421Sopenharmony_ci+ png_ptr->iwidth) + 1) * row_num; 15635bec5421Sopenharmony_ci+#else 15645bec5421Sopenharmony_ci png_ptr->zstream.avail_out = (uInt)(PNG_ROWBYTES(png_ptr->pixel_depth, 15655bec5421Sopenharmony_ci png_ptr->iwidth) + 1); 15665bec5421Sopenharmony_ci- 15675bec5421Sopenharmony_ci+#endif 15685bec5421Sopenharmony_ci png_ptr->zstream.next_out = png_ptr->row_buf; 15695bec5421Sopenharmony_ci } 15705bec5421Sopenharmony_ci 15715bec5421Sopenharmony_ci@@ -719,7 +840,12 @@ png_process_IDAT_data(png_structrp png_ptr, png_bytep buffer, 15725bec5421Sopenharmony_ci 15735bec5421Sopenharmony_ci /* Do we have a complete row? */ 15745bec5421Sopenharmony_ci if (png_ptr->zstream.avail_out == 0) 15755bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 15765bec5421Sopenharmony_ci+ // OH ISSUE: png optimize 15775bec5421Sopenharmony_ci+ png_push_process_multi_rows(png_ptr, row_num); 15785bec5421Sopenharmony_ci+#else 15795bec5421Sopenharmony_ci png_push_process_row(png_ptr); 15805bec5421Sopenharmony_ci+#endif 15815bec5421Sopenharmony_ci } 15825bec5421Sopenharmony_ci 15835bec5421Sopenharmony_ci /* And check for the end of the stream. */ 15845bec5421Sopenharmony_ci@@ -738,6 +864,7 @@ png_process_IDAT_data(png_structrp png_ptr, png_bytep buffer, 15855bec5421Sopenharmony_ci void /* PRIVATE */ 15865bec5421Sopenharmony_ci png_push_process_row(png_structrp png_ptr) 15875bec5421Sopenharmony_ci { 15885bec5421Sopenharmony_ci+ png_debug(1, "in png_push_process_row"); 15895bec5421Sopenharmony_ci /* 1.5.6: row_info moved out of png_struct to a local here. 
*/ 15905bec5421Sopenharmony_ci png_row_info row_info; 15915bec5421Sopenharmony_ci 15925bec5421Sopenharmony_ci@@ -762,8 +889,17 @@ png_push_process_row(png_structrp png_ptr) 15935bec5421Sopenharmony_ci * it may not be in the future, so this was changed just to copy the 15945bec5421Sopenharmony_ci * interlaced row count: 15955bec5421Sopenharmony_ci */ 15965bec5421Sopenharmony_ci- memcpy(png_ptr->prev_row, png_ptr->row_buf, row_info.rowbytes + 1); 15975bec5421Sopenharmony_ci- 15985bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 15995bec5421Sopenharmony_ci+ // OH ISSUE: png optimize 16005bec5421Sopenharmony_ci+ if (png_ptr->transformations == 0 && png_ptr->interlaced == 0) 16015bec5421Sopenharmony_ci+ { 16025bec5421Sopenharmony_ci+ png_ptr->prev_row = png_ptr->row_buf; 16035bec5421Sopenharmony_ci+ } 16045bec5421Sopenharmony_ci+ else 16055bec5421Sopenharmony_ci+#endif 16065bec5421Sopenharmony_ci+ { 16075bec5421Sopenharmony_ci+ memcpy(png_ptr->prev_row, png_ptr->row_buf, row_info.rowbytes + 1); 16085bec5421Sopenharmony_ci+ } 16095bec5421Sopenharmony_ci #ifdef PNG_READ_TRANSFORMS_SUPPORTED 16105bec5421Sopenharmony_ci if (png_ptr->transformations != 0) 16115bec5421Sopenharmony_ci png_do_read_transformations(png_ptr, &row_info); 16125bec5421Sopenharmony_cidiff --git a/pngpriv.h b/pngpriv.h 16135bec5421Sopenharmony_ciindex fb521cf00..81300fbd8 100644 16145bec5421Sopenharmony_ci--- a/pngpriv.h 16155bec5421Sopenharmony_ci+++ b/pngpriv.h 16165bec5421Sopenharmony_ci@@ -189,6 +189,19 @@ 16175bec5421Sopenharmony_ci # define PNG_ARM_NEON_IMPLEMENTATION 0 16185bec5421Sopenharmony_ci #endif /* PNG_ARM_NEON_OPT > 0 */ 16195bec5421Sopenharmony_ci 16205bec5421Sopenharmony_ci+#if defined(PNG_ARM_NEON_IMPLEMENTATION) && defined(PNG_ARM_NEON) 16215bec5421Sopenharmony_ci+// OH ISSUE: png optimize 16225bec5421Sopenharmony_ci+# if PNG_ARM_NEON_IMPLEMENTATION == 1 16235bec5421Sopenharmony_ci+# define PNG_MULTY_LINE_ENABLE 16245bec5421Sopenharmony_ci+# define PNG_INFLATE_MAX_SIZE (65536) 
16255bec5421Sopenharmony_ci+# define PNG_INFLATE_ROWS (50) 16265bec5421Sopenharmony_ci+# define PNG_CHECK (PNG_EXPAND | PNG_STRIP_ALPHA | PNG_RGB_TO_GRAY | PNG_ENCODE_ALPHA | \ 16275bec5421Sopenharmony_ci+ PNG_PACKSWAP | PNG_GRAY_TO_RGB | PNG_COMPOSE | PNG_SCALE_16_TO_8 | PNG_16_TO_8 | \ 16285bec5421Sopenharmony_ci+ PNG_BACKGROUND_EXPAND | PNG_EXPAND_16 | PNG_PACK | PNG_ADD_ALPHA | PNG_EXPAND_tRNS | \ 16295bec5421Sopenharmony_ci+ PNG_RGB_TO_GRAY_ERR | PNG_RGB_TO_GRAY_WARN | PNG_FILLER | PNG_USER_TRANSFORM) 16305bec5421Sopenharmony_ci+# endif 16315bec5421Sopenharmony_ci+#endif 16325bec5421Sopenharmony_ci+ 16335bec5421Sopenharmony_ci #ifndef PNG_MIPS_MSA_OPT 16345bec5421Sopenharmony_ci # if defined(__mips_msa) && (__mips_isa_rev >= 5) && defined(PNG_ALIGNED_MEMORY_SUPPORTED) 16355bec5421Sopenharmony_ci # define PNG_MIPS_MSA_OPT 2 16365bec5421Sopenharmony_ci@@ -351,8 +364,14 @@ 16375bec5421Sopenharmony_ci #endif 16385bec5421Sopenharmony_ci 16395bec5421Sopenharmony_ci #ifndef PNG_INTERNAL_FUNCTION 16405bec5421Sopenharmony_ci+// OH ISSUE: png optimize 16415bec5421Sopenharmony_ci+# ifdef PNG_MULTY_LINE_ENABLE 16425bec5421Sopenharmony_ci+# define PNG_HIDE __attribute__((visibility("hidden"))) 16435bec5421Sopenharmony_ci+# else 16445bec5421Sopenharmony_ci+# define PNG_HIDE 16455bec5421Sopenharmony_ci+# endif 16465bec5421Sopenharmony_ci # define PNG_INTERNAL_FUNCTION(type, name, args, attributes)\ 16475bec5421Sopenharmony_ci- PNG_LINKAGE_FUNCTION PNG_FUNCTION(type, name, args, PNG_EMPTY attributes) 16485bec5421Sopenharmony_ci+ PNG_LINKAGE_FUNCTION PNG_FUNCTION(type, name, args, PNG_HIDE attributes) 16495bec5421Sopenharmony_ci #endif 16505bec5421Sopenharmony_ci 16515bec5421Sopenharmony_ci #ifndef PNG_INTERNAL_CALLBACK 16525bec5421Sopenharmony_ci@@ -1304,6 +1323,19 @@ PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_neon,(png_row_infop 16535bec5421Sopenharmony_ci row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); 16545bec5421Sopenharmony_ci 
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_neon,(png_row_infop 16555bec5421Sopenharmony_ci row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); 16565bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 16575bec5421Sopenharmony_ci+// OH ISSUE: png optimize 16585bec5421Sopenharmony_ci+PNG_INTERNAL_FUNCTION(void, png_read_filter_row_up_x2_neon, (png_row_infop 16595bec5421Sopenharmony_ci+ row_info, png_bytep row, png_const_bytep prev_row), PNG_EMPTY); 16605bec5421Sopenharmony_ci+PNG_INTERNAL_FUNCTION(void, png_read_filter_row_avg3_x2_neon, (png_row_infop 16615bec5421Sopenharmony_ci+ row_info, png_bytep row, png_const_bytep prev_row), PNG_EMPTY); 16625bec5421Sopenharmony_ci+PNG_INTERNAL_FUNCTION(void, png_read_filter_row_avg4_x2_neon, (png_row_infop 16635bec5421Sopenharmony_ci+ row_info, png_bytep row, png_const_bytep prev_row), PNG_EMPTY); 16645bec5421Sopenharmony_ci+PNG_INTERNAL_FUNCTION(void, png_read_filter_row_paeth3_x2_neon, (png_row_infop 16655bec5421Sopenharmony_ci+ row_info, png_bytep row, png_const_bytep prev_row), PNG_EMPTY); 16665bec5421Sopenharmony_ci+PNG_INTERNAL_FUNCTION(void, png_read_filter_row_paeth4_x2_neon, (png_row_infop 16675bec5421Sopenharmony_ci+ row_info, png_bytep row, png_const_bytep prev_row), PNG_EMPTY); 16685bec5421Sopenharmony_ci+#endif 16695bec5421Sopenharmony_ci #endif 16705bec5421Sopenharmony_ci 16715bec5421Sopenharmony_ci #if PNG_MIPS_MSA_OPT > 0 16725bec5421Sopenharmony_cidiff --git a/pngread.c b/pngread.c 16735bec5421Sopenharmony_ciindex 8fa7d9f16..ed5a25307 100644 16745bec5421Sopenharmony_ci--- a/pngread.c 16755bec5421Sopenharmony_ci+++ b/pngread.c 16765bec5421Sopenharmony_ci@@ -54,7 +54,12 @@ png_create_read_struct_2,(png_const_charp user_png_ver, png_voidp error_ptr, 16775bec5421Sopenharmony_ci * required (it will be zero in a write structure.) 
16785bec5421Sopenharmony_ci */ 16795bec5421Sopenharmony_ci # ifdef PNG_SEQUENTIAL_READ_SUPPORTED 16805bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 16815bec5421Sopenharmony_ci+ // OH ISSUE: png optimize 16825bec5421Sopenharmony_ci+ png_ptr->IDAT_read_size = PNG_INFLATE_MAX_SIZE; 16835bec5421Sopenharmony_ci+#else 16845bec5421Sopenharmony_ci png_ptr->IDAT_read_size = PNG_IDAT_READ_SIZE; 16855bec5421Sopenharmony_ci+#endif 16865bec5421Sopenharmony_ci # endif 16875bec5421Sopenharmony_ci 16885bec5421Sopenharmony_ci # ifdef PNG_BENIGN_READ_ERRORS_SUPPORTED 16895bec5421Sopenharmony_ci@@ -684,6 +689,224 @@ png_read_rows(png_structrp png_ptr, png_bytepp row, 16905bec5421Sopenharmony_ci #endif /* SEQUENTIAL_READ */ 16915bec5421Sopenharmony_ci 16925bec5421Sopenharmony_ci #ifdef PNG_SEQUENTIAL_READ_SUPPORTED 16935bec5421Sopenharmony_ci+ 16945bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 16955bec5421Sopenharmony_ci+// OH ISSUE: png optimize 16965bec5421Sopenharmony_ci+static void png_read_two_rows(png_structrp png_ptr, png_bytepp rows, png_uint_32 i, 16975bec5421Sopenharmony_ci+ png_row_info row_info) 16985bec5421Sopenharmony_ci+{ 16995bec5421Sopenharmony_ci+ png_debug1(1, "in png_read_two_rows %d", png_ptr->row_buf[0]); 17005bec5421Sopenharmony_ci+ png_read_filter_row(png_ptr, &row_info, png_ptr->row_buf + 1, 17015bec5421Sopenharmony_ci+ png_ptr->prev_row + 1, png_ptr->row_buf[0] + 4); 17025bec5421Sopenharmony_ci+ 17035bec5421Sopenharmony_ci+#ifdef PNG_MNG_FEATURES_SUPPORTED 17045bec5421Sopenharmony_ci+ if ((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) != 0 && 17055bec5421Sopenharmony_ci+ (png_ptr->filter_type == PNG_INTRAPIXEL_DIFFERENCING)) 17065bec5421Sopenharmony_ci+ { 17075bec5421Sopenharmony_ci+ /* Intrapixel differencing */ 17085bec5421Sopenharmony_ci+ png_do_read_intrapixel(&row_info, png_ptr->row_buf + 1); 17095bec5421Sopenharmony_ci+ } 17105bec5421Sopenharmony_ci+#endif 17115bec5421Sopenharmony_ci+ 17125bec5421Sopenharmony_ci+#ifdef 
PNG_READ_TRANSFORMS_SUPPORTED 17135bec5421Sopenharmony_ci+ if (png_ptr->transformations 17145bec5421Sopenharmony_ci+# ifdef PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED 17155bec5421Sopenharmony_ci+ || png_ptr->num_palette_max >= 0 17165bec5421Sopenharmony_ci+# endif 17175bec5421Sopenharmony_ci+ ) 17185bec5421Sopenharmony_ci+ png_do_read_transformations(png_ptr, &row_info); 17195bec5421Sopenharmony_ci+#endif 17205bec5421Sopenharmony_ci+ 17215bec5421Sopenharmony_ci+ /* The transformed pixel depth should match the depth now in row_info. */ 17225bec5421Sopenharmony_ci+ if (png_ptr->transformed_pixel_depth == 0) 17235bec5421Sopenharmony_ci+ { 17245bec5421Sopenharmony_ci+ png_ptr->transformed_pixel_depth = row_info.pixel_depth; 17255bec5421Sopenharmony_ci+ if (row_info.pixel_depth > png_ptr->maximum_pixel_depth) 17265bec5421Sopenharmony_ci+ png_error(png_ptr, "sequential row overflow"); 17275bec5421Sopenharmony_ci+ } 17285bec5421Sopenharmony_ci+ 17295bec5421Sopenharmony_ci+ else if (png_ptr->transformed_pixel_depth != row_info.pixel_depth) 17305bec5421Sopenharmony_ci+ png_error(png_ptr, "internal sequential row size calculation error"); 17315bec5421Sopenharmony_ci+ 17325bec5421Sopenharmony_ci+ if (rows[i] != NULL) 17335bec5421Sopenharmony_ci+ png_combine_row(png_ptr, rows[i], -1); 17345bec5421Sopenharmony_ci+ 17355bec5421Sopenharmony_ci+ png_read_finish_row(png_ptr); 17365bec5421Sopenharmony_ci+ 17375bec5421Sopenharmony_ci+ if (png_ptr->read_row_fn != NULL) 17385bec5421Sopenharmony_ci+ (*(png_ptr->read_row_fn))(png_ptr, png_ptr->row_number, png_ptr->pass); 17395bec5421Sopenharmony_ci+ 17405bec5421Sopenharmony_ci+ png_ptr->row_buf = png_ptr->row_buf + row_info.rowbytes + 1; 17415bec5421Sopenharmony_ci+ 17425bec5421Sopenharmony_ci+ // do again next line 17435bec5421Sopenharmony_ci+ memcpy(png_ptr->prev_row, png_ptr->row_buf, row_info.rowbytes + 1); 17445bec5421Sopenharmony_ci+ 17455bec5421Sopenharmony_ci+#ifdef PNG_MNG_FEATURES_SUPPORTED 17465bec5421Sopenharmony_ci+ if 
((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) != 0 && 17475bec5421Sopenharmony_ci+ (png_ptr->filter_type == PNG_INTRAPIXEL_DIFFERENCING)) 17485bec5421Sopenharmony_ci+ { 17495bec5421Sopenharmony_ci+ /* Intrapixel differencing */ 17505bec5421Sopenharmony_ci+ png_do_read_intrapixel(&row_info, png_ptr->row_buf + 1); 17515bec5421Sopenharmony_ci+ } 17525bec5421Sopenharmony_ci+#endif 17535bec5421Sopenharmony_ci+ 17545bec5421Sopenharmony_ci+#ifdef PNG_READ_TRANSFORMS_SUPPORTED 17555bec5421Sopenharmony_ci+ if (png_ptr->transformations 17565bec5421Sopenharmony_ci+# ifdef PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED 17575bec5421Sopenharmony_ci+ || png_ptr->num_palette_max >= 0 17585bec5421Sopenharmony_ci+# endif 17595bec5421Sopenharmony_ci+ ) 17605bec5421Sopenharmony_ci+ png_do_read_transformations(png_ptr, &row_info); 17615bec5421Sopenharmony_ci+#endif 17625bec5421Sopenharmony_ci+ 17635bec5421Sopenharmony_ci+ /* The transformed pixel depth should match the depth now in row_info. */ 17645bec5421Sopenharmony_ci+ if (png_ptr->transformed_pixel_depth == 0) 17655bec5421Sopenharmony_ci+ { 17665bec5421Sopenharmony_ci+ png_ptr->transformed_pixel_depth = row_info.pixel_depth; 17675bec5421Sopenharmony_ci+ if (row_info.pixel_depth > png_ptr->maximum_pixel_depth) 17685bec5421Sopenharmony_ci+ png_error(png_ptr, "sequential row overflow"); 17695bec5421Sopenharmony_ci+ } 17705bec5421Sopenharmony_ci+ 17715bec5421Sopenharmony_ci+ else if (png_ptr->transformed_pixel_depth != row_info.pixel_depth) 17725bec5421Sopenharmony_ci+ png_error(png_ptr, "internal sequential row size calculation error"); 17735bec5421Sopenharmony_ci+ 17745bec5421Sopenharmony_ci+ if (rows[i+1] != NULL) 17755bec5421Sopenharmony_ci+ png_combine_row(png_ptr, rows[i+1], -1); 17765bec5421Sopenharmony_ci+ 17775bec5421Sopenharmony_ci+ png_read_finish_row(png_ptr); 17785bec5421Sopenharmony_ci+ 17795bec5421Sopenharmony_ci+ if (png_ptr->read_row_fn != NULL) 17805bec5421Sopenharmony_ci+ (*(png_ptr->read_row_fn))(png_ptr, 
png_ptr->row_number, png_ptr->pass); 17815bec5421Sopenharmony_ci+ 17825bec5421Sopenharmony_ci+ png_ptr->row_buf = png_ptr->row_buf + row_info.rowbytes + 1; 17835bec5421Sopenharmony_ci+} 17845bec5421Sopenharmony_ci+ 17855bec5421Sopenharmony_ci+static void png_read_muilty_rows(png_structrp png_ptr, png_bytepp rows, 17865bec5421Sopenharmony_ci+ png_uint_32 row_num, png_row_info row_info_in) 17875bec5421Sopenharmony_ci+{ 17885bec5421Sopenharmony_ci+ if (png_ptr == NULL) 17895bec5421Sopenharmony_ci+ return; 17905bec5421Sopenharmony_ci+ 17915bec5421Sopenharmony_ci+ png_debug2(1, "in png_read_muilty_rows (row %lu, pass %d)", 17925bec5421Sopenharmony_ci+ (unsigned long)png_ptr->row_number, png_ptr->pass); 17935bec5421Sopenharmony_ci+ 17945bec5421Sopenharmony_ci+ if ((png_ptr->mode & PNG_HAVE_IDAT) == 0) 17955bec5421Sopenharmony_ci+ png_error(png_ptr, "Invalid attempt to read row data"); 17965bec5421Sopenharmony_ci+ 17975bec5421Sopenharmony_ci+ /* Fill the row with IDAT data: */ 17985bec5421Sopenharmony_ci+ uInt row_bytes = row_info_in.rowbytes; 17995bec5421Sopenharmony_ci+ png_ptr->row_buf[0]=255; /* 255 to force error if no data was found */ 18005bec5421Sopenharmony_ci+ png_read_IDAT_data(png_ptr, png_ptr->row_buf, (row_bytes + 1) * row_num); 18015bec5421Sopenharmony_ci+ png_bytep temp_row = png_ptr->row_buf; 18025bec5421Sopenharmony_ci+ 18035bec5421Sopenharmony_ci+ for (png_uint_32 i = 0; i < row_num; i++) { 18045bec5421Sopenharmony_ci+ png_row_info row_info = row_info_in; 18055bec5421Sopenharmony_ci+ // check if the x2_filter is effective: only supports channels 3 or 4 18065bec5421Sopenharmony_ci+ if ((row_info_in.channels == 3 || row_info_in.channels == 4) && 18075bec5421Sopenharmony_ci+ i < row_num -1 && png_ptr->row_buf[0] > PNG_FILTER_VALUE_SUB && 18085bec5421Sopenharmony_ci+ png_ptr->row_buf[0] < PNG_FILTER_VALUE_LAST && 18095bec5421Sopenharmony_ci+ png_ptr->row_buf[0] == png_ptr->row_buf[row_info_in.rowbytes + 1]) 18105bec5421Sopenharmony_ci+ { 
18115bec5421Sopenharmony_ci+ png_read_two_rows(png_ptr, rows, i, row_info); 18125bec5421Sopenharmony_ci+ i++; 18135bec5421Sopenharmony_ci+ continue; 18145bec5421Sopenharmony_ci+ } 18155bec5421Sopenharmony_ci+ if (png_ptr->row_buf[0] > PNG_FILTER_VALUE_NONE) 18165bec5421Sopenharmony_ci+ { 18175bec5421Sopenharmony_ci+ if (png_ptr->row_buf[0] < PNG_FILTER_VALUE_LAST) 18185bec5421Sopenharmony_ci+ png_read_filter_row(png_ptr, &row_info, png_ptr->row_buf + 1, 18195bec5421Sopenharmony_ci+ png_ptr->prev_row + 1, png_ptr->row_buf[0]); 18205bec5421Sopenharmony_ci+ else 18215bec5421Sopenharmony_ci+ png_debug1(1, "bad adaptive filter value %d", png_ptr->row_buf[0]); 18225bec5421Sopenharmony_ci+ } 18235bec5421Sopenharmony_ci+ 18245bec5421Sopenharmony_ci+ memcpy(png_ptr->prev_row, png_ptr->row_buf, row_info_in.rowbytes + 1); 18255bec5421Sopenharmony_ci+ 18265bec5421Sopenharmony_ci+#ifdef PNG_MNG_FEATURES_SUPPORTED 18275bec5421Sopenharmony_ci+ if ((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) != 0 && 18285bec5421Sopenharmony_ci+ (png_ptr->filter_type == PNG_INTRAPIXEL_DIFFERENCING)) 18295bec5421Sopenharmony_ci+ { 18305bec5421Sopenharmony_ci+ /* Intrapixel differencing */ 18315bec5421Sopenharmony_ci+ png_do_read_intrapixel(&row_info, png_ptr->row_buf + 1); 18325bec5421Sopenharmony_ci+ } 18335bec5421Sopenharmony_ci+#endif 18345bec5421Sopenharmony_ci+ 18355bec5421Sopenharmony_ci+#ifdef PNG_READ_TRANSFORMS_SUPPORTED 18365bec5421Sopenharmony_ci+ if (png_ptr->transformations 18375bec5421Sopenharmony_ci+# ifdef PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED 18385bec5421Sopenharmony_ci+ || png_ptr->num_palette_max >= 0 18395bec5421Sopenharmony_ci+# endif 18405bec5421Sopenharmony_ci+ ) 18415bec5421Sopenharmony_ci+ png_do_read_transformations(png_ptr, &row_info); 18425bec5421Sopenharmony_ci+#endif 18435bec5421Sopenharmony_ci+ 18445bec5421Sopenharmony_ci+ /* The transformed pixel depth should match the depth now in row_info. 
*/ 18455bec5421Sopenharmony_ci+ if (png_ptr->transformed_pixel_depth == 0) 18465bec5421Sopenharmony_ci+ { 18475bec5421Sopenharmony_ci+ png_ptr->transformed_pixel_depth = row_info.pixel_depth; 18485bec5421Sopenharmony_ci+ if (row_info.pixel_depth > png_ptr->maximum_pixel_depth) 18495bec5421Sopenharmony_ci+ png_error(png_ptr, "sequential row overflow"); 18505bec5421Sopenharmony_ci+ } 18515bec5421Sopenharmony_ci+ 18525bec5421Sopenharmony_ci+ else if (png_ptr->transformed_pixel_depth != row_info.pixel_depth) 18535bec5421Sopenharmony_ci+ png_error(png_ptr, "internal sequential row size calculation error"); 18545bec5421Sopenharmony_ci+ 18555bec5421Sopenharmony_ci+ if (rows[i] != NULL) 18565bec5421Sopenharmony_ci+ png_combine_row(png_ptr, rows[i], -1); 18575bec5421Sopenharmony_ci+ 18585bec5421Sopenharmony_ci+ png_read_finish_row(png_ptr); 18595bec5421Sopenharmony_ci+ 18605bec5421Sopenharmony_ci+ if (png_ptr->read_row_fn != NULL) 18615bec5421Sopenharmony_ci+ (*(png_ptr->read_row_fn))(png_ptr, png_ptr->row_number, png_ptr->pass); 18625bec5421Sopenharmony_ci+ 18635bec5421Sopenharmony_ci+ png_ptr->row_buf = png_ptr->row_buf + row_bytes + 1; 18645bec5421Sopenharmony_ci+ } 18655bec5421Sopenharmony_ci+ png_ptr->row_buf = temp_row; 18665bec5421Sopenharmony_ci+} 18675bec5421Sopenharmony_ci+ 18685bec5421Sopenharmony_ci+static void png_warn_check(png_structrp png_ptr) 18695bec5421Sopenharmony_ci+{ 18705bec5421Sopenharmony_ci+#ifdef PNG_WARNINGS_SUPPORTED 18715bec5421Sopenharmony_ci+ /* Check for transforms that have been set but were defined out */ 18725bec5421Sopenharmony_ci+#if defined(PNG_WRITE_INVERT_SUPPORTED) && !defined(PNG_READ_INVERT_SUPPORTED) 18735bec5421Sopenharmony_ci+ if ((png_ptr->transformations & PNG_INVERT_MONO) != 0) 18745bec5421Sopenharmony_ci+ png_warning(png_ptr, "PNG_READ_INVERT_SUPPORTED is not defined"); 18755bec5421Sopenharmony_ci+#endif 18765bec5421Sopenharmony_ci+ 18775bec5421Sopenharmony_ci+#if defined(PNG_WRITE_FILLER_SUPPORTED) && 
!defined(PNG_READ_FILLER_SUPPORTED) 18785bec5421Sopenharmony_ci+ if ((png_ptr->transformations & PNG_FILLER) != 0) 18795bec5421Sopenharmony_ci+ png_warning(png_ptr, "PNG_READ_FILLER_SUPPORTED is not defined"); 18805bec5421Sopenharmony_ci+#endif 18815bec5421Sopenharmony_ci+ 18825bec5421Sopenharmony_ci+#if defined(PNG_WRITE_PACKSWAP_SUPPORTED) && \ 18835bec5421Sopenharmony_ci+ !defined(PNG_READ_PACKSWAP_SUPPORTED) 18845bec5421Sopenharmony_ci+ if ((png_ptr->transformations & PNG_PACKSWAP) != 0) 18855bec5421Sopenharmony_ci+ png_warning(png_ptr, "PNG_READ_PACKSWAP_SUPPORTED is not defined"); 18865bec5421Sopenharmony_ci+#endif 18875bec5421Sopenharmony_ci+ 18885bec5421Sopenharmony_ci+#if defined(PNG_WRITE_PACK_SUPPORTED) && !defined(PNG_READ_PACK_SUPPORTED) 18895bec5421Sopenharmony_ci+ if ((png_ptr->transformations & PNG_PACK) != 0) 18905bec5421Sopenharmony_ci+ png_warning(png_ptr, "PNG_READ_PACK_SUPPORTED is not defined"); 18915bec5421Sopenharmony_ci+#endif 18925bec5421Sopenharmony_ci+ 18935bec5421Sopenharmony_ci+#if defined(PNG_WRITE_SHIFT_SUPPORTED) && !defined(PNG_READ_SHIFT_SUPPORTED) 18945bec5421Sopenharmony_ci+ if ((png_ptr->transformations & PNG_SHIFT) != 0) 18955bec5421Sopenharmony_ci+ png_warning(png_ptr, "PNG_READ_SHIFT_SUPPORTED is not defined"); 18965bec5421Sopenharmony_ci+#endif 18975bec5421Sopenharmony_ci+ 18985bec5421Sopenharmony_ci+#if defined(PNG_WRITE_BGR_SUPPORTED) && !defined(PNG_READ_BGR_SUPPORTED) 18995bec5421Sopenharmony_ci+ if ((png_ptr->transformations & PNG_BGR) != 0) 19005bec5421Sopenharmony_ci+ png_warning(png_ptr, "PNG_READ_BGR_SUPPORTED is not defined"); 19015bec5421Sopenharmony_ci+#endif 19025bec5421Sopenharmony_ci+ 19035bec5421Sopenharmony_ci+#if defined(PNG_WRITE_SWAP_SUPPORTED) && !defined(PNG_READ_SWAP_SUPPORTED) 19045bec5421Sopenharmony_ci+ if ((png_ptr->transformations & PNG_SWAP_BYTES) != 0) 19055bec5421Sopenharmony_ci+ png_warning(png_ptr, "PNG_READ_SWAP_SUPPORTED is not defined"); 19065bec5421Sopenharmony_ci+#endif 
19075bec5421Sopenharmony_ci+#endif /* WARNINGS */ 19085bec5421Sopenharmony_ci+} 19095bec5421Sopenharmony_ci+#endif // PNG_MULTY_LINE_ENABLE 19105bec5421Sopenharmony_ci+ 19115bec5421Sopenharmony_ci /* Read the entire image. If the image has an alpha channel or a tRNS 19125bec5421Sopenharmony_ci * chunk, and you have called png_handle_alpha()[*], you will need to 19135bec5421Sopenharmony_ci * initialize the image to the current image that PNG will be overlaying. 19145bec5421Sopenharmony_ci@@ -745,13 +968,45 @@ png_read_image(png_structrp png_ptr, png_bytepp image) 19155bec5421Sopenharmony_ci 19165bec5421Sopenharmony_ci image_height=png_ptr->height; 19175bec5421Sopenharmony_ci 19185bec5421Sopenharmony_ci- for (j = 0; j < pass; j++) 19195bec5421Sopenharmony_ci- { 19205bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 19215bec5421Sopenharmony_ci+ // OH ISSUE: png optimize 19225bec5421Sopenharmony_ci+ if (png_ptr->interlaced == 0 && png_ptr->bit_depth == 8 && 19235bec5421Sopenharmony_ci+ (png_ptr->transformations & PNG_CHECK) == 0) { 19245bec5421Sopenharmony_ci+ if ((png_ptr->flags & PNG_FLAG_ROW_INIT) == 0) 19255bec5421Sopenharmony_ci+ png_read_start_row(png_ptr); 19265bec5421Sopenharmony_ci+ 19275bec5421Sopenharmony_ci+ png_warn_check(png_ptr); 19285bec5421Sopenharmony_ci+ png_row_info row_info; 19295bec5421Sopenharmony_ci+ row_info.width = png_ptr->iwidth; 19305bec5421Sopenharmony_ci+ row_info.color_type = png_ptr->color_type; 19315bec5421Sopenharmony_ci+ row_info.bit_depth = png_ptr->bit_depth; 19325bec5421Sopenharmony_ci+ row_info.channels = png_ptr->channels; 19335bec5421Sopenharmony_ci+ row_info.pixel_depth = png_ptr->pixel_depth; 19345bec5421Sopenharmony_ci+ row_info.rowbytes = png_ptr->rowbytes; 19355bec5421Sopenharmony_ci+ 19365bec5421Sopenharmony_ci rp = image; 19375bec5421Sopenharmony_ci- for (i = 0; i < image_height; i++) 19385bec5421Sopenharmony_ci+ int row_num = PNG_INFLATE_ROWS; 19395bec5421Sopenharmony_ci+ for (i = 0; i < image_height; i += 
PNG_INFLATE_ROWS) 19405bec5421Sopenharmony_ci { 19415bec5421Sopenharmony_ci- png_read_row(png_ptr, *rp, NULL); 19425bec5421Sopenharmony_ci- rp++; 19435bec5421Sopenharmony_ci+ if (image_height - i < PNG_INFLATE_ROWS) 19445bec5421Sopenharmony_ci+ { 19455bec5421Sopenharmony_ci+ row_num = image_height - i; 19465bec5421Sopenharmony_ci+ } 19475bec5421Sopenharmony_ci+ png_read_muilty_rows(png_ptr, rp, row_num, row_info); 19485bec5421Sopenharmony_ci+ rp += row_num; 19495bec5421Sopenharmony_ci+ } 19505bec5421Sopenharmony_ci+ } 19515bec5421Sopenharmony_ci+ else 19525bec5421Sopenharmony_ci+#endif 19535bec5421Sopenharmony_ci+ { 19545bec5421Sopenharmony_ci+ for (j = 0; j < pass; j++) 19555bec5421Sopenharmony_ci+ { 19565bec5421Sopenharmony_ci+ rp = image; 19575bec5421Sopenharmony_ci+ for (i = 0; i < image_height; i++) 19585bec5421Sopenharmony_ci+ { 19595bec5421Sopenharmony_ci+ png_read_row(png_ptr, *rp, NULL); 19605bec5421Sopenharmony_ci+ rp++; 19615bec5421Sopenharmony_ci+ } 19625bec5421Sopenharmony_ci } 19635bec5421Sopenharmony_ci } 19645bec5421Sopenharmony_ci } 19655bec5421Sopenharmony_cidiff --git a/pngrutil.c b/pngrutil.c 19665bec5421Sopenharmony_ciindex 9ac8ec11f..f9c65927d 100644 19675bec5421Sopenharmony_ci--- a/pngrutil.c 19685bec5421Sopenharmony_ci+++ b/pngrutil.c 19695bec5421Sopenharmony_ci@@ -4134,7 +4134,12 @@ png_read_filter_row(png_structrp pp, png_row_infop row_info, png_bytep row, 19705bec5421Sopenharmony_ci * PNG_FILTER_OPTIMIZATIONS to a function that overrides the generic 19715bec5421Sopenharmony_ci * implementations. See png_init_filter_functions above. 
19725bec5421Sopenharmony_ci */ 19735bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 19745bec5421Sopenharmony_ci+ // OH ISSUE: png optimize 19755bec5421Sopenharmony_ci+ if (filter > PNG_FILTER_VALUE_NONE && filter < PNG_FILTER_VALUE_LAST_X2) 19765bec5421Sopenharmony_ci+#else 19775bec5421Sopenharmony_ci if (filter > PNG_FILTER_VALUE_NONE && filter < PNG_FILTER_VALUE_LAST) 19785bec5421Sopenharmony_ci+#endif 19795bec5421Sopenharmony_ci { 19805bec5421Sopenharmony_ci if (pp->read_filter[0] == NULL) 19815bec5421Sopenharmony_ci png_init_filter_functions(pp); 19825bec5421Sopenharmony_ci@@ -4606,7 +4611,24 @@ defined(PNG_USER_TRANSFORM_PTR_SUPPORTED) 19835bec5421Sopenharmony_ci row_bytes + 48); 19845bec5421Sopenharmony_ci 19855bec5421Sopenharmony_ci else 19865bec5421Sopenharmony_ci+ { 19875bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 19885bec5421Sopenharmony_ci+ // OH ISSUE: png optimize 19895bec5421Sopenharmony_ci+ png_uint_32 row_num = 1; 19905bec5421Sopenharmony_ci+ if (png_ptr->bit_depth == 8 && 19915bec5421Sopenharmony_ci+ (png_ptr->transformations & PNG_CHECK) == 0) 19925bec5421Sopenharmony_ci+ { 19935bec5421Sopenharmony_ci+ row_num = png_ptr->height < PNG_INFLATE_ROWS ? 
19945bec5421Sopenharmony_ci+ png_ptr->height : PNG_INFLATE_ROWS; 19955bec5421Sopenharmony_ci+ } 19965bec5421Sopenharmony_ci+ png_ptr->big_row_buf = (png_bytep)png_malloc( 19975bec5421Sopenharmony_ci+ png_ptr, row_bytes * row_num + 48); 19985bec5421Sopenharmony_ci+ if (png_ptr->big_row_buf == NULL) 19995bec5421Sopenharmony_ci+ png_error(png_ptr, "png_malloc failed"); 20005bec5421Sopenharmony_ci+#else 20015bec5421Sopenharmony_ci png_ptr->big_row_buf = (png_bytep)png_malloc(png_ptr, row_bytes + 48); 20025bec5421Sopenharmony_ci+#endif 20035bec5421Sopenharmony_ci+ } 20045bec5421Sopenharmony_ci 20055bec5421Sopenharmony_ci png_ptr->big_prev_row = (png_bytep)png_malloc(png_ptr, row_bytes + 48); 20065bec5421Sopenharmony_ci 20075bec5421Sopenharmony_cidiff --git a/pngstruct.h b/pngstruct.h 20085bec5421Sopenharmony_ciindex e591d94d5..7c3846475 100644 20095bec5421Sopenharmony_ci--- a/pngstruct.h 20105bec5421Sopenharmony_ci+++ b/pngstruct.h 20115bec5421Sopenharmony_ci@@ -140,6 +140,14 @@ typedef const png_colorspace * PNG_RESTRICT png_const_colorspacerp; 20125bec5421Sopenharmony_ci #define PNG_COLORSPACE_CANCEL(flags) (0xffff ^ (flags)) 20135bec5421Sopenharmony_ci #endif /* COLORSPACE || GAMMA */ 20145bec5421Sopenharmony_ci 20155bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 20165bec5421Sopenharmony_ci+// OH ISSUE: png optimize 20175bec5421Sopenharmony_ci+#define PNG_FILTER_VALUE_UP_X2 (6) // PNG_FILTER_VALUE_UP + 4 20185bec5421Sopenharmony_ci+#define PNG_FILTER_VALUE_AVG_X2 (7) // PNG_FILTER_VALUE_AVG + 4 20195bec5421Sopenharmony_ci+#define PNG_FILTER_VALUE_PAETH_X2 (8) // PNG_FILTER_VALUE_PAETH + 4 20205bec5421Sopenharmony_ci+#define PNG_FILTER_VALUE_LAST_X2 (9) // PNG_FILTER_VALUE_LAST + 4 20215bec5421Sopenharmony_ci+#endif 20225bec5421Sopenharmony_ci+ 20235bec5421Sopenharmony_ci struct png_struct_def 20245bec5421Sopenharmony_ci { 20255bec5421Sopenharmony_ci #ifdef PNG_SETJMP_SUPPORTED 20265bec5421Sopenharmony_ci@@ -467,8 +475,14 @@ struct png_struct_def 
20275bec5421Sopenharmony_ci png_bytep big_prev_row; 20285bec5421Sopenharmony_ci 20295bec5421Sopenharmony_ci /* New member added in libpng-1.5.7 */ 20305bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 20315bec5421Sopenharmony_ci+ // OH ISSUE: png optimize 20325bec5421Sopenharmony_ci+ void (*read_filter[PNG_FILTER_VALUE_LAST_X2 - 1])(png_row_infop row_info, 20335bec5421Sopenharmony_ci+ png_bytep row, png_const_bytep prev_row); 20345bec5421Sopenharmony_ci+#else 20355bec5421Sopenharmony_ci void (*read_filter[PNG_FILTER_VALUE_LAST-1])(png_row_infop row_info, 20365bec5421Sopenharmony_ci png_bytep row, png_const_bytep prev_row); 20375bec5421Sopenharmony_ci+#endif 20385bec5421Sopenharmony_ci 20395bec5421Sopenharmony_ci #ifdef PNG_READ_SUPPORTED 20405bec5421Sopenharmony_ci #if defined(PNG_COLORSPACE_SUPPORTED) || defined(PNG_GAMMA_SUPPORTED) 20415bec5421Sopenharmony_cidiff --git a/pngtrans.c b/pngtrans.c 20425bec5421Sopenharmony_ciindex 1100f46eb..9addf3423 100644 20435bec5421Sopenharmony_ci--- a/pngtrans.c 20445bec5421Sopenharmony_ci+++ b/pngtrans.c 20455bec5421Sopenharmony_ci@@ -13,6 +13,19 @@ 20465bec5421Sopenharmony_ci 20475bec5421Sopenharmony_ci #include "pngpriv.h" 20485bec5421Sopenharmony_ci 20495bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 20505bec5421Sopenharmony_ci+# if defined(_MSC_VER) && !defined(__clang__) && defined(_M_ARM64) 20515bec5421Sopenharmony_ci+# include <arm64_neon.h> 20525bec5421Sopenharmony_ci+# else 20535bec5421Sopenharmony_ci+# include <arm_neon.h> 20545bec5421Sopenharmony_ci+# endif 20555bec5421Sopenharmony_ci+# define STEP_GRAY (16) 20565bec5421Sopenharmony_ci+# define STEP_GA (32) 20575bec5421Sopenharmony_ci+# define STEP_RGB (48) 20585bec5421Sopenharmony_ci+# define STEP_RGBA (64) 20595bec5421Sopenharmony_ci+# define INDEX2 (2) 20605bec5421Sopenharmony_ci+#endif 20615bec5421Sopenharmony_ci+ 20625bec5421Sopenharmony_ci #if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) 20635bec5421Sopenharmony_ci 
20645bec5421Sopenharmony_ci #if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED) 20655bec5421Sopenharmony_ci@@ -269,13 +282,19 @@ png_do_invert(png_row_infop row_info, png_bytep row) 20665bec5421Sopenharmony_ci if (row_info->color_type == PNG_COLOR_TYPE_GRAY) 20675bec5421Sopenharmony_ci { 20685bec5421Sopenharmony_ci png_bytep rp = row; 20695bec5421Sopenharmony_ci- size_t i; 20705bec5421Sopenharmony_ci- size_t istop = row_info->rowbytes; 20715bec5421Sopenharmony_ci- 20725bec5421Sopenharmony_ci- for (i = 0; i < istop; i++) 20735bec5421Sopenharmony_ci+ png_bytep rp_stop = row + row_info->rowbytes; 20745bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 20755bec5421Sopenharmony_ci+ png_bytep rp_stop_neon = rp_stop - STEP_GRAY; 20765bec5421Sopenharmony_ci+ for (; rp < rp_stop_neon; rp += STEP_GRAY) 20775bec5421Sopenharmony_ci+ { 20785bec5421Sopenharmony_ci+ uint8x16_t gray = vld1q_u8(rp); 20795bec5421Sopenharmony_ci+ gray = ~gray; 20805bec5421Sopenharmony_ci+ vst1q_u8(rp, gray); 20815bec5421Sopenharmony_ci+ } 20825bec5421Sopenharmony_ci+#endif 20835bec5421Sopenharmony_ci+ for (; rp < rp_stop; rp++) 20845bec5421Sopenharmony_ci { 20855bec5421Sopenharmony_ci *rp = (png_byte)(~(*rp)); 20865bec5421Sopenharmony_ci- rp++; 20875bec5421Sopenharmony_ci } 20885bec5421Sopenharmony_ci } 20895bec5421Sopenharmony_ci 20905bec5421Sopenharmony_ci@@ -283,13 +302,19 @@ png_do_invert(png_row_infop row_info, png_bytep row) 20915bec5421Sopenharmony_ci row_info->bit_depth == 8) 20925bec5421Sopenharmony_ci { 20935bec5421Sopenharmony_ci png_bytep rp = row; 20945bec5421Sopenharmony_ci- size_t i; 20955bec5421Sopenharmony_ci- size_t istop = row_info->rowbytes; 20965bec5421Sopenharmony_ci- 20975bec5421Sopenharmony_ci- for (i = 0; i < istop; i += 2) 20985bec5421Sopenharmony_ci+ png_bytep rp_stop = row + row_info->rowbytes; 20995bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 21005bec5421Sopenharmony_ci+ png_bytep rp_stop_neon = rp_stop - STEP_GA; 21015bec5421Sopenharmony_ci+ 
for (; rp < rp_stop_neon; rp += STEP_GA) 21025bec5421Sopenharmony_ci+ { 21035bec5421Sopenharmony_ci+ uint8x16x2_t gray_alpha = vld2q_u8(rp); 21045bec5421Sopenharmony_ci+ gray_alpha.val[0] = ~gray_alpha.val[0]; 21055bec5421Sopenharmony_ci+ vst2q_u8(rp, gray_alpha); 21065bec5421Sopenharmony_ci+ } 21075bec5421Sopenharmony_ci+#endif 21085bec5421Sopenharmony_ci+ for (; rp < rp_stop; rp += 2) 21095bec5421Sopenharmony_ci { 21105bec5421Sopenharmony_ci *rp = (png_byte)(~(*rp)); 21115bec5421Sopenharmony_ci- rp += 2; 21125bec5421Sopenharmony_ci } 21135bec5421Sopenharmony_ci } 21145bec5421Sopenharmony_ci 21155bec5421Sopenharmony_ci@@ -298,14 +323,21 @@ png_do_invert(png_row_infop row_info, png_bytep row) 21165bec5421Sopenharmony_ci row_info->bit_depth == 16) 21175bec5421Sopenharmony_ci { 21185bec5421Sopenharmony_ci png_bytep rp = row; 21195bec5421Sopenharmony_ci- size_t i; 21205bec5421Sopenharmony_ci- size_t istop = row_info->rowbytes; 21215bec5421Sopenharmony_ci- 21225bec5421Sopenharmony_ci- for (i = 0; i < istop; i += 4) 21235bec5421Sopenharmony_ci+ png_bytep rp_stop = row + row_info->rowbytes; 21245bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 21255bec5421Sopenharmony_ci+ png_bytep rp_stop_neon = rp_stop - STEP_RGBA; 21265bec5421Sopenharmony_ci+ for (; rp < rp_stop_neon; rp += STEP_RGBA) 21275bec5421Sopenharmony_ci+ { 21285bec5421Sopenharmony_ci+ uint8x16x4_t gray_alpha = vld4q_u8(rp); 21295bec5421Sopenharmony_ci+ gray_alpha.val[0] = ~gray_alpha.val[0]; 21305bec5421Sopenharmony_ci+ gray_alpha.val[1] = ~gray_alpha.val[1]; 21315bec5421Sopenharmony_ci+ vst4q_u8(rp, gray_alpha); 21325bec5421Sopenharmony_ci+ } 21335bec5421Sopenharmony_ci+#endif 21345bec5421Sopenharmony_ci+ for (; rp < rp_stop; rp += 4) 21355bec5421Sopenharmony_ci { 21365bec5421Sopenharmony_ci *rp = (png_byte)(~(*rp)); 21375bec5421Sopenharmony_ci *(rp + 1) = (png_byte)(~(*(rp + 1))); 21385bec5421Sopenharmony_ci- rp += 4; 21395bec5421Sopenharmony_ci } 21405bec5421Sopenharmony_ci } 21415bec5421Sopenharmony_ci 
#endif 21425bec5421Sopenharmony_ci@@ -323,10 +355,19 @@ png_do_swap(png_row_infop row_info, png_bytep row) 21435bec5421Sopenharmony_ci if (row_info->bit_depth == 16) 21445bec5421Sopenharmony_ci { 21455bec5421Sopenharmony_ci png_bytep rp = row; 21465bec5421Sopenharmony_ci- png_uint_32 i; 21475bec5421Sopenharmony_ci- png_uint_32 istop= row_info->width * row_info->channels; 21485bec5421Sopenharmony_ci- 21495bec5421Sopenharmony_ci- for (i = 0; i < istop; i++, rp += 2) 21505bec5421Sopenharmony_ci+ png_bytep rp_stop = row + row_info->rowbytes; 21515bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 21525bec5421Sopenharmony_ci+ png_bytep rp_stop_neon = rp_stop - STEP_GA; 21535bec5421Sopenharmony_ci+ for (; rp < rp_stop_neon; rp += STEP_GA) 21545bec5421Sopenharmony_ci+ { 21555bec5421Sopenharmony_ci+ uint8x16x2_t gray = vld2q_u8(rp); 21565bec5421Sopenharmony_ci+ uint8x16_t tmp = gray.val[0]; 21575bec5421Sopenharmony_ci+ gray.val[0] = gray.val[1]; 21585bec5421Sopenharmony_ci+ gray.val[1] = tmp; 21595bec5421Sopenharmony_ci+ vst2q_u8(rp, gray); 21605bec5421Sopenharmony_ci+ } 21615bec5421Sopenharmony_ci+#endif 21625bec5421Sopenharmony_ci+ for (; rp < rp_stop; rp += 2) 21635bec5421Sopenharmony_ci { 21645bec5421Sopenharmony_ci #ifdef PNG_BUILTIN_BSWAP16_SUPPORTED 21655bec5421Sopenharmony_ci /* Feature added to libpng-1.6.11 for testing purposes, not 21665bec5421Sopenharmony_ci@@ -622,15 +663,24 @@ png_do_bgr(png_row_infop row_info, png_bytep row) 21675bec5421Sopenharmony_ci 21685bec5421Sopenharmony_ci if ((row_info->color_type & PNG_COLOR_MASK_COLOR) != 0) 21695bec5421Sopenharmony_ci { 21705bec5421Sopenharmony_ci- png_uint_32 row_width = row_info->width; 21715bec5421Sopenharmony_ci if (row_info->bit_depth == 8) 21725bec5421Sopenharmony_ci { 21735bec5421Sopenharmony_ci if (row_info->color_type == PNG_COLOR_TYPE_RGB) 21745bec5421Sopenharmony_ci { 21755bec5421Sopenharmony_ci- png_bytep rp; 21765bec5421Sopenharmony_ci- png_uint_32 i; 21775bec5421Sopenharmony_ci- 
21785bec5421Sopenharmony_ci- for (i = 0, rp = row; i < row_width; i++, rp += 3) 21795bec5421Sopenharmony_ci+ png_bytep rp = row; 21805bec5421Sopenharmony_ci+ png_bytep rp_stop = row + row_info->rowbytes; 21815bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 21825bec5421Sopenharmony_ci+ png_bytep rp_stop_neon = rp_stop - STEP_RGB; 21835bec5421Sopenharmony_ci+ for (; rp < rp_stop_neon; rp += STEP_RGB) 21845bec5421Sopenharmony_ci+ { 21855bec5421Sopenharmony_ci+ uint8x16x3_t bgr = vld3q_u8(rp); 21865bec5421Sopenharmony_ci+ uint8x16_t tmp = bgr.val[INDEX2]; 21875bec5421Sopenharmony_ci+ bgr.val[INDEX2] = bgr.val[0]; 21885bec5421Sopenharmony_ci+ bgr.val[0] = tmp; 21895bec5421Sopenharmony_ci+ vst3q_u8(rp, bgr); 21905bec5421Sopenharmony_ci+ } 21915bec5421Sopenharmony_ci+#endif 21925bec5421Sopenharmony_ci+ for (; rp < rp_stop; rp += 3) 21935bec5421Sopenharmony_ci { 21945bec5421Sopenharmony_ci png_byte save = *rp; 21955bec5421Sopenharmony_ci *rp = *(rp + 2); 21965bec5421Sopenharmony_ci@@ -640,10 +690,20 @@ png_do_bgr(png_row_infop row_info, png_bytep row) 21975bec5421Sopenharmony_ci 21985bec5421Sopenharmony_ci else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA) 21995bec5421Sopenharmony_ci { 22005bec5421Sopenharmony_ci- png_bytep rp; 22015bec5421Sopenharmony_ci- png_uint_32 i; 22025bec5421Sopenharmony_ci- 22035bec5421Sopenharmony_ci- for (i = 0, rp = row; i < row_width; i++, rp += 4) 22045bec5421Sopenharmony_ci+ png_bytep rp = row; 22055bec5421Sopenharmony_ci+ png_bytep rp_stop = row + row_info->rowbytes; 22065bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 22075bec5421Sopenharmony_ci+ png_bytep rp_stop_neon = rp_stop - STEP_RGBA; 22085bec5421Sopenharmony_ci+ for (; rp < rp_stop_neon; rp += STEP_RGBA) 22095bec5421Sopenharmony_ci+ { 22105bec5421Sopenharmony_ci+ uint8x16x4_t bgra = vld4q_u8(rp); 22115bec5421Sopenharmony_ci+ uint8x16_t tmp = bgra.val[INDEX2]; 22125bec5421Sopenharmony_ci+ bgra.val[INDEX2] = bgra.val[0]; 22135bec5421Sopenharmony_ci+ bgra.val[0] = tmp; 
22145bec5421Sopenharmony_ci+ vst4q_u8(rp, bgra); 22155bec5421Sopenharmony_ci+ } 22165bec5421Sopenharmony_ci+#endif 22175bec5421Sopenharmony_ci+ for (; rp < rp_stop; rp += 4) 22185bec5421Sopenharmony_ci { 22195bec5421Sopenharmony_ci png_byte save = *rp; 22205bec5421Sopenharmony_ci *rp = *(rp + 2); 22215bec5421Sopenharmony_ci@@ -657,10 +717,20 @@ png_do_bgr(png_row_infop row_info, png_bytep row) 22225bec5421Sopenharmony_ci { 22235bec5421Sopenharmony_ci if (row_info->color_type == PNG_COLOR_TYPE_RGB) 22245bec5421Sopenharmony_ci { 22255bec5421Sopenharmony_ci- png_bytep rp; 22265bec5421Sopenharmony_ci- png_uint_32 i; 22275bec5421Sopenharmony_ci- 22285bec5421Sopenharmony_ci- for (i = 0, rp = row; i < row_width; i++, rp += 6) 22295bec5421Sopenharmony_ci+ png_bytep rp = row; 22305bec5421Sopenharmony_ci+ png_bytep rp_stop = row + row_info->rowbytes; 22315bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 22325bec5421Sopenharmony_ci+ png_bytep rp_stop_neon = rp_stop - STEP_RGB; 22335bec5421Sopenharmony_ci+ for (; rp < rp_stop_neon; rp += STEP_RGB) 22345bec5421Sopenharmony_ci+ { 22355bec5421Sopenharmony_ci+ uint16x8x3_t bgr = vld3q_u16((unsigned short *)rp); 22365bec5421Sopenharmony_ci+ uint16x8_t tmp = bgr.val[INDEX2]; 22375bec5421Sopenharmony_ci+ bgr.val[INDEX2] = bgr.val[0]; 22385bec5421Sopenharmony_ci+ bgr.val[0] = tmp; 22395bec5421Sopenharmony_ci+ vst3q_u16((unsigned short *)rp, bgr); 22405bec5421Sopenharmony_ci+ } 22415bec5421Sopenharmony_ci+#endif 22425bec5421Sopenharmony_ci+ for (; rp < rp_stop; rp += 6) 22435bec5421Sopenharmony_ci { 22445bec5421Sopenharmony_ci png_byte save = *rp; 22455bec5421Sopenharmony_ci *rp = *(rp + 4); 22465bec5421Sopenharmony_ci@@ -673,10 +743,20 @@ png_do_bgr(png_row_infop row_info, png_bytep row) 22475bec5421Sopenharmony_ci 22485bec5421Sopenharmony_ci else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA) 22495bec5421Sopenharmony_ci { 22505bec5421Sopenharmony_ci- png_bytep rp; 22515bec5421Sopenharmony_ci- png_uint_32 i; 
22525bec5421Sopenharmony_ci- 22535bec5421Sopenharmony_ci- for (i = 0, rp = row; i < row_width; i++, rp += 8) 22545bec5421Sopenharmony_ci+ png_bytep rp = row; 22555bec5421Sopenharmony_ci+ png_bytep rp_stop = row + row_info->rowbytes; 22565bec5421Sopenharmony_ci+#ifdef PNG_MULTY_LINE_ENABLE 22575bec5421Sopenharmony_ci+ png_bytep rp_stop_neon = rp_stop - STEP_RGBA; 22585bec5421Sopenharmony_ci+ for (; rp < rp_stop_neon; rp += STEP_RGBA) 22595bec5421Sopenharmony_ci+ { 22605bec5421Sopenharmony_ci+ uint16x8x4_t bgra = vld4q_u16((unsigned short *)rp); 22615bec5421Sopenharmony_ci+ uint16x8_t tmp = bgra.val[INDEX2]; 22625bec5421Sopenharmony_ci+ bgra.val[INDEX2] = bgra.val[0]; 22635bec5421Sopenharmony_ci+ bgra.val[0] = tmp; 22645bec5421Sopenharmony_ci+ vst4q_u16((unsigned short *)rp, bgra); 22655bec5421Sopenharmony_ci+ } 22665bec5421Sopenharmony_ci+#endif 22675bec5421Sopenharmony_ci+ for (; rp < rp_stop; rp += 8) 22685bec5421Sopenharmony_ci { 22695bec5421Sopenharmony_ci png_byte save = *rp; 22705bec5421Sopenharmony_ci *rp = *(rp + 4); 2271