Lines Matching refs:stride

32 static void avc_chroma_hz_2x2_msa(uint8_t *src, uint8_t *dst, int32_t stride,
46 LD_SB2(src, stride, src0, src1);
59 dst += stride;
63 static void avc_chroma_hz_2x4_msa(uint8_t *src, uint8_t *dst, int32_t stride,
76 LD_UB4(src, stride, src0, src1, src2, src3);
88 ST_H4(res, 0, 1, 2, 3, dst, stride);
91 static void avc_chroma_hz_2w_msa(uint8_t *src, uint8_t *dst, int32_t stride,
96 avc_chroma_hz_2x2_msa(src, dst, stride, coeff0, coeff1);
98 avc_chroma_hz_2x4_msa(src, dst, stride, coeff0, coeff1);
102 static void avc_chroma_hz_4x2_msa(uint8_t *src, uint8_t *dst, int32_t stride,
115 LD_SB2(src, stride, src0, src1);
124 ST_W2(res, 0, 1, dst, stride);
127 static void avc_chroma_hz_4x4_msa(uint8_t *src, uint8_t *dst, int32_t stride,
139 LD_UB4(src, stride, src0, src1, src2, src3);
147 ST_W4(out, 0, 1, 2, 3, dst, stride);
150 static void avc_chroma_hz_4x8_msa(uint8_t *src, uint8_t *dst, int32_t stride,
162 LD_UB8(src, stride, src0, src1, src2, src3, src4, src5, src6, src7);
171 ST_W8(out0, out1, 0, 1, 2, 3, 0, 1, 2, 3, dst, stride);
174 static void avc_chroma_hz_4w_msa(uint8_t *src, uint8_t *dst, int32_t stride,
179 avc_chroma_hz_4x2_msa(src, dst, stride, coeff0, coeff1);
181 avc_chroma_hz_4x4_msa(src, dst, stride, coeff0, coeff1);
183 avc_chroma_hz_4x8_msa(src, dst, stride, coeff0, coeff1);
187 static void avc_chroma_hz_8x4_msa(uint8_t *src, uint8_t *dst, int32_t stride,
198 LD_UB4(src, stride, src0, src1, src2, src3);
207 ST_D4(out0, out1, 0, 1, 0, 1, dst, stride);
210 static void avc_chroma_hz_8x8_msa(uint8_t *src, uint8_t *dst, int32_t stride,
223 LD_UB8(src, stride, src0, src1, src2, src3, src4, src5, src6, src7);
240 ST_D8(out0, out1, out2, out3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride);
244 int32_t stride, uint32_t coeff0,
258 LD_UB4(src, stride, src0, src1, src2, src3);
259 src += (4 * stride);
269 ST_D4(out0, out1, 0, 1, 0, 1, dst, stride);
270 dst += (4 * stride);
276 src += stride;
287 dst += stride;
292 static void avc_chroma_hz_8w_msa(uint8_t *src, uint8_t *dst, int32_t stride,
297 avc_chroma_hz_8x4_msa(src, dst, stride, coeff0, coeff1);
299 avc_chroma_hz_8x8_msa(src, dst, stride, coeff0, coeff1);
301 avc_chroma_hz_nonmult_msa(src, dst, stride, coeff0, coeff1, height);
305 static void avc_chroma_vt_2x2_msa(uint8_t *src, uint8_t *dst, int32_t stride,
317 LD_SB3(src, stride, src0, src1, src2);
333 dst += stride;
337 static void avc_chroma_vt_2x4_msa(uint8_t *src, uint8_t *dst, int32_t stride,
348 LD_UB5(src, stride, src0, src1, src2, src3, src4);
362 ST_H4(res, 0, 1, 2, 3, dst, stride);
365 static void avc_chroma_vt_2w_msa(uint8_t *src, uint8_t *dst, int32_t stride,
370 avc_chroma_vt_2x2_msa(src, dst, stride, coeff0, coeff1);
372 avc_chroma_vt_2x4_msa(src, dst, stride, coeff0, coeff1);
376 static void avc_chroma_vt_4x2_msa(uint8_t *src, uint8_t *dst, int32_t stride,
387 LD_UB3(src, stride, src0, src1, src2);
397 ST_W2(res, 0, 1, dst, stride);
400 static void avc_chroma_vt_4x4_msa(uint8_t *src, uint8_t *dst, int32_t stride,
411 LD_UB5(src, stride, src0, src1, src2, src3, src4);
421 ST_W4(out, 0, 1, 2, 3, dst, stride);
424 static void avc_chroma_vt_4x8_msa(uint8_t *src, uint8_t *dst, int32_t stride,
434 LD_UB5(src, stride, src0, src1, src2, src3, src4);
435 src += (5 * stride);
436 LD_UB4(src, stride, src5, src6, src7, src8);
449 ST_W8(out0, out1, 0, 1, 2, 3, 0, 1, 2, 3, dst, stride);
452 static void avc_chroma_vt_4w_msa(uint8_t *src, uint8_t *dst, int32_t stride,
457 avc_chroma_vt_4x2_msa(src, dst, stride, coeff0, coeff1);
459 avc_chroma_vt_4x4_msa(src, dst, stride, coeff0, coeff1);
461 avc_chroma_vt_4x8_msa(src, dst, stride, coeff0, coeff1);
465 static void avc_chroma_vt_8x4_msa(uint8_t *src, uint8_t *dst, int32_t stride,
474 LD_UB5(src, stride, src0, src1, src2, src3, src4);
483 ST_D4(out0, out1, 0, 1, 0, 1, dst, stride);
486 static void avc_chroma_vt_8x8_msa(uint8_t *src, uint8_t *dst, int32_t stride,
496 LD_UB5(src, stride, src0, src1, src2, src3, src4);
497 src += (5 * stride);
498 LD_UB4(src, stride, src5, src6, src7, src8);
515 ST_D8(out0, out1, out2, out3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride);
518 static void avc_chroma_vt_8w_msa(uint8_t *src, uint8_t *dst, int32_t stride,
523 avc_chroma_vt_8x4_msa(src, dst, stride, coeff0, coeff1);
525 avc_chroma_vt_8x8_msa(src, dst, stride, coeff0, coeff1);
529 static void avc_chroma_hv_2x2_msa(uint8_t *src, uint8_t *dst, int32_t stride,
546 LD_UB3(src, stride, src0, src1, src2);
560 dst += stride;
564 static void avc_chroma_hv_2x4_msa(uint8_t *src, uint8_t *dst, int32_t stride,
581 LD_UB5(src, stride, src0, src1, src2, src3, src4);
595 ST_H4(res, 0, 1, 2, 3, dst, stride);
598 static void avc_chroma_hv_2w_msa(uint8_t *src, uint8_t *dst, int32_t stride,
604 avc_chroma_hv_2x2_msa(src, dst, stride, coef_hor0, coef_hor1, coef_ver0,
607 avc_chroma_hv_2x4_msa(src, dst, stride, coef_hor0, coef_hor1, coef_ver0,
612 static void avc_chroma_hv_4x2_msa(uint8_t *src, uint8_t *dst, int32_t stride,
627 LD_UB3(src, stride, src0, src1, src2);
637 ST_W2(res, 0, 1, dst, stride);
640 static void avc_chroma_hv_4x4_msa(uint8_t *src, uint8_t *dst, int32_t stride,
657 LD_UB5(src, stride, src0, src1, src2, src3, src4);
669 ST_W2(res0, 0, 1, dst, stride);
670 ST_W2(res1, 0, 1, dst + 2 * stride, stride);
673 static void avc_chroma_hv_4x8_msa(uint8_t *src, uint8_t *dst, int32_t stride,
689 LD_UB5(src, stride, src0, src1, src2, src3, src4);
690 src += (5 * stride);
691 LD_UB4(src, stride, src5, src6, src7, src8);
710 ST_W8(res0, res1, 0, 1, 2, 3, 0, 1, 2, 3, dst, stride);
713 static void avc_chroma_hv_4w_msa(uint8_t *src, uint8_t *dst, int32_t stride,
719 avc_chroma_hv_4x2_msa(src, dst, stride, coef_hor0, coef_hor1, coef_ver0,
722 avc_chroma_hv_4x4_msa(src, dst, stride, coef_hor0, coef_hor1, coef_ver0,
725 avc_chroma_hv_4x8_msa(src, dst, stride, coef_hor0, coef_hor1, coef_ver0,
730 static void avc_chroma_hv_8x4_msa(uint8_t *src, uint8_t *dst, int32_t stride,
747 src += stride;
752 LD_UB4(src, stride, src1, src2, src3, src4);
753 src += (4 * stride);
770 ST_D4(out0, out1, 0, 1, 0, 1, dst, stride);
773 static void avc_chroma_hv_8x8_msa(uint8_t *src, uint8_t *dst, int32_t stride,
792 LD_UB5(src, stride, src0, src1, src2, src3, src4);
793 src += (5 * stride);
794 LD_UB4(src, stride, src5, src6, src7, src8);
826 ST_D8(out0, out1, out2, out3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride);
829 static void avc_chroma_hv_8w_msa(uint8_t *src, uint8_t *dst, int32_t stride,
835 avc_chroma_hv_8x4_msa(src, dst, stride, coef_hor0, coef_hor1, coef_ver0,
838 avc_chroma_hv_8x8_msa(src, dst, stride, coef_hor0, coef_hor1, coef_ver0,
844 int32_t stride, uint32_t coeff0,
859 LD_SB2(src, stride, src0, src1);
862 out1 = LH(dst + stride);
881 dst += stride;
886 int32_t stride, uint32_t coeff0,
900 LD_UB4(src, stride, src0, src1, src2, src3);
902 tp1 = LH(dst + stride);
903 tp2 = LH(dst + 2 * stride);
904 tp3 = LH(dst + 3 * stride);
922 ST_H4(dst0, 0, 1, 2, 3, dst, stride);
926 int32_t stride, uint32_t coeff0,
930 avc_chroma_hz_and_aver_dst_2x2_msa(src, dst, stride, coeff0, coeff1);
932 avc_chroma_hz_and_aver_dst_2x4_msa(src, dst, stride, coeff0, coeff1);
937 int32_t stride, uint32_t coeff0,
951 LD_SB2(src, stride, src0, src1);
953 LW2(dst, stride, load0, load1);
966 ST_W2(dst_data, 0, 1, dst, stride);
970 int32_t stride, uint32_t coeff0,
984 LD_UB4(src, stride, src0, src1, src2, src3);
985 LW4(dst, stride, tp0, tp1, tp2, tp3);
995 ST_W4(out, 0, 1, 2, 3, dst, stride);
999 int32_t stride, uint32_t coeff0,
1013 LD_UB8(src, stride, src0, src1, src2, src3, src4, src5, src6, src7);
1014 LW4(dst, stride, tp0, tp1, tp2, tp3);
1016 LW4(dst + 4 * stride, stride, tp0, tp1, tp2, tp3);
1027 ST_W8(out0, out1, 0, 1, 2, 3, 0, 1, 2, 3, dst, stride);
1031 int32_t stride, uint32_t coeff0,
1035 avc_chroma_hz_and_aver_dst_4x2_msa(src, dst, stride, coeff0, coeff1);
1037 avc_chroma_hz_and_aver_dst_4x4_msa(src, dst, stride, coeff0, coeff1);
1039 avc_chroma_hz_and_aver_dst_4x8_msa(src, dst, stride, coeff0, coeff1);
1044 int32_t stride, uint32_t coeff0,
1057 LD_UB4(src, stride, src0, src1, src2, src3);
1058 LD4(dst, stride, tp0, tp1, tp2, tp3);
1070 ST_D4(dst0, dst1, 0, 1, 0, 1, dst, stride);
1074 int32_t stride, uint32_t coeff0,
1089 LD_UB8(src, stride, src0, src1, src2, src3, src4, src5, src6, src7);
1090 LD4(dst, stride, tp0, tp1, tp2, tp3);
1093 LD4(dst + 4 * stride, stride, tp0, tp1, tp2, tp3);
1114 ST_D8(out0, out1, out2, out3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride);
1118 int32_t stride, uint32_t coeff0,
1122 avc_chroma_hz_and_aver_dst_8x4_msa(src, dst, stride, coeff0, coeff1);
1124 avc_chroma_hz_and_aver_dst_8x8_msa(src, dst, stride, coeff0, coeff1);
1129 int32_t stride, uint32_t coeff0,
1141 LD_SB3(src, stride, src0, src1, src2);
1143 out1 = LH(dst + stride);
1161 dst += stride;
1166 int32_t stride, uint32_t coeff0,
1179 LD_SB5(src, stride, src0, src1, src2, src3, src4);
1182 tp1 = LH(dst + stride);
1183 tp2 = LH(dst + 2 * stride);
1184 tp3 = LH(dst + 3 * stride);
1204 ST_H4(res, 0, 1, 2, 3, dst, stride);
1208 int32_t stride, uint32_t coeff0,
1212 avc_chroma_vt_and_aver_dst_2x2_msa(src, dst, stride, coeff0, coeff1);
1214 avc_chroma_vt_and_aver_dst_2x4_msa(src, dst, stride, coeff0, coeff1);
1219 int32_t stride, uint32_t coeff0,
1231 LD_UB3(src, stride, src0, src1, src2);
1233 LW2(dst, stride, load0, load1);
1247 ST_W2(res, 0, 1, dst, stride);
1251 int32_t stride, uint32_t coeff0,
1264 LD_UB5(src, stride, src0, src1, src2, src3, src4);
1265 LW4(dst, stride, tp0, tp1, tp2, tp3);
1277 ST_W4(out, 0, 1, 2, 3, dst, stride);
1281 int32_t stride, uint32_t coeff0,
1293 LD_UB5(src, stride, src0, src1, src2, src3, src4);
1294 src += (5 * stride);
1295 LD_UB4(src, stride, src5, src6, src7, src8);
1296 LW4(dst, stride, tp0, tp1, tp2, tp3);
1298 LW4(dst + 4 * stride, stride, tp0, tp1, tp2, tp3);
1313 ST_W8(out0, out1, 0, 1, 2, 3, 0, 1, 2, 3, dst, stride);
1317 int32_t stride, uint32_t coeff0,
1321 avc_chroma_vt_and_aver_dst_4x2_msa(src, dst, stride, coeff0, coeff1);
1323 avc_chroma_vt_and_aver_dst_4x4_msa(src, dst, stride, coeff0, coeff1);
1325 avc_chroma_vt_and_aver_dst_4x8_msa(src, dst, stride, coeff0, coeff1);
1330 int32_t stride, uint32_t coeff0,
1342 LD_UB5(src, stride, src0, src1, src2, src3, src4);
1343 LD4(dst, stride, tp0, tp1, tp2, tp3);
1355 ST_D4(out0, out1, 0, 1, 0, 1, dst, stride);
1359 int32_t stride, uint32_t coeff0,
1371 LD_UB5(src, stride, src0, src1, src2, src3, src4);
1372 src += (5 * stride);
1373 LD_UB4(src, stride, src5, src6, src7, src8);
1374 LD4(dst, stride, tp0, tp1, tp2, tp3);
1377 LD4(dst + 4 * stride, stride, tp0, tp1, tp2, tp3);
1398 ST_D8(out0, out1, out2, out3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride);
1402 int32_t stride, uint32_t coeff0,
1406 avc_chroma_vt_and_aver_dst_8x4_msa(src, dst, stride, coeff0, coeff1);
1408 avc_chroma_vt_and_aver_dst_8x8_msa(src, dst, stride, coeff0, coeff1);
1413 int32_t stride,
1432 LD_UB3(src, stride, src0, src1, src2);
1434 out1 = LH(dst + stride);
1450 dst += stride;
1455 int32_t stride,
1475 LD_UB5(src, stride, src0, src1, src2, src3, src4);
1477 tp1 = LH(dst + stride);
1478 tp2 = LH(dst + 2 * stride);
1479 tp3 = LH(dst + 3 * stride);
1496 ST_H4(dst0, 0, 1, 2, 3, dst, stride);
1500 int32_t stride,
1508 avc_chroma_hv_and_aver_dst_2x2_msa(src, dst, stride, coef_hor0,
1511 avc_chroma_hv_and_aver_dst_2x4_msa(src, dst, stride, coef_hor0,
1517 int32_t stride,
1536 LD_UB3(src, stride, src0, src1, src2);
1537 LW2(dst, stride, tp0, tp1);
1549 ST_W2(dst0, 0, 1, dst, stride);
1553 int32_t stride,
1573 LD_UB5(src, stride, src0, src1, src2, src3, src4);
1574 LW4(dst, stride, tp0, tp1, tp2, tp3);
1588 ST_W4(out, 0, 1, 2, 3, dst, stride);
1592 int32_t stride,
1612 LD_UB5(src, stride, src0, src1, src2, src3, src4);
1613 src += (5 * stride);
1614 LD_UB4(src, stride, src5, src6, src7, src8);
1615 LW4(dst, stride, tp0, tp1, tp2, tp3);
1617 LW4(dst + 4 * stride, stride, tp0, tp1, tp2, tp3);
1637 ST_W8(res0, res1, 0, 1, 2, 3, 0, 1, 2, 3, dst, stride);
1641 int32_t stride,
1649 avc_chroma_hv_and_aver_dst_4x2_msa(src, dst, stride, coef_hor0,
1652 avc_chroma_hv_and_aver_dst_4x4_msa(src, dst, stride, coef_hor0,
1655 avc_chroma_hv_and_aver_dst_4x8_msa(src, dst, stride, coef_hor0,
1661 int32_t stride,
1683 src += stride;
1686 LD_UB4(src, stride, src1, src2, src3, src4);
1687 src += (4 * stride);
1688 LD4(dst, stride, tp0, tp1, tp2, tp3);
1705 ST_D4(out0, out1, 0, 1, 0, 1, dst, stride);
1709 int32_t stride,
1732 LD_UB5(src, stride, src0, src1, src2, src3, src4);
1733 src += (5 * stride);
1734 LD_UB4(src, stride, src5, src6, src7, src8);
1752 LD4(dst, stride, tp0, tp1, tp2, tp3);
1755 LD4(dst + 4 * stride, stride, tp0, tp1, tp2, tp3);
1774 ST_D8(out0, out1, out2, out3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride);
1778 int32_t stride,
1786 avc_chroma_hv_and_aver_dst_8x4_msa(src, dst, stride, coef_hor0,
1789 avc_chroma_hv_and_aver_dst_8x8_msa(src, dst, stride, coef_hor0,
1794 static void copy_width4_msa(uint8_t *src, uint8_t *dst, int32_t stride,
1800 LW4(src, stride, tp0, tp1, tp2, tp3);
1801 src += 4 * stride;
1802 LW4(src, stride, tp4, tp5, tp6, tp7);
1803 SW4(tp0, tp1, tp2, tp3, dst, stride);
1804 dst += 4 * stride;
1805 SW4(tp4, tp5, tp6, tp7, dst, stride);
1807 LW4(src, stride, tp0, tp1, tp2, tp3);
1808 SW4(tp0, tp1, tp2, tp3, dst, stride);
1810 LW2(src, stride, tp0, tp1);
1812 dst += stride;
1817 static void copy_width8_msa(uint8_t *src, uint8_t *dst, int32_t stride,
1823 LD4(src, stride, src0, src1, src2, src3);
1824 src += 4 * stride;
1825 LD4(src, stride, src4, src5, src6, src7);
1826 SD4(src0, src1, src2, src3, dst, stride);
1827 dst += 4 * stride;
1828 SD4(src4, src5, src6, src7, dst, stride);
1830 LD4(src, stride, src0, src1, src2, src3);
1831 SD4(src0, src1, src2, src3, dst, stride);
1835 static void avg_width4_msa(uint8_t *src, uint8_t *dst, int32_t stride,
1842 LW4(src, stride, tp0, tp1, tp2, tp3);
1843 src += 4 * stride;
1845 LW4(src, stride, tp0, tp1, tp2, tp3);
1847 LW4(dst, stride, tp0, tp1, tp2, tp3);
1849 LW4(dst + 4 * stride, stride, tp0, tp1, tp2, tp3);
1852 ST_W8(dst0, dst1, 0, 1, 2, 3, 0, 1, 2, 3, dst, stride);
1854 LW4(src, stride, tp0, tp1, tp2, tp3);
1856 LW4(dst, stride, tp0, tp1, tp2, tp3);
1859 ST_W4(dst0, 0, 1, 2, 3, dst, stride);
1861 LW2(src, stride, tp0, tp1);
1863 LW2(dst, stride, tp0, tp1);
1866 ST_W2(dst0, 0, 1, dst, stride);
1870 static void avg_width8_msa(uint8_t *src, uint8_t *dst, int32_t stride,
1878 LD4(src, stride, tp0, tp1, tp2, tp3);
1879 src += 4 * stride;
1880 LD4(src, stride, tp4, tp5, tp6, tp7);
1885 LD4(dst, stride, tp0, tp1, tp2, tp3);
1886 LD4(dst + 4 * stride, stride, tp4, tp5, tp6, tp7);
1893 ST_D8(dst0, dst1, dst2, dst3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride);
1895 LD4(src, stride, tp0, tp1, tp2, tp3);
1898 LD4(dst, stride, tp0, tp1, tp2, tp3);
1902 ST_D4(dst0, dst1, 0, 1, 0, 1, dst, stride);
1907 ptrdiff_t stride, int height, int x, int y)
1912 avc_chroma_hv_8w_msa(src, dst, stride, x, (8 - x), y, (8 - y), height);
1914 avc_chroma_hz_8w_msa(src, dst, stride, x, (8 - x), height);
1916 avc_chroma_vt_8w_msa(src, dst, stride, y, (8 - y), height);
1918 copy_width8_msa(src, dst, stride, height);
1923 ptrdiff_t stride, int height, int x, int y)
1928 avc_chroma_hv_4w_msa(src, dst, stride, x, (8 - x), y, (8 - y), height);
1930 avc_chroma_hz_4w_msa(src, dst, stride, x, (8 - x), height);
1932 avc_chroma_vt_4w_msa(src, dst, stride, y, (8 - y), height);
1934 copy_width4_msa(src, dst, stride, height);
1939 ptrdiff_t stride, int height, int x, int y)
1946 avc_chroma_hv_2w_msa(src, dst, stride, x, (8 - x), y, (8 - y), height);
1948 avc_chroma_hz_2w_msa(src, dst, stride, x, (8 - x), height);
1950 avc_chroma_vt_2w_msa(src, dst, stride, y, (8 - y), height);
1955 src += stride;
1956 dst += stride;
1962 ptrdiff_t stride, int height, int x, int y)
1968 avc_chroma_hv_and_aver_dst_8w_msa(src, dst, stride, x, (8 - x), y,
1971 avc_chroma_hz_and_aver_dst_8w_msa(src, dst, stride, x, (8 - x), height);
1973 avc_chroma_vt_and_aver_dst_8w_msa(src, dst, stride, y, (8 - y), height);
1975 avg_width8_msa(src, dst, stride, height);
1980 ptrdiff_t stride, int height, int x, int y)
1985 avc_chroma_hv_and_aver_dst_4w_msa(src, dst, stride, x, (8 - x), y,
1988 avc_chroma_hz_and_aver_dst_4w_msa(src, dst, stride, x, (8 - x), height);
1990 avc_chroma_vt_and_aver_dst_4w_msa(src, dst, stride, y, (8 - y), height);
1992 avg_width4_msa(src, dst, stride, height);
1997 ptrdiff_t stride, int height, int x, int y)
2004 avc_chroma_hv_and_aver_dst_2w_msa(src, dst, stride, x, (8 - x), y,
2007 avc_chroma_hz_and_aver_dst_2w_msa(src, dst, stride, x, (8 - x), height);
2009 avc_chroma_vt_and_aver_dst_2w_msa(src, dst, stride, y, (8 - y), height);
2015 src += stride;
2016 dst += stride;