Lines Matching refs:src
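The matched lines below are C source lines, each prefixed with its line number in the file; the function names (ff_put_h264_chroma_mc8_lasx and friends) identify it as FFmpeg's LoongArch LASX H.264 chroma motion compensation, libavcodec/loongarch/h264chroma_lasx.c. Every avc_chroma_* kernel in the list is a width/height specialization of the H.264 eighth-pel bilinear chroma filter. As a reference point, here is a scalar sketch of that filter, mirroring FFmpeg's generic h264chroma_template.c rather than any of the LASX code itself:

    #include <stddef.h>
    #include <stdint.h>

    /* x and y are the eighth-pel fractional offsets, each in 0..7. */
    static void chroma_mc_put_c(uint8_t *dst, const uint8_t *src,
                                ptrdiff_t stride, int w, int h, int x, int y)
    {
        const int A = (8 - x) * (8 - y);   /* the four weights sum to 64 */
        const int B = x * (8 - y);
        const int C = (8 - x) * y;
        const int D = x * y;

        for (int i = 0; i < h; i++) {
            for (int j = 0; j < w; j++)
                dst[j] = (A * src[j]          + B * src[j + 1] +
                          C * src[j + stride] + D * src[j + stride + 1] +
                          32) >> 6;
            src += stride;
            dst += stride;
        }
    }

Run as a single loop this over-reads one row or column whenever x or y is zero, which is one reason the real code splits into hv (both fractions), hz (horizontal only), vt (vertical only) and plain-copy paths, as the matches below show.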
36 static av_always_inline void avc_chroma_hv_8x4_lasx(uint8_t *src, uint8_t *dst,
53 DUP2_ARG2(__lasx_xvld, chroma_mask_arr, 0, src, 0, mask, src0);
54 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2x, src, stride_3x, src, stride_4x,
74 static av_always_inline void avc_chroma_hv_8x8_lasx(uint8_t *src, uint8_t *dst,
93 DUP2_ARG2(__lasx_xvld, chroma_mask_arr, 0, src, 0, mask, src0);
94 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2x, src, stride_3x, src, stride_4x,
96 src += stride_4x;
97 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2x, src, stride_3x, src, stride_4x,
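The DUP2_ARG2/DUP4_ARG2 helpers that dominate these matches come from FFmpeg's libavutil/loongarch/loongson_intrinsics.h and replicate one intrinsic across several argument pairs. Their shape, quoted from memory and so best treated as a sketch:

    #define DUP2_ARG2(_INS, _IN0, _IN1, _IN2, _IN3, _OUT0, _OUT1) \
    {                                                             \
        _OUT0 = _INS(_IN0, _IN1);                                 \
        _OUT1 = _INS(_IN2, _IN3);                                 \
    }

So line 94 above expands to four strided __lasx_xvldx row loads, one per stride multiple; the destination operands sit on the continuation line the match cut off.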
130 static av_always_inline void avc_chroma_hz_8x4_lasx(uint8_t *src, uint8_t *dst,
143 DUP2_ARG2(__lasx_xvld, chroma_mask_arr, 0, src, 0, mask, src0);
144 DUP2_ARG2(__lasx_xvldx, src, stride, src, stride_2x, src1, src2);
145 src3 = __lasx_xvldx(src, stride_3x);
157 static av_always_inline void avc_chroma_hz_8x8_lasx(uint8_t *src, uint8_t *dst,
172 DUP2_ARG2(__lasx_xvld, chroma_mask_arr, 0, src, 0, mask, src0);
173 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2x, src, stride_3x, src, stride_4x,
175 src += stride_4x;
176 DUP2_ARG2(__lasx_xvldx, src, stride, src, stride_2x, src5, src6);
177 src7 = __lasx_xvldx(src, stride_3x);
196 static av_always_inline void avc_chroma_hz_nonmult_lasx(uint8_t *src,
215 DUP4_ARG2(__lasx_xvldx, src, 0, src, stride, src, stride_2x, src, stride_3x,
217 src += stride_4x;
230 src0 = __lasx_xvld(src, 0);
231 src1 = __lasx_xvldx(src, stride);
242 static av_always_inline void avc_chroma_vt_8x4_lasx(uint8_t *src, uint8_t *dst,
254 src0 = __lasx_xvld(src, 0);
255 src += stride;
256 DUP4_ARG2(__lasx_xvldx, src, 0, src, stride, src, stride_2x, src, stride_3x,
269 static av_always_inline void avc_chroma_vt_8x8_lasx(uint8_t *src, uint8_t *dst,
283 src0 = __lasx_xvld(src, 0);
284 src += stride;
285 DUP4_ARG2(__lasx_xvldx, src, 0, src, stride, src, stride_2x, src, stride_3x,
287 src += stride_4x;
288 DUP4_ARG2(__lasx_xvldx, src, 0, src, stride, src, stride_2x, src, stride_3x,
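The hz_* and vt_* families above and below apply a single coefficient pair; they differ only in whether the second tap comes from the next column or the next row. A scalar sketch covering both behaviours at once, with frac standing for the x (hz) or y (vt) offset that the callers at lines 818-824 pass in as the pair (frac, 8 - frac):

    #include <stddef.h>
    #include <stdint.h>

    /* step == 1 gives the hz_* behaviour, step == stride the vt_* one. */
    static void chroma_mc_1d_c(uint8_t *dst, const uint8_t *src,
                               ptrdiff_t stride, ptrdiff_t step,
                               int w, int h, int frac)
    {
        for (int i = 0; i < h; i++) {
            for (int j = 0; j < w; j++)
                dst[j] = ((8 - frac) * src[j] + frac * src[j + step] + 4) >> 3;
            src += stride;
            dst += stride;
        }
    }

The (sum + 4) >> 3 rounding here is arithmetically identical to the 2-D sketch's (sum + 32) >> 6 once the zero fraction is folded in.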
310 static av_always_inline void copy_width8x8_lasx(uint8_t *src, uint8_t *dst,
319 "ld.d %[tmp0], %[src], 0x0 \n\t"
320 "ldx.d %[tmp1], %[src], %[stride] \n\t"
321 "ldx.d %[tmp2], %[src], %[stride_2] \n\t"
322 "ldx.d %[tmp3], %[src], %[stride_3] \n\t"
323 "add.d %[src], %[src], %[stride_4] \n\t"
324 "ld.d %[tmp4], %[src], 0x0 \n\t"
325 "ldx.d %[tmp5], %[src], %[stride] \n\t"
326 "ldx.d %[tmp6], %[src], %[stride_2] \n\t"
327 "ldx.d %[tmp7], %[src], %[stride_3] \n\t"
342 [dst]"+&r"(dst), [src]"+&r"(src),
350 static av_always_inline void copy_width8x4_lasx(uint8_t *src, uint8_t *dst,
358 "ld.d %[tmp0], %[src], 0x0 \n\t"
359 "ldx.d %[tmp1], %[src], %[stride] \n\t"
360 "ldx.d %[tmp2], %[src], %[stride_2] \n\t"
361 "ldx.d %[tmp3], %[src], %[stride_3] \n\t"
370 : [stride]"r"(stride), [dst]"r"(dst), [src]"r"(src)
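copy_width8x8_lasx and copy_width8x4_lasx are pure GPR copies: eight bytes per row through ld.d/ldx.d, with the matching stores absent from this listing only because the store lines contain no "src". In plain C both reduce to:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    static void copy_width8_c(const uint8_t *src, uint8_t *dst,
                              ptrdiff_t stride, int height)  /* 4 or 8 */
    {
        for (int i = 0; i < height; i++) {
            memcpy(dst, src, 8);  /* one 8-byte row, like one ld.d/st.d pair */
            src += stride;
            dst += stride;
        }
    }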
375 static void avc_chroma_hv_8w_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
381 avc_chroma_hv_8x4_lasx(src, dst, stride, coef_hor0, coef_hor1, coef_ver0,
384 avc_chroma_hv_8x8_lasx(src, dst, stride, coef_hor0, coef_hor1, coef_ver0,
389 static void avc_chroma_hv_4x2_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
404 DUP2_ARG2(__lasx_xvld, chroma_mask_arr, 32, src, 0, mask, src0);
405 DUP2_ARG2(__lasx_xvldx, src, stride, src, stride_2, src1, src2);
417 static void avc_chroma_hv_4x4_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
433 DUP2_ARG2(__lasx_xvld, chroma_mask_arr, 32, src, 0, mask, src0);
434 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2, src, stride_3,
435 src, stride_4, src1, src2, src3, src4);
449 static void avc_chroma_hv_4x8_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
466 DUP2_ARG2(__lasx_xvld, chroma_mask_arr, 32, src, 0, mask, src0);
467 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2, src, stride_3,
468 src, stride_4, src1, src2, src3, src4);
469 src += stride_4;
470 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2, src, stride_3,
471 src, stride_4, src5, src6, src7, src8);
495 static void avc_chroma_hv_4w_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
501 avc_chroma_hv_4x8_lasx(src, dst, stride, coef_hor0, coef_hor1, coef_ver0,
504 avc_chroma_hv_4x4_lasx(src, dst, stride, coef_hor0, coef_hor1, coef_ver0,
507 avc_chroma_hv_4x2_lasx(src, dst, stride, coef_hor0, coef_hor1, coef_ver0,
512 static void avc_chroma_hz_4x2_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
521 DUP2_ARG2(__lasx_xvld, chroma_mask_arr, 32, src, 0, mask, src0);
522 src1 = __lasx_xvldx(src, stride);
531 static void avc_chroma_hz_4x4_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
542 DUP2_ARG2(__lasx_xvld, chroma_mask_arr, 32, src, 0, mask, src0);
543 DUP2_ARG2(__lasx_xvldx, src, stride, src, stride_2, src1, src2);
544 src3 = __lasx_xvldx(src, stride_3);
556 static void avc_chroma_hz_4x8_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
569 DUP2_ARG2(__lasx_xvld, chroma_mask_arr, 32, src, 0, mask, src0);
570 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2, src, stride_3,
571 src, stride_4, src1, src2, src3, src4);
572 src += stride_4;
573 DUP2_ARG2(__lasx_xvldx, src, stride, src, stride_2, src5, src6);
574 src7 = __lasx_xvldx(src, stride_3);
591 static void avc_chroma_hz_4w_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
596 avc_chroma_hz_4x8_lasx(src, dst, stride, coeff0, coeff1);
598 avc_chroma_hz_4x4_lasx(src, dst, stride, coeff0, coeff1);
600 avc_chroma_hz_4x2_lasx(src, dst, stride, coeff0, coeff1);
604 static void avc_chroma_hz_8w_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
609 avc_chroma_hz_8x4_lasx(src, dst, stride, coeff0, coeff1);
611 avc_chroma_hz_8x8_lasx(src, dst, stride, coeff0, coeff1);
613 avc_chroma_hz_nonmult_lasx(src, dst, stride, coeff0, coeff1, height);
617 static void avc_chroma_vt_4x2_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
627 src0 = __lasx_xvld(src, 0);
628 DUP2_ARG2(__lasx_xvldx, src, stride, src, stride << 1, src1, src2);
638 static void avc_chroma_vt_4x4_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
651 src0 = __lasx_xvld(src, 0);
652 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2, src, stride_3,
653 src, stride_4, src1, src2, src3, src4);
667 static void avc_chroma_vt_4x8_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
681 src0 = __lasx_xvld(src, 0);
682 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2, src, stride_3,
683 src, stride_4, src1, src2, src3, src4);
684 src += stride_4;
685 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2, src, stride_3,
686 src, stride_4, src5, src6, src7, src8);
708 static void avc_chroma_vt_4w_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
713 avc_chroma_vt_4x8_lasx(src, dst, stride, coeff0, coeff1);
715 avc_chroma_vt_4x4_lasx(src, dst, stride, coeff0, coeff1);
717 avc_chroma_vt_4x2_lasx(src, dst, stride, coeff0, coeff1);
721 static void avc_chroma_vt_8w_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
726 avc_chroma_vt_8x4_lasx(src, dst, stride, coeff0, coeff1);
728 avc_chroma_vt_8x8_lasx(src, dst, stride, coeff0, coeff1);
732 static void copy_width4_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
744 "ld.wu %[tp0], %[src], 0 \n\t"
745 "ldx.wu %[tp1], %[src], %[stride] \n\t"
746 "ldx.wu %[tp2], %[src], %[stride_2] \n\t"
747 "ldx.wu %[tp3], %[src], %[stride_3] \n\t"
748 "add.d %[src], %[src], %[stride_4] \n\t"
749 "ld.wu %[tp4], %[src], 0 \n\t"
750 "ldx.wu %[tp5], %[src], %[stride] \n\t"
751 "ldx.wu %[tp6], %[src], %[stride_2] \n\t"
752 "ldx.wu %[tp7], %[src], %[stride_3] \n\t"
763 [src]"+&r"(src), [dst]"+&r"(dst), [tp0]"+&r"(tp0), [tp1]"+&r"(tp1),
775 "ld.wu %[tp0], %[src], 0 \n\t"
776 "ldx.wu %[tp1], %[src], %[stride] \n\t"
777 "ldx.wu %[tp2], %[src], %[stride_2] \n\t"
778 "ldx.wu %[tp3], %[src], %[stride_3] \n\t"
784 [src]"+&r"(src), [dst]"+&r"(dst), [tp0]"+&r"(tp0), [tp1]"+&r"(tp1),
791 "ld.wu %[tp0], %[src], 0 \n\t"
792 "ldx.wu %[tp1], %[src], %[stride] \n\t"
796 : [src]"r"(src), [dst]"r"(dst), [stride]"r"(stride)
802 static void copy_width8_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
806 copy_width8x8_lasx(src, dst, stride);
808 copy_width8x4_lasx(src, dst, stride);
812 void ff_put_h264_chroma_mc4_lasx(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
818 avc_chroma_hv_4w_lasx(src, dst, stride, x, (8 - x), y, (8 - y), height);
820 avc_chroma_hz_4w_lasx(src, dst, stride, x, (8 - x), height);
822 avc_chroma_vt_4w_lasx(src, dst, stride, y, (8 - y), height);
824 copy_width4_lasx(src, dst, stride, height);
828 void ff_put_h264_chroma_mc8_lasx(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
834 copy_width8_lasx(src, dst, stride, height);
836 avc_chroma_hv_8w_lasx(src, dst, stride, x, (8 - x), y, (8 - y), height);
838 avc_chroma_hz_8w_lasx(src, dst, stride, x, (8 - x), height);
840 avc_chroma_vt_8w_lasx(src, dst, stride, y, (8 - y), height);
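The two put entry points pick a kernel from the fractional offsets. The if/else conditions themselves contain no "src" and so are missing above, but the call pattern at lines 818-824 and 834-840 implies the usual H.264 chroma dispatch; a sketch of the mc8 shape, with the branch conditions inferred rather than quoted:

    void put_h264_chroma_mc8_sketch(uint8_t *dst, uint8_t *src,
                                    ptrdiff_t stride, int height, int x, int y)
    {
        if (!(x || y))            /* integer position: straight copy */
            copy_width8_lasx(src, dst, stride, height);
        else if (x && y)          /* both fractions: 2-D hv kernel */
            avc_chroma_hv_8w_lasx(src, dst, stride, x, 8 - x, y, 8 - y, height);
        else if (x)               /* horizontal fraction only */
            avc_chroma_hz_8w_lasx(src, dst, stride, x, 8 - x, height);
        else                      /* vertical fraction only */
            avc_chroma_vt_8w_lasx(src, dst, stride, y, 8 - y, height);
    }

The mc4 entry orders its branches hv, hz, vt, copy (lines 818-824), but covers the same four cases.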
844 static av_always_inline void avc_chroma_hv_and_aver_dst_8x4_lasx(uint8_t *src,
862 DUP2_ARG2(__lasx_xvld, chroma_mask_arr, 0, src, 0, mask, src0);
863 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2x, src, stride_3x, src, stride_4x,
888 static av_always_inline void avc_chroma_hv_and_aver_dst_8x8_lasx(uint8_t *src,
908 DUP2_ARG2(__lasx_xvld, chroma_mask_arr, 0, src, 0, mask, src0);
909 src += stride;
910 DUP4_ARG2(__lasx_xvldx, src, 0, src, stride, src, stride_2x, src, stride_3x,
912 src += stride_4x;
913 DUP4_ARG2(__lasx_xvldx, src, 0, src, stride, src, stride_2x, src, stride_3x,
960 static av_always_inline void avc_chroma_hz_and_aver_dst_8x4_lasx(uint8_t *src,
976 DUP4_ARG2(__lasx_xvldx, src, 0, src, stride, src, stride_2x, src, stride_3x,
993 static av_always_inline void avc_chroma_hz_and_aver_dst_8x8_lasx(uint8_t *src,
1011 DUP4_ARG2(__lasx_xvldx, src, 0, src, stride, src, stride_2x, src, stride_3x,
1013 src += stride_4x;
1014 DUP4_ARG2(__lasx_xvldx, src, 0, src, stride, src, stride_2x, src, stride_3x,
1046 static av_always_inline void avc_chroma_vt_and_aver_dst_8x4_lasx(uint8_t *src,
1061 src0 = __lasx_xvld(src, 0);
1062 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2x, src, stride_3x, src, stride_4x,
1080 static av_always_inline void avc_chroma_vt_and_aver_dst_8x8_lasx(uint8_t *src,
1096 src0 = __lasx_xvld(src, 0);
1097 src += stride;
1098 DUP4_ARG2(__lasx_xvldx, src, 0, src, stride, src, stride_2x, src, stride_3x,
1100 src += stride_4x;
1101 DUP4_ARG2(__lasx_xvldx, src, 0, src, stride, src, stride_2x, src, stride_3x,
1135 static av_always_inline void avg_width8x8_lasx(uint8_t *src, uint8_t *dst,
1144 src0 = __lasx_xvldrepl_d(src, 0);
1145 src1 = __lasx_xvldrepl_d(src + stride, 0);
1146 src2 = __lasx_xvldrepl_d(src + stride_2x, 0);
1147 src3 = __lasx_xvldrepl_d(src + stride_3x, 0);
1164 src += stride_4x;
1166 src0 = __lasx_xvldrepl_d(src, 0);
1167 src1 = __lasx_xvldrepl_d(src + stride, 0);
1168 src2 = __lasx_xvldrepl_d(src + stride_2x, 0);
1169 src3 = __lasx_xvldrepl_d(src + stride_3x, 0);
1187 static av_always_inline void avg_width8x4_lasx(uint8_t *src, uint8_t *dst,
1195 src0 = __lasx_xvldrepl_d(src, 0);
1196 src1 = __lasx_xvldrepl_d(src + stride, 0);
1197 src2 = __lasx_xvldrepl_d(src + stride_2x, 0);
1198 src3 = __lasx_xvldrepl_d(src + stride_3x, 0);
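avg_width8x8_lasx and avg_width8x4_lasx serve the integer-position case of the averaging entry point: load both blocks and take a rounding byte average (the average-and-store half of each function matches no "src" line, hence the gaps). A scalar sketch:

    #include <stddef.h>
    #include <stdint.h>

    static void avg_width8_c(const uint8_t *src, uint8_t *dst,
                             ptrdiff_t stride, int height)
    {
        for (int i = 0; i < height; i++) {
            for (int j = 0; j < 8; j++)
                dst[j] = (dst[j] + src[j] + 1) >> 1;  /* round to nearest */
            src += stride;
            dst += stride;
        }
    }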
1216 static void avc_chroma_hv_and_aver_dst_8w_lasx(uint8_t *src, uint8_t *dst,
1225 avc_chroma_hv_and_aver_dst_8x4_lasx(src, dst, stride, coef_hor0,
1228 avc_chroma_hv_and_aver_dst_8x8_lasx(src, dst, stride, coef_hor0,
1233 static void avc_chroma_hz_and_aver_dst_8w_lasx(uint8_t *src, uint8_t *dst,
1238 avc_chroma_hz_and_aver_dst_8x4_lasx(src, dst, stride, coeff0, coeff1);
1240 avc_chroma_hz_and_aver_dst_8x8_lasx(src, dst, stride, coeff0, coeff1);
1244 static void avc_chroma_vt_and_aver_dst_8w_lasx(uint8_t *src, uint8_t *dst,
1249 avc_chroma_vt_and_aver_dst_8x4_lasx(src, dst, stride, coeff0, coeff1);
1251 avc_chroma_vt_and_aver_dst_8x8_lasx(src, dst, stride, coeff0, coeff1);
1255 static void avg_width8_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
1259 avg_width8x8_lasx(src, dst, stride);
1261 avg_width8x4_lasx(src, dst, stride);
1265 void ff_avg_h264_chroma_mc8_lasx(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
1271 avg_width8_lasx(src, dst, stride, height);
1273 avc_chroma_hv_and_aver_dst_8w_lasx(src, dst, stride, x, (8 - x), y,
1276 avc_chroma_hz_and_aver_dst_8w_lasx(src, dst, stride, x, (8 - x), height);
1278 avc_chroma_vt_and_aver_dst_8w_lasx(src, dst, stride, y, (8 - y), height);
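ff_avg_h264_chroma_mc8_lasx mirrors the put dispatcher, except that every path averages into dst: each *_and_aver_dst_* kernel computes the same bilinear result as its put counterpart and then, matching FFmpeg's generic op_avg (an assumption about the LASX code, not a quoted line), finishes with

    dst[j] = (dst[j] + ((bilinear_sum + 32) >> 6) + 1) >> 1;

which degenerates to the plain rounding average of avg_width8_lasx when x == y == 0.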