Lines Matching refs:stride

37                              ptrdiff_t stride, uint32_t coef_hor0,
41 ptrdiff_t stride_2x = stride << 1;
42 ptrdiff_t stride_3x = stride_2x + stride;
54 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2x, src, stride_3x, src, stride_4x,
69 __lasx_xvstelm_d(out, dst + stride, 0, 2);
75 ptrdiff_t stride, uint32_t coef_hor0,
79 ptrdiff_t stride_2x = stride << 1;
80 ptrdiff_t stride_3x = stride_2x + stride;
81 ptrdiff_t stride_4x = stride << 2;
94 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2x, src, stride_3x, src, stride_4x,
97 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2x, src, stride_3x, src, stride_4x,
120 __lasx_xvstelm_d(out0, dst + stride, 0, 2);
125 __lasx_xvstelm_d(out1, dst + stride, 0, 2);
131 ptrdiff_t stride, uint32_t coeff0, uint32_t coeff1)
133 ptrdiff_t stride_2x = stride << 1;
134 ptrdiff_t stride_3x = stride_2x + stride;
144 DUP2_ARG2(__lasx_xvldx, src, stride, src, stride_2x, src1, src2);
151 __lasx_xvstelm_d(out, dst + stride, 0, 2);
158 ptrdiff_t stride, uint32_t coeff0, uint32_t coeff1)
160 ptrdiff_t stride_2x = stride << 1;
161 ptrdiff_t stride_3x = stride_2x + stride;
162 ptrdiff_t stride_4x = stride << 2;
173 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2x, src, stride_3x, src, stride_4x,
176 DUP2_ARG2(__lasx_xvldx, src, stride, src, stride_2x, src5, src6);
186 __lasx_xvstelm_d(out0, dst + stride, 0, 2);
191 __lasx_xvstelm_d(out1, dst + stride, 0, 2);
197 uint8_t *dst, ptrdiff_t stride, uint32_t coeff0,
201 ptrdiff_t stride_2x = stride << 1;
202 ptrdiff_t stride_3x = stride_2x + stride;
203 ptrdiff_t stride_4x = stride << 2;
215 DUP4_ARG2(__lasx_xvldx, src, 0, src, stride, src, stride_2x, src, stride_3x,
223 __lasx_xvstelm_d(out, dst + stride, 0, 2);
231 src1 = __lasx_xvldx(src, stride);
237 dst += stride;
243 ptrdiff_t stride, uint32_t coeff0, uint32_t coeff1)
245 ptrdiff_t stride_2x = stride << 1;
246 ptrdiff_t stride_3x = stride_2x + stride;
255 src += stride;
256 DUP4_ARG2(__lasx_xvldx, src, 0, src, stride, src, stride_2x, src, stride_3x,
264 __lasx_xvstelm_d(out, dst + stride, 0, 2);
270 ptrdiff_t stride, uint32_t coeff0, uint32_t coeff1)
272 ptrdiff_t stride_2x = stride << 1;
273 ptrdiff_t stride_3x = stride_2x + stride;
274 ptrdiff_t stride_4x = stride << 2;
284 src += stride;
285 DUP4_ARG2(__lasx_xvldx, src, 0, src, stride, src, stride_2x, src, stride_3x,
288 DUP4_ARG2(__lasx_xvldx, src, 0, src, stride, src, stride_2x, src, stride_3x,
300 __lasx_xvstelm_d(out0, dst + stride, 0, 2);
305 __lasx_xvstelm_d(out1, dst + stride, 0, 2);
311 ptrdiff_t stride)
316 "slli.d %[stride_2], %[stride], 1 \n\t"
317 "add.d %[stride_3], %[stride_2], %[stride] \n\t"
320 "ldx.d %[tmp1], %[src], %[stride] \n\t"
325 "ldx.d %[tmp5], %[src], %[stride] \n\t"
330 "stx.d %[tmp1], %[dst], %[stride] \n\t"
335 "stx.d %[tmp5], %[dst], %[stride] \n\t"
345 : [stride]"r"(stride)
351 ptrdiff_t stride)
356 "slli.d %[stride_2], %[stride], 1 \n\t"
357 "add.d %[stride_3], %[stride_2], %[stride] \n\t"
359 "ldx.d %[tmp1], %[src], %[stride] \n\t"
364 "stx.d %[tmp1], %[dst], %[stride] \n\t"
370 : [stride]"r"(stride), [dst]"r"(dst), [src]"r"(src)
375 static void avc_chroma_hv_8w_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
381 avc_chroma_hv_8x4_lasx(src, dst, stride, coef_hor0, coef_hor1, coef_ver0,
384 avc_chroma_hv_8x8_lasx(src, dst, stride, coef_hor0, coef_hor1, coef_ver0,
389 static void avc_chroma_hv_4x2_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
393 ptrdiff_t stride_2 = stride << 1;
405 DUP2_ARG2(__lasx_xvldx, src, stride, src, stride_2, src1, src2);
414 __lasx_xvstelm_w(res_vt, dst + stride, 0, 1);
417 static void avc_chroma_hv_4x4_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
421 ptrdiff_t stride_2 = stride << 1;
422 ptrdiff_t stride_3 = stride_2 + stride;
434 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2, src, stride_3,
444 __lasx_xvstelm_w(res_hz0, dst + stride, 0, 1);
449 static void avc_chroma_hv_4x8_lasx(uint8_t *src, uint8_t * dst, ptrdiff_t stride,
453 ptrdiff_t stride_2 = stride << 1;
454 ptrdiff_t stride_3 = stride_2 + stride;
467 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2, src, stride_3,
470 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2, src, stride_3,
485 __lasx_xvstelm_w(res_hz0, dst + stride, 0, 1);
490 __lasx_xvstelm_w(res_hz0, dst + stride, 0, 3);
495 static void avc_chroma_hv_4w_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
501 avc_chroma_hv_4x8_lasx(src, dst, stride, coef_hor0, coef_hor1, coef_ver0,
504 avc_chroma_hv_4x4_lasx(src, dst, stride, coef_hor0, coef_hor1, coef_ver0,
507 avc_chroma_hv_4x2_lasx(src, dst, stride, coef_hor0, coef_hor1, coef_ver0,
512 static void avc_chroma_hz_4x2_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
522 src1 = __lasx_xvldx(src, stride);
528 __lasx_xvstelm_w(res, dst + stride, 0, 1);
531 static void avc_chroma_hz_4x4_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
534 ptrdiff_t stride_2 = stride << 1;
535 ptrdiff_t stride_3 = stride_2 + stride;
543 DUP2_ARG2(__lasx_xvldx, src, stride, src, stride_2, src1, src2);
551 __lasx_xvstelm_w(res, dst + stride, 0, 1);
556 static void avc_chroma_hz_4x8_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
559 ptrdiff_t stride_2 = stride << 1;
560 ptrdiff_t stride_3 = stride_2 + stride;
570 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2, src, stride_3,
573 DUP2_ARG2(__lasx_xvldx, src, stride, src, stride_2, src5, src6);
581 __lasx_xvstelm_w(res0, dst + stride, 0, 1);
586 __lasx_xvstelm_w(res0, dst + stride, 0, 3);
591 static void avc_chroma_hz_4w_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
596 avc_chroma_hz_4x8_lasx(src, dst, stride, coeff0, coeff1);
598 avc_chroma_hz_4x4_lasx(src, dst, stride, coeff0, coeff1);
600 avc_chroma_hz_4x2_lasx(src, dst, stride, coeff0, coeff1);
604 static void avc_chroma_hz_8w_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
609 avc_chroma_hz_8x4_lasx(src, dst, stride, coeff0, coeff1);
611 avc_chroma_hz_8x8_lasx(src, dst, stride, coeff0, coeff1);
613 avc_chroma_hz_nonmult_lasx(src, dst, stride, coeff0, coeff1, height);
617 static void avc_chroma_vt_4x2_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
628 DUP2_ARG2(__lasx_xvldx, src, stride, src, stride << 1, src1, src2);
635 __lasx_xvstelm_w(res, dst + stride, 0, 1);
638 static void avc_chroma_vt_4x4_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
641 ptrdiff_t stride_2 = stride << 1;
642 ptrdiff_t stride_3 = stride_2 + stride;
652 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2, src, stride_3,
662 __lasx_xvstelm_w(res, dst + stride, 0, 1);
667 static void avc_chroma_vt_4x8_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
670 ptrdiff_t stride_2 = stride << 1;
671 ptrdiff_t stride_3 = stride_2 + stride;
682 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2, src, stride_3,
685 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2, src, stride_3,
698 __lasx_xvstelm_w(res0, dst + stride, 0, 1);
703 __lasx_xvstelm_w(res0, dst + stride, 0, 3);
708 static void avc_chroma_vt_4w_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
713 avc_chroma_vt_4x8_lasx(src, dst, stride, coeff0, coeff1);
715 avc_chroma_vt_4x4_lasx(src, dst, stride, coeff0, coeff1);
717 avc_chroma_vt_4x2_lasx(src, dst, stride, coeff0, coeff1);
721 static void avc_chroma_vt_8w_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
726 avc_chroma_vt_8x4_lasx(src, dst, stride, coeff0, coeff1);
728 avc_chroma_vt_8x8_lasx(src, dst, stride, coeff0, coeff1);
732 static void copy_width4_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
741 "slli.d %[stride_2], %[stride], 1 \n\t"
742 "add.d %[stride_3], %[stride_2], %[stride] \n\t"
745 "ldx.wu %[tp1], %[src], %[stride] \n\t"
750 "ldx.wu %[tp5], %[src], %[stride] \n\t"
754 "stx.w %[tp1], %[dst], %[stride] \n\t"
759 "stx.w %[tp5], %[dst], %[stride] \n\t"
766 : [stride]"r"(stride)
773 "slli.d %[stride_2], %[stride], 1 \n\t"
774 "add.d %[stride_3], %[stride_2], %[stride] \n\t"
776 "ldx.wu %[tp1], %[src], %[stride] \n\t"
780 "stx.w %[tp1], %[dst], %[stride] \n\t"
786 : [stride]"r"(stride)
792 "ldx.wu %[tp1], %[src], %[stride] \n\t"
794 "stx.w %[tp1], %[dst], %[stride] \n\t"
796 : [src]"r"(src), [dst]"r"(dst), [stride]"r"(stride)
802 static void copy_width8_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
806 copy_width8x8_lasx(src, dst, stride);
808 copy_width8x4_lasx(src, dst, stride);
812 void ff_put_h264_chroma_mc4_lasx(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
818 avc_chroma_hv_4w_lasx(src, dst, stride, x, (8 - x), y, (8 - y), height);
820 avc_chroma_hz_4w_lasx(src, dst, stride, x, (8 - x), height);
822 avc_chroma_vt_4w_lasx(src, dst, stride, y, (8 - y), height);
824 copy_width4_lasx(src, dst, stride, height);
828 void ff_put_h264_chroma_mc8_lasx(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
834 copy_width8_lasx(src, dst, stride, height);
836 avc_chroma_hv_8w_lasx(src, dst, stride, x, (8 - x), y, (8 - y), height);
838 avc_chroma_hz_8w_lasx(src, dst, stride, x, (8 - x), height);
840 avc_chroma_vt_8w_lasx(src, dst, stride, y, (8 - y), height);
845 uint8_t *dst, ptrdiff_t stride, uint32_t coef_hor0,
849 ptrdiff_t stride_2x = stride << 1;
850 ptrdiff_t stride_3x = stride_2x + stride;
851 ptrdiff_t stride_4x = stride << 2;
863 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2x, src, stride_3x, src, stride_4x,
877 DUP4_ARG2(__lasx_xvldx, dst, 0, dst, stride, dst, stride_2x, dst, stride_3x,
883 __lasx_xvstelm_d(out, dst + stride, 0, 2);
889 uint8_t *dst, ptrdiff_t stride, uint32_t coef_hor0,
893 ptrdiff_t stride_2x = stride << 1;
894 ptrdiff_t stride_3x = stride_2x + stride;
895 ptrdiff_t stride_4x = stride << 2;
909 src += stride;
910 DUP4_ARG2(__lasx_xvldx, src, 0, src, stride, src, stride_2x, src, stride_3x,
913 DUP4_ARG2(__lasx_xvldx, src, 0, src, stride, src, stride_2x, src, stride_3x,
937 DUP4_ARG2(__lasx_xvldx, dst, 0, dst, stride, dst, stride_2x, dst, stride_3x,
942 DUP4_ARG2(__lasx_xvldx, dst, 0, dst, stride, dst, stride_2x, dst, stride_3x,
950 __lasx_xvstelm_d(out0, dst + stride, 0, 2);
955 __lasx_xvstelm_d(out1, dst + stride, 0, 2);
961 uint8_t *dst, ptrdiff_t stride, uint32_t coeff0,
964 ptrdiff_t stride_2x = stride << 1;
965 ptrdiff_t stride_3x = stride_2x + stride;
976 DUP4_ARG2(__lasx_xvldx, src, 0, src, stride, src, stride_2x, src, stride_3x,
982 DUP4_ARG2(__lasx_xvldx, dst, 0, dst, stride, dst, stride_2x, dst, stride_3x,
988 __lasx_xvstelm_d(out, dst + stride, 0, 2);
994 uint8_t *dst, ptrdiff_t stride, uint32_t coeff0,
997 ptrdiff_t stride_2x = stride << 1;
998 ptrdiff_t stride_3x = stride_2x + stride;
999 ptrdiff_t stride_4x = stride << 2;
1011 DUP4_ARG2(__lasx_xvldx, src, 0, src, stride, src, stride_2x, src, stride_3x,
1014 DUP4_ARG2(__lasx_xvldx, src, 0, src, stride, src, stride_2x, src, stride_3x,
1023 DUP4_ARG2(__lasx_xvldx, dst, 0, dst, stride, dst, stride_2x, dst, stride_3x,
1028 DUP4_ARG2(__lasx_xvldx, dst, 0, dst, stride, dst, stride_2x, dst, stride_3x,
1036 __lasx_xvstelm_d(out0, dst + stride, 0, 2);
1041 __lasx_xvstelm_d(out1, dst + stride, 0, 2);
1047 uint8_t *dst, ptrdiff_t stride, uint32_t coeff0,
1050 ptrdiff_t stride_2x = stride << 1;
1051 ptrdiff_t stride_3x = stride_2x + stride;
1052 ptrdiff_t stride_4x = stride << 2;
1062 DUP4_ARG2(__lasx_xvldx, src, stride, src, stride_2x, src, stride_3x, src, stride_4x,
1069 DUP4_ARG2(__lasx_xvldx, dst, 0, dst, stride, dst, stride_2x, dst, stride_3x,
1075 __lasx_xvstelm_d(out, dst + stride, 0, 2);
1081 uint8_t *dst, ptrdiff_t stride, uint32_t coeff0,
1084 ptrdiff_t stride_2x = stride << 1;
1085 ptrdiff_t stride_3x = stride_2x + stride;
1086 ptrdiff_t stride_4x = stride << 2;
1097 src += stride;
1098 DUP4_ARG2(__lasx_xvldx, src, 0, src, stride, src, stride_2x, src, stride_3x,
1101 DUP4_ARG2(__lasx_xvldx, src, 0, src, stride, src, stride_2x, src, stride_3x,
1112 DUP4_ARG2(__lasx_xvldx, dst, 0, dst, stride, dst, stride_2x, dst, stride_3x,
1117 DUP4_ARG2(__lasx_xvldx, dst, 0, dst, stride, dst, stride_2x, dst, stride_3x,
1125 __lasx_xvstelm_d(out0, dst + stride, 0, 2);
1130 __lasx_xvstelm_d(out1, dst + stride, 0, 2);
1136 ptrdiff_t stride)
1140 ptrdiff_t stride_2x = stride << 1;
1141 ptrdiff_t stride_3x = stride_2x + stride;
1142 ptrdiff_t stride_4x = stride << 2;
1145 src1 = __lasx_xvldrepl_d(src + stride, 0);
1149 dst1 = __lasx_xvldrepl_d(dst + stride, 0);
1160 __lasx_xvstelm_d(dst0, dst + stride, 0, 1);
1167 src1 = __lasx_xvldrepl_d(src + stride, 0);
1171 dst1 = __lasx_xvldrepl_d(dst + stride, 0);
1182 __lasx_xvstelm_d(dst0, dst + stride, 0, 1);
1188 ptrdiff_t stride)
1192 ptrdiff_t stride_2x = stride << 1;
1193 ptrdiff_t stride_3x = stride_2x + stride;
1196 src1 = __lasx_xvldrepl_d(src + stride, 0);
1200 dst1 = __lasx_xvldrepl_d(dst + stride, 0);
1211 __lasx_xvstelm_d(dst0, dst + stride, 0, 1);
1217 ptrdiff_t stride,
1225 avc_chroma_hv_and_aver_dst_8x4_lasx(src, dst, stride, coef_hor0,
1228 avc_chroma_hv_and_aver_dst_8x8_lasx(src, dst, stride, coef_hor0,
1234 ptrdiff_t stride, uint32_t coeff0,
1238 avc_chroma_hz_and_aver_dst_8x4_lasx(src, dst, stride, coeff0, coeff1);
1240 avc_chroma_hz_and_aver_dst_8x8_lasx(src, dst, stride, coeff0, coeff1);
1245 ptrdiff_t stride, uint32_t coeff0,
1249 avc_chroma_vt_and_aver_dst_8x4_lasx(src, dst, stride, coeff0, coeff1);
1251 avc_chroma_vt_and_aver_dst_8x8_lasx(src, dst, stride, coeff0, coeff1);
1255 static void avg_width8_lasx(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
1259 avg_width8x8_lasx(src, dst, stride);
1261 avg_width8x4_lasx(src, dst, stride);
1265 void ff_avg_h264_chroma_mc8_lasx(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
1271 avg_width8_lasx(src, dst, stride, height);
1273 avc_chroma_hv_and_aver_dst_8w_lasx(src, dst, stride, x, (8 - x), y,
1276 avc_chroma_hz_and_aver_dst_8w_lasx(src, dst, stride, x, (8 - x), height);
1278 avc_chroma_vt_and_aver_dst_8w_lasx(src, dst, stride, y, (8 - y), height);