Lines Matching refs:v4

32         ld1             {v3.16b, v4.16b}, [x0], #32
39 shl v19.8h, v4.8h, #4 // 16 * src[24]
54 mla v7.8h, v4.8h, v0.h[2] // 16 * src[8] + 15 * src[24] + 4 * src[56]
58 mla v18.8h, v4.8h, v0.h[1] // - 4 * src[8] + 9 * src[24] + 16 * src[56]
59 add v4.8h, v1.8h, v5.8h // t1/2 = 12/2 * src[0] + 12/2 * src[32]
67 add v22.8h, v4.8h, v3.8h // t5/2 = t1/2 + t3/2
69 sub v17.8h, v4.8h, v3.8h // t8/2 = t1/2 - t3/2
70 add v23.8h, v4.8h, v3.8h // t5/2 = t1/2 + t3/2
73 sub v2.8h, v4.8h, v3.8h // t8/2 = t1/2 - t3/2
75 neg v4.8h, v20.8h // +t2
80 ssra v5.8h, v4.8h, #1 // (t6 + t2) >> 1
87 srshr v4.8h, v5.8h, #2 // (t6 + t2 + 4) >> 3
94 trn2 v17.8h, v3.8h, v4.8h
102 trn1 v3.8h, v3.8h, v4.8h
103 trn2 v4.2d, v21.2d, v18.2d
111 shl v17.8h, v4.8h, #4 // 16 * src[40]
116 shl v23.8h, v4.8h, #2 // 4 * src[40]
147 mls v16.8h, v4.8h, v0.h[2] // -t4 = - 4 * src[8] + 9 * src[24] - 15 * src[40] + 16 * src[56]
150 mla v2.8h, v4.8h, v0.h[1] // t1 = 16 * src[8] + 15 * src[24] + 9 * src[40] + 4 * src[56]
152 neg v4.8h, v5.8h // +t3
161 ssra v19.8h, v4.8h, #1 // (t7 + t3) >> 1
169 srshr v4.8h, v19.8h, #6 // (t7 + t3 + 64) >> 7
176 st1 {v4.16b, v5.16b}, [x1], #32
190 ld1 {v1.8b, v2.8b, v3.8b, v4.8b}, [x2], #32
196 trn2 v7.4h, v2.4h, v4.4h
198 trn1 v2.4h, v2.4h, v4.4h
200 trn2 v4.4h, v17.4h, v19.4h
206 trn1 v6.2s, v7.2s, v4.2s
207 trn2 v4.2s, v7.2s, v4.2s
216 shl v23.4h, v4.4h, #4 // 16 * src[7]
237 mla v17.4h, v4.4h, v0.h[1] // -t2 = - 15 * src[1] + 4 * src[3] + 16 * src[5] + 9 * src[7]
239 mls v16.4h, v4.4h, v0.h[2] // -t3 = - 9 * src[1] + 16 * src[3] - 4 * src[5] - 15 * src[7]
242 sub v4.4h, v7.4h, v20.4h // t2/2 = 12/2 * src[0] - 12/2 * src[4]
246 add v20.4h, v4.4h, v2.4h // t6/2 = t2/2 + t4/2
247 sub v24.4h, v4.4h, v2.4h // t7/2 = t2/2 - t4/2
249 add v27.4h, v4.4h, v2.4h // t6/2 = t2/2 + t4/2
250 sub v2.4h, v4.4h, v2.4h // t7/2 = t2/2 - t4/2
253 neg v4.4h, v16.4h // +t3
258 ssra v24.4h, v4.4h, #1 // (t7 + t3) >> 1
266 trn1 v4.2d, v19.2d, v26.2d
270 srshr v4.8h, v4.8h, #2 // (t8 + t4 + 4) >> 3, (t5 - t1 + 4) >> 3
273 trn2 v2.8h, v3.8h, v4.8h
274 trn1 v3.8h, v3.8h, v4.8h
275 trn2 v4.4s, v6.4s, v2.4s
278 mul v3.8h, v4.8h, v0.h[5] // 22/2 * src[24]
280 mul v4.8h, v4.8h, v0.h[4] // 10/2 * src[24]
284 mla v4.8h, v2.8h, v0.h[5] // t3/2 = 22/2 * src[8] + 10/2 * src[24]
288 neg v6.8h, v4.8h // -t3/2
289 ssra v4.8h, v0.8h, #1 // (t1 + t3) >> 1
293 srshr v0.8h, v4.8h, #6 // (t1 + t3 + 64) >> 7
326 ld1 {v4.d}[0], [x2], x3 // 30 31 32 33
330 ld1 {v4.d}[1], [x2] // 70 71 72 73
336 trn2 v2.8h, v3.8h, v4.8h // 21 31 23 33 61 71 63 73
337 trn1 v3.8h, v3.8h, v4.8h // 20 30 22 32 60 70 62 72
338 ld1 {v4.s}[0], [x0], x1
350 ld1 {v4.s}[1], [x0]
437 uaddw v1.8h, v1.8h, v4.8b
467 ld1 {v4.d}[0], [x2] // 30 31 32 33
474 trn2 v2.4h, v3.4h, v4.4h // 21 31 23 33
475 trn1 v3.4h, v3.4h, v4.4h // 20 30 22 32
476 trn2 v4.2s, v7.2s, v2.2s // 03 13 23 33
480 mul v3.4h, v4.4h, v0.h[0] // 10/2 * src[3]
481 mul v4.4h, v4.4h, v0.h[1] // 22/2 * src[3]
485 mls v4.4h, v2.4h, v0.h[0] // t4/2 = - 10/2 * src[1] + 22/2 * src[3]
489 neg v16.4h, v4.4h // -t4/2
491 ssra v4.4h, v1.4h, #1 // (t2 + t4) >> 1
495 srshr v2.4h, v4.4h, #2 // (t2 + t4 + 4) >> 3
497 srshr v4.4h, v7.4h, #2 // (t1 - t3 + 4) >> 3
500 trn2 v3.4h, v2.4h, v4.4h // 12 13 32 33
501 trn1 v2.4h, v2.4h, v4.4h // 02 03 22 23
502 trn2 v4.2s, v7.2s, v3.2s // 30 31 32 33
506 mul v2.4h, v4.4h, v0.h[1] // 22/2 * src[24]
507 mul v4.4h, v4.4h, v0.h[0] // 10/2 * src[24]
511 mla v4.4h, v3.4h, v0.h[1] // t3/2 = 22/2 * src[8] + 10/2 * src[24]
515 neg v7.4h, v4.4h // -t3/2
516 ssra v4.4h, v0.4h, #1 // (t1 + t3) >> 1
520 trn1 v0.2d, v4.2d, v3.2d
550 ld1 {v4.8b}, [x0], x1
564 uaddw v4.8h, v16.8h, v4.8b
572 sqxtun v4.8b, v4.8h
580 st1 {v4.8b}, [x3], x1
607 dup v4.8h, w0
608 uaddw v0.8h, v4.8h, v0.8b
609 uaddw v1.8h, v4.8h, v1.8b
610 uaddw v2.8h, v4.8h, v2.8b
611 uaddw v3.8h, v4.8h, v3.8b
644 dup v4.8h, w2
648 uaddw v0.8h, v4.8h, v0.8b
649 uaddw v1.8h, v4.8h, v1.8b
650 uaddw v2.8h, v4.8h, v2.8b
651 uaddw v3.8h, v4.8h, v3.8b
718 ld1 {v4.s}[0], [x0], x1 // P6
727 uxtl v4.8h, v4.8b // P6
731 mls v17.4h, v4.4h, v0.h[1] // 2*P5-5*P6
743 mls v5.4h, v4.4h, v0.h[0] // 2*P3-5*P4+5*P5-2*P6
744 abs v4.4h, v3.4h
747 sshr v4.4h, v4.4h, #1 // clip
752 cmeq v16.4h, v4.4h, #0 // test clip == 0
766 cmhs v5.4h, v0.4h, v4.4h
768 bsl v5.8b, v4.8b, v0.8b // FFMIN(d, clip)
791 ld1 {v4.8b}, [x3]
795 trn1 v2.8b, v3.8b, v4.8b
796 trn2 v3.8b, v3.8b, v4.8b
797 trn1 v4.4h, v6.4h, v2.4h // P1, P5
801 ushll v3.8h, v4.8b, #1 // 2*P1, 2*P5
807 uxtl v4.8h, v4.8b // P1, P5
811 mov d4, v4.d[1] // P5
813 mla v2.4h, v4.4h, v0.h[1] // 2*P3-5*P4+5*P5
814 sub v7.4h, v1.4h, v4.4h // P4-P5
841 mla v4.4h, v0.4h, v2.4h // invert d depending on clip_sign & a0_sign, or zero it if they match, and accumulate into P5
843 sqxtun v3.8b, v4.8h
863 ld1 {v4.8b}, [x3], x1 // P2
870 uxtl v4.8h, v4.8b // P2
875 mls v3.8h, v4.8h, v0.h[1] // 2*P1-5*P2
876 uxtl v4.8h, v7.8b // P7
881 mla v16.8h, v4.8h, v0.h[1] // 2*P5-5*P6+5*P7
884 sub v4.8h, v7.8h, v1.8h // P4-P5
887 abs v17.8h, v4.8h
888 sshr v4.8h, v4.8h, #8 // clip_sign
904 sub v4.8h, v4.8h, v6.8h // clip_sign - a0_sign
920 mls v7.8h, v0.8h, v4.8h // invert d depending on clip_sign & a0_sign, or zero it if they match, and accumulate into P4
921 mla v1.8h, v0.8h, v4.8h // invert d depending on clip_sign & a0_sign, or zero it if they match, and accumulate into P5
942 ld1 {v4.8b}, [x3], x1
949 trn1 v2.8b, v3.8b, v4.8b // P1[2], P1[3], P3[2]...
950 trn2 v3.8b, v3.8b, v4.8b // P2[2], P2[3], P4[2]...
951 dup v4.8h, w2 // pq
1005 cmhs v4.8h, v19.8h, v4.8h // test a0 >= pq
1009 orr v4.16b, v6.16b, v4.16b // test clip == 0 || a0 >= pq
1011 orr v5.16b, v4.16b, v7.16b // test clip == 0 || a0 >= pq || a3 >= a0
1019 bic v0.16b, v5.16b, v4.16b // set each d to zero if it should not be filtered because clip == 0 || a0 >= pq (a3 > a0 case already zeroed by saturating sub)
1048 ld1 {v4.16b}, [x3], x1 // P2
1055 uxtl v19.8h, v4.8b // P2[0..7]
1062 uxtl2 v4.8h, v4.16b // P2[8..15]
1067 mls v3.8h, v4.8h, v0.h[1] // 2*P1[8..15]-5*P2[8..15]
1068 ushll v4.8h, v6.8b, #1 // 2*P3[0..7]
1078 mls v4.8h, v25.8h, v0.h[1] // 2*P3[0..7]-5*P4[0..7]
1095 mla v4.8h, v24.8h, v0.h[1] // 2*P3[0..7]-5*P4[0..7]+5*P5[0..7]
1105 mls v4.8h, v21.8h, v0.h[0] // 2*P3[0..7]-5*P4[0..7]+5*P5[0..7]-2*P6[0..7]
1111 srshr v4.8h, v4.8h, #3
1116 abs v17.8h, v4.8h // a0[0..7]
1117 sshr v4.8h, v4.8h, #8 // a0_sign[0..7]
1123 sub v4.8h, v26.8h, v4.8h // clip_sign[0..7] - a0_sign[0..7]
1151 mls v25.8h, v5.8h, v4.8h // invert d[0..7] depending on clip_sign[0..7] & a0_sign[0..7], or zero it if they match, and accumulate into P4[0..7]
1154 mla v24.8h, v5.8h, v4.8h // invert d[0..7] depending on clip_sign[0..7] & a0_sign[0..7], or zero it if they match, and accumulate into P5[0..7]
1182 ld1 {v4.8b}, [x3], x1
1191 trn1 v18.8b, v3.8b, v4.8b // P1[2], P1[3], P3[2]...
1193 trn2 v3.8b, v3.8b, v4.8b // P2[2], P2[3], P4[2]...
1194 ld1 {v4.8b}, [x3], x1
1207 trn1 v26.8b, v4.8b, v21.8b // P1[10], P1[11], P3[10]...
1208 trn2 v4.8b, v4.8b, v21.8b // P2[10], P2[11], P4[10]...
1217 trn1 v25.4h, v2.4h, v4.4h // P2[8], P2[9], P2[10], P2[11], P6[8]...
1234 trn2 v2.4h, v2.4h, v4.4h // P4[8], P4[9], P4[10], P4[11], P8[8]...
1235 trn2 v4.4h, v6.4h, v17.4h // P4[12], P4[13], P4[14], P4[15], P8[12]...
1251 trn1 v18.2s, v2.2s, v4.2s // P4[8..15]
1262 trn2 v2.2s, v2.2s, v4.2s // P8[8..15]
1263 uxtl v4.8h, v18.8b // P4[8..15]
1274 sub v22.8h, v4.8h, v18.8h // P4[8..15]-P5[8..15]
1277 mls v3.8h, v4.8h, v0.h[1] // 2*P3[8..15]-5*P4[8..15]
1286 mls v24.8h, v4.8h, v0.h[0] // 2*P1[8..15]-5*P2[8..15]+5*P3[8..15]-2*P4[8..15]
1340 mls v4.8h, v0.8h, v1.8h // invert d[8..15] depending on clip_sign[8..15] & a0_sign[8..15], or zero it if they match, and accumulate into P4
1344 sqxtun v2.8b, v4.8h
1424 1: ld1 {v3.16b, v4.16b, v5.16b}, [x0], #48
1425 ext v25.16b, v3.16b, v4.16b, #1
1426 ext v26.16b, v3.16b, v4.16b, #2
1427 ext v27.16b, v3.16b, v4.16b, #3
1428 ext v29.16b, v4.16b, v5.16b, #1
1429 ext v30.16b, v4.16b, v5.16b, #2
1430 ext v31.16b, v4.16b, v5.16b, #3
1435 bic v28.16b, v4.16b, v20.16b
1494 st1 {v3.16b, v4.16b}, [x2], #32
1496 ld1 {v4.16b, v5.16b}, [x0], #32
1505 ext v25.16b, v3.16b, v4.16b, #1
1507 ext v26.16b, v3.16b, v4.16b, #2
1508 ext v27.16b, v3.16b, v4.16b, #3
1509 ext v29.16b, v4.16b, v5.16b, #1
1511 ext v30.16b, v4.16b, v5.16b, #2
1512 ext v31.16b, v4.16b, v5.16b, #3
1518 bic v28.16b, v4.16b, v20.16b