Lines Matching refs:v7
37 shl v7.8h, v2.8h, #4 // 16 * src[8]
48 ssra v7.8h, v22.8h, #2 // 16 * src[8] + 4 * src[56]
54 mla v7.8h, v4.8h, v0.h[2] // 16 * src[8] + 15 * src[24] + 4 * src[56]
62 mla v7.8h, v6.8h, v0.h[1] // t1 = 16 * src[8] + 15 * src[24] + 9 * src[40] + 4 * src[56]
74 neg v3.8h, v7.8h // -t1
77 ssra v22.8h, v7.8h, #1 // (t5 + t1) >> 1
79 neg v7.8h, v18.8h // +t4
83 ssra v17.8h, v7.8h, #1 // (t8 + t4) >> 1
92 srshr v7.8h, v21.8h, #2 // (t6 - t2 + 4) >> 3
97 trn2 v20.8h, v7.8h, v16.8h
107 trn1 v2.8h, v7.8h, v16.8h
109 trn2 v7.2d, v17.2d, v19.2d
114 shl v21.8h, v7.8h, #4 // 16 * src[56]
143 mla v17.8h, v7.8h, v0.h[1] // -t2 = - 15 * src[8] + 4 * src[24] + 16 * src[40] + 9 * src[56]
144 mls v5.8h, v7.8h, v0.h[2] // -t3 = - 9 * src[8] + 16 * src[24] - 4 * src[40] - 15 * src[56]
145 add v7.8h, v1.8h, v3.8h // t6/2 = t2/2 + t4/2
158 ssra v7.8h, v21.8h, #1 // (t6 + t2) >> 1
168 srshr v3.8h, v7.8h, #6 // (t6 + t2 + 64) >> 7
196 trn2 v7.4h, v2.4h, v4.4h
206 trn1 v6.2s, v7.2s, v4.2s
207 trn2 v4.2s, v7.2s, v4.2s
208 trn1 v7.2s, v1.2s, v16.2s
225 shl v7.4h, v7.4h, #2 // 8/2 * src[0]
230 ssra v7.4h, v7.4h, #1 // 12/2 * src[0]
240 add v3.4h, v7.4h, v20.4h // t1/2 = 12/2 * src[0] + 12/2 * src[4]
242 sub v4.4h, v7.4h, v20.4h // t2/2 = 12/2 * src[0] - 12/2 * src[4]
244 add v7.4h, v3.4h, v1.4h // t5/2 = t1/2 + t3/2
255 ssra v7.4h, v21.4h, #1 // (t5 + t1) >> 1
263 trn1 v1.2d, v7.2d, v1.2d
276 trn1 v7.4s, v1.4s, v3.4s
281 mul v6.8h, v7.8h, v0.h[6] // 17 * src[0]
333 ld1 {v7.s}[0], [x0], x1
348 ld1 {v7.s}[1], [x0], x1
436 uaddw v3.8h, v3.8h, v7.8b
471 trn2 v7.4h, v1.4h, v2.4h // 01 11 03 13
476 trn2 v4.2s, v7.2s, v2.2s // 03 13 23 33
478 trn1 v2.2s, v7.2s, v2.2s // 01 11 21 31
482 mul v7.4h, v16.4h, v0.h[2] // 17 * src[0]
486 add v2.4h, v7.4h, v1.4h // t1 = 17 * src[0] + 17 * src[2]
487 sub v1.4h, v7.4h, v1.4h // t2 = 17 * src[0] - 17 * src[2]
488 neg v7.4h, v3.4h // -t3/2
493 ssra v7.4h, v2.4h, #1 // (t1 - t3) >> 1
497 srshr v4.4h, v7.4h, #2 // (t1 - t3 + 4) >> 3
498 trn2 v7.4h, v1.4h, v3.4h // 10 11 30 31
502 trn2 v4.2s, v7.2s, v3.2s // 30 31 32 33
504 trn1 v3.2s, v7.2s, v3.2s // 10 11 12 13
508 mul v7.4h, v16.4h, v0.h[2] // 17 * src[0]
512 add v0.4h, v7.4h, v1.4h // t1 = 17 * src[0] + 17 * src[16]
513 sub v1.4h, v7.4h, v1.4h // t2 = 17 * src[0] - 17 * src[16]
515 neg v7.4h, v4.4h // -t3/2
519 ssra v7.4h, v0.4h, #1 // (t1 - t3) >> 1
521 trn1 v1.2d, v2.2d, v7.2d
556 ld1 {v7.8b}, [x0]
569 uaddw v7.8h, v16.8h, v7.8b
578 sqxtun v2.8b, v7.8h
721 ld1 {v7.s}[0], [x3] // P4
733 uxtl v6.8h, v7.8b // P4
745 srshr v7.4h, v17.4h, #3
749 abs v7.4h, v7.4h // a2
755 cmhs v19.4h, v2.4h, v7.4h // test a1 >= a2
758 bsl v19.8b, v7.8b, v2.8b // a3
761 cmhs v7.4h, v19.4h, v17.4h // test a3 >= a0
763 orr v5.8b, v2.8b, v7.8b // test clip == 0 || a0 >= pq || a3 >= a0
798 trn1 v7.4h, v1.4h, v3.4h // P2, P6
802 uxtl v6.8h, v7.8b // P2, P6
803 uxtl v7.8h, v2.8b // P3, P7
808 mla v3.8h, v7.8h, v0.h[1] // 2*P1-5*P2+5*P3, 2*P5-5*P6+5*P7
814 sub v7.4h, v1.4h, v4.4h // P4-P5
817 abs v6.4h, v7.4h
818 sshr v7.4h, v7.4h, #8 // clip_sign
828 sub v2.4h, v7.4h, v2.4h // clip_sign - a0_sign
832 cmhs v7.4h, v19.4h, v17.4h // test a3 >= a0
834 orr v5.8b, v3.8b, v7.8b // test clip == 0 || a0 >= pq || a3 >= a0
866 ld1 {v7.8b}, [x0], x1 // P7
876 uxtl v4.8h, v7.8b // P7
879 uxtl v7.8h, v17.8b // P4
884 sub v4.8h, v7.8h, v1.8h // P4-P5
885 mls v6.8h, v7.8h, v0.h[1] // 2*P3-5*P4
889 mls v3.8h, v7.8h, v0.h[0] // 2*P1-5*P2+5*P3-2*P4
920 mls v7.8h, v0.8h, v4.8h // invert d depending on clip_sign & a0_sign, or zero it if they match, and accumulate into P4
922 sqxtun v0.8b, v7.8h
945 ld1 {v7.8b}, [x3], x1
956 trn1 v20.8b, v7.8b, v17.8b // P1[6], P1[7], P3[6]...
957 trn2 v7.8b, v7.8b, v17.8b // P2[6], P2[7], P4[6]...
961 trn1 v16.4h, v5.4h, v7.4h // P2[4], P2[5], P2[6], P2[7], P6[4]...
963 trn2 v5.4h, v5.4h, v7.4h // P4[4], P4[5], P4[6], P4[7], P8[4]...
964 trn1 v7.2s, v6.2s, v3.2s // P1
970 ushll v7.8h, v7.8b, #1 // 2*P1
977 mls v7.8h, v5.8h, v0.h[1] // 2*P1-5*P2
985 mla v7.8h, v18.8h, v0.h[1] // 2*P1-5*P2+5*P3
991 mls v7.8h, v16.8h, v0.h[0] // 2*P1-5*P2+5*P3-2*P4
997 srshr v7.8h, v7.8h, #3
999 abs v7.8h, v7.8h // a1
1001 cmhs v18.8h, v7.8h, v17.8h // test a1 >= a2
1004 bsl v18.16b, v17.16b, v7.16b // a3
1008 cmhs v7.8h, v18.8h, v19.8h // test a3 >= a0
1011 orr v5.16b, v4.16b, v7.16b // test clip == 0 || a0 >= pq || a3 >= a0
1051 ld1 {v7.16b}, [x0], x1 // P7
1066 uxtl v24.8h, v7.8b // P7[0..7]
1073 uxtl2 v7.8h, v7.16b // P7[8..15]
1084 mla v19.8h, v7.8h, v0.h[1] // 2*P5[8..15]-5*P6[8..15]+5*P7[8..15]
1085 sub v7.8h, v18.8h, v1.8h // P4[8..15]-P5[8..15]
1090 abs v28.8h, v7.8h
1093 sshr v7.8h, v7.8h, #8 // clip_sign[8..15]
1131 sub v6.8h, v7.8h, v6.8h // clip_sign[8..15] - a0_sign[8..15]
1134 orr v7.16b, v29.16b, v21.16b // test clip[8..15] == 0 || a0[8..15] >= pq
1140 orr v5.16b, v7.16b, v16.16b // test clip[8..15] == 0 || a0[8..15] >= pq || a3[8..15] >= a0[8..15]
1149 orr v2.16b, v7.16b, v2.16b
1186 ld1 {v7.8b}, [x3], x1
1199 trn1 v22.8b, v7.8b, v17.8b // P1[6], P1[7], P3[6]...
1201 trn2 v7.8b, v7.8b, v17.8b // P2[6], P2[7], P4[6]...
1213 trn1 v23.4h, v5.4h, v7.4h // P2[4], P2[5], P2[6], P2[7], P6[4]...
1233 trn2 v3.4h, v5.4h, v7.4h // P4[4], P4[5], P4[6], P4[7], P8[4]...
1238 trn1 v7.2s, v16.2s, v20.2s // P3[0..7]
1252 uxtl v28.8h, v7.8b // P3[0..7]
1258 ushll v7.8h, v7.8b, #1 // 2*P3[0..7]
1275 mls v7.8h, v27.8h, v0.h[1] // 2*P3[0..7]-5*P4[0..7]
1291 mla v7.8h, v16.8h, v0.h[1] // 2*P3[0..7]-5*P4[0..7]+5*P5[0..7]
1299 mls v7.8h, v20.8h, v0.h[0] // 2*P3[0..7]-5*P4[0..7]+5*P5[0..7]-2*P6[0..7]
1304 srshr v7.8h, v7.8h, #3
1309 abs v5.8h, v7.8h // a0[0..7]
1310 sshr v6.8h, v7.8h, #8 // a0_sign[0..7]
1311 cmhs v7.8h, v2.8h, v19.8h // test a0[8..15] >= pq
1317 orr v7.16b, v28.16b, v7.16b // test clip[8..15] == 0 || a0[8..15] >= pq
1323 orr v2.16b, v7.16b, v2.16b // test clip[8..15] == 0 || a0[8..15] >= pq || a3[8..15] >= a0[8..15]
1337 bic v0.16b, v2.16b, v7.16b // set each d[8..15] to zero if it should not be filtered because clip[8..15] == 0 || a0[8..15] >= pq (a3 > a0 case already zeroed by saturating sub)