Lines Matching refs:v18
38 shl v18.8h, v2.8h, #2 // 4 * src[8]
49 sub v18.8h, v22.8h, v18.8h // - 4 * src[8] + 16 * src[56]
58 mla v18.8h, v4.8h, v0.h[1] // - 4 * src[8] + 9 * src[24] + 16 * src[56]
71 mls v18.8h, v6.8h, v0.h[2] // -t4 = - 4 * src[8] + 9 * src[24] - 15 * src[40] + 16 * src[56]
79 neg v7.8h, v18.8h // +t4
82 ssra v2.8h, v18.8h, #1 // (t8 - t4) >> 1
95 trn2 v18.8h, v5.8h, v6.8h
98 trn1 v21.4s, v17.4s, v18.4s
99 trn2 v17.4s, v17.4s, v18.4s
100 trn1 v18.4s, v19.4s, v20.4s
103 trn2 v4.2d, v21.2d, v18.2d
108 trn1 v6.2d, v21.2d, v18.2d
112 trn1 v18.4s, v3.4s, v5.4s
123 trn1 v22.2d, v18.2d, v19.2d
124 trn2 v18.2d, v18.2d, v19.2d
129 shl v18.8h, v18.8h, #2 // 8/2 * src[32]
135 ssra v18.8h, v18.8h, #1 // 12/2 * src[32]
139 add v20.8h, v21.8h, v18.8h // t1/2 = 12/2 * src[0] + 12/2 * src[32]
141 sub v1.8h, v21.8h, v18.8h // t2/2 = 12/2 * src[0] - 12/2 * src[32]
146 add v18.8h, v20.8h, v6.8h // t5/2 = t1/2 + t3/2
160 ssra v18.8h, v2.8h, #1 // (t5 + t1) >> 1
167 srshr v2.8h, v18.8h, #6 // (t5 + t1 + 64) >> 7
192 ld1 {v16.8b, v17.8b, v18.8b, v19.8b}, [x2]
199 trn2 v3.4h, v16.4h, v18.4h
201 trn1 v16.4h, v16.4h, v18.4h
203 ld1 {v18.8b}, [x0], x1
298 uaddw v1.8h, v1.8h, v18.8b
340 trn1 v18.4s, v1.4s, v3.4s // 00 10 20 30 40 50 60 70
347 mul v3.8h, v18.8h, v0.h[6] // 17 * src[0]
357 neg v18.8h, v17.8h // -t4/2
360 ssra v18.8h, v1.8h, #1 // (t2 - t4) >> 1
364 srshr v16.8h, v18.8h, #2 // (t2 - t4 + 64) >> 3
366 trn2 v18.8h, v1.8h, v16.8h // 10 11 30 31 50 51 70 71
369 trn1 v3.4s, v18.4s, v17.4s // 10 11 12 13 50 51 52 53
370 trn2 v16.4s, v18.4s, v17.4s // 30 31 32 33 70 71 72 73
378 shl v24.4h, v18.4h, #4 // 16 * src[40]
380 shl v26.4h, v18.4h, #2 // 4 * src[40]
404 mla v19.4h, v18.4h, v0.h[1] // t1 = 16 * src[8] + 15 * src[24] + 9 * src[40] + 4 * src[56]
406 mls v2.4h, v18.4h, v0.h[2] // -t4 = - 4 * src[8] + 9 * src[24] - 15 * src[40] + 16 * src[56]
408 add v18.4h, v17.4h, v3.4h // t6/2 = t2/2 + t4/2
420 ssra v18.4h, v23.4h, #1 // (t6 + t2) >> 1
427 trn1 v2.2d, v18.2d, v22.2d
724 dup v18.8h, w2 // pq
756 cmhs v18.4h, v17.4h, v18.4h // test a0 >= pq
759 orr v2.8b, v16.8b, v18.8b // test clip == 0 || a0 >= pq
824 cmeq v18.4h, v6.4h, #0 // test clip == 0
830 orr v3.8b, v18.8b, v5.8b // test clip == 0 || a0 >= pq
871 ld1 {v18.8b}, [x0] // P8
880 uxtl v17.8h, v18.8b // P8
899 cmhs v18.8h, v3.8h, v16.8h // test a1 >= a2
902 bsl v18.16b, v16.16b, v3.16b // a3
905 uqsub v6.8h, v20.8h, v18.8h // a0 >= a3 ? a0-a3 : 0 (a0 > a3 in all cases where filtering is enabled, so makes more sense to subtract this way round than the opposite and then taking the abs)
906 cmhs v16.8h, v18.8h, v20.8h // test a3 >= a0
952 trn1 v18.8b, v5.8b, v6.8b // P1[4], P1[5], P3[4]...
960 trn1 v3.4h, v18.4h, v20.4h // P1[4], P1[5], P1[6], P1[7], P5[4]...
962 trn2 v17.4h, v18.4h, v20.4h // P3[4], P3[5], P3[6], P3[7], P7[4]...
965 trn1 v18.2s, v19.2s, v16.2s // P2
974 uxtl v5.8h, v18.8b // P2
976 uxtl v18.8h, v16.8b // P3
985 mla v7.8h, v18.8h, v0.h[1] // 2*P1-5*P2+5*P3
1001 cmhs v18.8h, v7.8h, v17.8h // test a1 >= a2
1004 bsl v18.16b, v17.16b, v7.16b // a3
1007 uqsub v5.8h, v19.8h, v18.8h // a0 >= a3 ? a0-a3 : 0 (a0 > a3 in all cases where filtering is enabled, so makes more sense to subtract this way round than the opposite and then taking the abs)
1008 cmhs v7.8h, v18.8h, v19.8h // test a3 >= a0
1054 ld1 {v18.16b}, [x3] // P4
1069 uxtl v25.8h, v18.8b // P4[0..7]
1076 uxtl2 v18.8h, v18.16b // P4[8..15]
1085 sub v7.8h, v18.8h, v1.8h // P4[8..15]-P5[8..15]
1086 mls v6.8h, v18.8h, v0.h[1] // 2*P3[8..15]-5*P4[8..15]
1098 mls v3.8h, v18.8h, v0.h[0] // 2*P1[8..15]-5*P2[8..15]+5*P3[8..15]-2*P4[8..15]
1156 mls v18.8h, v0.8h, v6.8h // invert d[8..15] depending on clip_sign[8..15] & a0_sign[8..15], or zero it if they match, and accumulate into P4[8..15]
1161 sqxtun2 v2.16b, v18.8h
1191 trn1 v18.8b, v3.8b, v4.8b // P1[2], P1[3], P3[2]...
1206 trn1 v19.4h, v16.4h, v18.4h // P1[0], P1[1], P1[2], P1[3], P5[0]...
1223 trn2 v16.4h, v16.4h, v18.4h // P3[0], P3[1], P3[2], P3[3], P7[0]...
1224 trn1 v18.4h, v6.4h, v17.4h // P2[12], P2[13], P2[14], P2[15], P6[12]...
1229 trn1 v26.2s, v25.2s, v18.2s // P2[8..15]
1230 trn2 v18.2s, v25.2s, v18.2s // P6[8..15]
1249 uxtl v17.8h, v18.8b // P6[8..15]
1251 trn1 v18.2s, v2.2s, v4.2s // P4[8..15]
1263 uxtl v4.8h, v18.8b // P4[8..15]
1270 uxtl v18.8h, v23.8b // P5[8..15]
1274 sub v22.8h, v4.8h, v18.8h // P4[8..15]-P5[8..15]
1293 mla v3.8h, v18.8h, v0.h[1] // 2*P3[8..15]-5*P4[8..15]+5*P5[8..15]
1345 mla v18.8h, v0.8h, v1.8h // invert d[8..15] depending on clip_sign[8..15] & a0_sign[8..15], or zero it if they match, and accumulate into P5
1348 sqxtun v3.8b, v18.8h