Lines Matching refs:v2

31         ld1             {v1.16b, v2.16b}, [x0], #32
37 shl v7.8h, v2.8h, #4 // 16 * src[8]
38 shl v18.8h, v2.8h, #2 // 4 * src[8]
51 mls v20.8h, v2.8h, v0.h[2] // - 15 * src[8] + 4 * src[24] + 16 * src[40]
56 mls v19.8h, v2.8h, v0.h[1] // - 9 * src[8] + 16 * src[24] - 4 * src[40]
57 sub v2.8h, v23.8h, v21.8h // t4/2 = 6/2 * src[16] - 16/2 * src[48]
63 add v5.8h, v1.8h, v2.8h // t6/2 = t2/2 + t4/2
64 sub v16.8h, v1.8h, v2.8h // t7/2 = t2/2 - t4/2
66 add v21.8h, v1.8h, v2.8h // t6/2 = t2/2 + t4/2
72 sub v1.8h, v1.8h, v2.8h // t7/2 = t2/2 - t4/2
73 sub v2.8h, v4.8h, v3.8h // t8/2 = t1/2 - t3/2
82 ssra v2.8h, v18.8h, #1 // (t8 - t4) >> 1
90 srshr v2.8h, v2.8h, #2 // (t8 - t4 + 4) >> 3
96 trn2 v19.8h, v2.8h, v1.8h
106 trn1 v1.8h, v2.8h, v1.8h
107 trn1 v2.8h, v7.8h, v16.8h
113 trn1 v19.4s, v1.4s, v2.4s
118 trn2 v1.4s, v1.4s, v2.4s
119 shl v2.8h, v6.8h, #4 // 16 * src[8]
126 ssra v2.8h, v21.8h, #2 // 16 * src[8] + 4 * src[56]
138 mla v2.8h, v20.8h, v0.h[2] // 16 * src[8] + 15 * src[24] + 4 * src[56]
150 mla v2.8h, v4.8h, v0.h[1] // t1 = 16 * src[8] + 15 * src[24] + 9 * src[40] + 4 * src[56]
159 neg v3.8h, v2.8h // -t1
160 ssra v18.8h, v2.8h, #1 // (t5 + t1) >> 1
167 srshr v2.8h, v18.8h, #6 // (t5 + t1 + 64) >> 7
173 st1 {v2.16b, v3.16b}, [x1], #32
190 ld1 {v1.8b, v2.8b, v3.8b, v4.8b}, [x2], #32
196 trn2 v7.4h, v2.4h, v4.4h
198 trn1 v2.4h, v2.4h, v4.4h
209 trn1 v20.2s, v2.2s, v17.2s
213 trn2 v2.2s, v2.2s, v17.2s
234 shl v3.4h, v2.4h, #3 // 16/2 * src[6]
235 mla v1.4h, v2.4h, v0.h[0] // t3/2 = 16/2 * src[2] + 6/2 * src[6]
238 sub v2.4h, v24.4h, v3.4h // t4/2 = 6/2 * src[2] - 16/2 * src[6]
246 add v20.4h, v4.4h, v2.4h // t6/2 = t2/2 + t4/2
247 sub v24.4h, v4.4h, v2.4h // t7/2 = t2/2 - t4/2
249 add v27.4h, v4.4h, v2.4h // t6/2 = t2/2 + t4/2
250 sub v2.4h, v4.4h, v2.4h // t7/2 = t2/2 - t4/2
260 ssra v2.4h, v16.4h, #1 // (t7 - t3) >> 1
264 trn1 v2.2d, v20.2d, v2.2d
268 srshr v2.8h, v2.8h, #2 // (t6 + t2 + 4) >> 3, (t7 - t3 + 4) >> 3
271 trn2 v6.8h, v1.8h, v2.8h
272 trn1 v1.8h, v1.8h, v2.8h
273 trn2 v2.8h, v3.8h, v4.8h
275 trn2 v4.4s, v6.4s, v2.4s
279 trn1 v2.4s, v6.4s, v2.4s
283 mls v3.8h, v2.8h, v0.h[4] // t4/2 = - 10/2 * src[8] + 22/2 * src[24]
284 mla v4.8h, v2.8h, v0.h[5] // t3/2 = 22/2 * src[8] + 10/2 * src[24]
287 neg v2.8h, v3.8h // -t4/2
290 ssra v2.8h, v1.8h, #1 // (t2 - t4) >> 1
294 srshr v1.8h, v2.8h, #6 // (t2 - t4 + 64) >> 7
295 srshr v2.8h, v3.8h, #6 // (t2 + t4 + 64) >> 7
299 uaddw v2.8h, v2.8h, v22.8b
303 sqxtun v2.8b, v2.8h
307 st1 {v2.8b}, [x3], x1
324 ld1 {v2.d}[0], [x2], x3 // 10 11 12 13
328 ld1 {v2.d}[1], [x2], x3 // 50 51 52 53
334 trn2 v16.8h, v1.8h, v2.8h // 01 11 03 13 41 51 43 53
335 trn1 v1.8h, v1.8h, v2.8h // 00 10 02 12 40 50 42 52
336 trn2 v2.8h, v3.8h, v4.8h // 21 31 23 33 61 71 63 73
339 trn2 v17.4s, v16.4s, v2.4s // 03 13 23 33 43 53 63 73
341 trn1 v2.4s, v16.4s, v2.4s // 01 11 21 31 41 51 61 71
351 mla v16.8h, v2.8h, v0.h[5] // t3/2 = 22/2 * src[1] + 10/2 * src[3]
352 mls v17.8h, v2.8h, v0.h[4] // t4/2 = - 10/2 * src[1] + 22/2 * src[3]
353 add v2.8h, v3.8h, v1.8h // t1 = 17 * src[0] + 17 * src[2]
356 ssra v16.8h, v2.8h, #1 // (t1 + t3) >> 1
359 ssra v3.8h, v2.8h, #1 // (t1 - t3) >> 1
362 srshr v2.8h, v17.8h, #2 // (t2 + t4 + 4) >> 3
365 trn2 v17.8h, v2.8h, v3.8h // 12 13 32 33 52 53 72 73
368 trn1 v2.8h, v2.8h, v3.8h // 02 03 22 23 42 43 62 63
371 trn1 v17.4s, v1.4s, v2.4s // 00 01 02 03 40 41 42 43
381 trn2 v1.4s, v1.4s, v2.4s // 20 21 22 23 60 61 62 63
383 sub v2.4h, v25.4h, v23.4h // - 4 * src[8] + 16 * src[56]
395 mla v2.4h, v16.4h, v0.h[1] // - 4 * src[8] + 9 * src[24] + 16 * src[56]
406 mls v2.4h, v18.4h, v0.h[2] // -t4 = - 4 * src[8] + 9 * src[24] - 15 * src[40] + 16 * src[56]
417 neg v27.4h, v2.4h // +t4
419 srsra v0.4h, v2.4h, #1 // (t8 - t4 + 1) >> 1
427 trn1 v2.2d, v18.2d, v22.2d
431 srshr v2.8h, v2.8h, #6 // (t6 + t2 + 64) >> 7, (t7 - t3 + 65) >> 7
435 uaddw v2.8h, v2.8h, v6.8b
439 sqxtun v2.8b, v2.8h
443 st1 {v2.s}[0], [x4], x1
447 st1 {v2.s}[1], [x4], x1
465 ld1 {v2.d}[0], [x2], x3 // 10 11 12 13
471 trn2 v7.4h, v1.4h, v2.4h // 01 11 03 13
472 trn1 v1.4h, v1.4h, v2.4h // 00 10 02 12
474 trn2 v2.4h, v3.4h, v4.4h // 21 31 23 33
476 trn2 v4.2s, v7.2s, v2.2s // 03 13 23 33
478 trn1 v2.2s, v7.2s, v2.2s // 01 11 21 31
484 mla v3.4h, v2.4h, v0.h[1] // t3/2 = 22/2 * src[1] + 10/2 * src[3]
485 mls v4.4h, v2.4h, v0.h[0] // t4/2 = - 10/2 * src[1] + 22/2 * src[3]
486 add v2.4h, v7.4h, v1.4h // t1 = 17 * src[0] + 17 * src[2]
490 ssra v3.4h, v2.4h, #1 // (t1 + t3) >> 1
493 ssra v7.4h, v2.4h, #1 // (t1 - t3) >> 1
495 srshr v2.4h, v4.4h, #2 // (t2 + t4 + 4) >> 3
500 trn2 v3.4h, v2.4h, v4.4h // 12 13 32 33
501 trn1 v2.4h, v2.4h, v4.4h // 02 03 22 23
503 trn1 v16.2s, v1.2s, v2.2s // 00 01 02 03
505 trn2 v1.2s, v1.2s, v2.2s // 20 21 22 23
506 mul v2.4h, v4.4h, v0.h[1] // 22/2 * src[24]
510 mls v2.4h, v3.4h, v0.h[0] // t4/2 = - 10/2 * src[8] + 22/2 * src[24]
514 neg v3.4h, v2.4h // -t4/2
518 ssra v2.4h, v1.4h, #1 // (t2 + t4) >> 1
521 trn1 v1.2d, v2.2d, v7.2d
547 ld1 {v2.8b}, [x0], x1
562 uaddw v2.8h, v16.8h, v2.8b
570 sqxtun v2.8b, v2.8h
577 st1 {v2.8b}, [x3], x1
578 sqxtun v2.8b, v7.8h
583 st1 {v2.8b}, [x3]
599 ld1 {v2.8b}, [x0], x1
610 uaddw v2.8h, v4.8h, v2.8b
614 sqxtun v2.8b, v2.8h
618 st1 {v2.8b}, [x3], x1
635 ld1 {v2.s}[0], [x0], x1
646 ld1 {v2.s}[1], [x0], x1
650 uaddw v2.8h, v4.8h, v2.8b
654 sqxtun v2.8b, v2.8h
658 st1 {v2.s}[0], [x3], x1
662 st1 {v2.s}[1], [x3], x1
687 dup v2.8h, w0
688 uaddw v0.8h, v2.8h, v0.8b
689 uaddw v1.8h, v2.8h, v1.8b
716 ld1 {v2.s}[0], [x3], x1 // P1
725 ushll v2.8h, v2.8b, #1 // 2*P1
729 mls v2.4h, v3.4h, v0.h[1] // 2*P1-5*P2
736 mla v2.4h, v19.4h, v0.h[1] // 2*P1-5*P2+5*P3
741 mls v2.4h, v6.4h, v0.h[0] // 2*P1-5*P2+5*P3-2*P4
746 srshr v2.4h, v2.4h, #3
751 abs v2.4h, v2.4h // a1
755 cmhs v19.4h, v2.4h, v7.4h // test a1 >= a2
758 bsl v19.8b, v7.8b, v2.8b // a3
759 orr v2.8b, v16.8b, v18.8b // test clip == 0 || a0 >= pq
763 orr v5.8b, v2.8b, v7.8b // test clip == 0 || a0 >= pq || a3 >= a0
769 bic v0.8b, v5.8b, v2.8b // set each d to zero if it should not be filtered because clip == 0 || a0 >= pq (a3 > a0 case already zeroed by saturating sub)
789 ld1 {v2.8b}, [x3], x1
793 trn1 v6.8b, v1.8b, v2.8b
794 trn2 v1.8b, v1.8b, v2.8b
795 trn1 v2.8b, v3.8b, v4.8b
797 trn1 v4.4h, v6.4h, v2.4h // P1, P5
799 trn2 v2.4h, v6.4h, v2.4h // P3, P7
803 uxtl v7.8h, v2.8b // P3, P7
806 ushll v2.8h, v2.8b, #1 // 2*P3, 2*P7
812 mls v2.4h, v1.4h, v0.h[1] // 2*P3-5*P4
813 mla v2.4h, v4.4h, v0.h[1] // 2*P3-5*P4+5*P5
815 mls v2.4h, v6.4h, v0.h[0] // 2*P3-5*P4+5*P5-2*P6
819 srshr v2.4h, v2.4h, #3
823 abs v17.4h, v2.4h // a0
825 sshr v2.4h, v2.4h, #8 // a0_sign
828 sub v2.4h, v7.4h, v2.4h // clip_sign - a0_sign
841 mla v4.4h, v0.4h, v2.4h // invert d depending on clip_sign & a0_sign, or zero it if they match, and accumulate into P5
842 mls v1.4h, v0.4h, v2.4h // invert d depending on clip_sign & a0_sign, or zero it if they match, and accumulate into P4
844 sqxtun v2.8b, v1.8h
845 st2 {v2.b, v3.b}[0], [x0], x1
846 st2 {v2.b, v3.b}[1], [x0], x1
847 st2 {v2.b, v3.b}[2], [x0], x1
848 st2 {v2.b, v3.b}[3], [x0]
861 movi v2.2d, #0x0000ffff00000000
910 cmtst v2.2d, v5.2d, v2.2d // if 3rd of each group of 4 is not filtered, then none of the others in the group should be either
914 orr v2.16b, v3.16b, v2.16b
919 bic v0.16b, v3.16b, v2.16b // set each d to zero if it should not be filtered
939 ld1 {v2.8b}, [x3], x1
946 trn1 v16.8b, v1.8b, v2.8b // P1[0], P1[1], P3[0]...
948 trn2 v1.8b, v1.8b, v2.8b // P2[0], P2[1], P4[0]...
949 trn1 v2.8b, v3.8b, v4.8b // P1[2], P1[3], P3[2]...
954 trn1 v6.4h, v16.4h, v2.4h // P1[0], P1[1], P1[2], P1[3], P5[0]...
958 trn2 v2.4h, v16.4h, v2.4h // P3[0], P3[1], P3[2], P3[3], P7[0]...
968 trn1 v16.2s, v2.2s, v17.2s // P3
969 trn2 v2.2s, v2.2s, v17.2s // P7
978 uxtl v2.8h, v2.8b // P7
983 mla v19.8h, v2.8h, v0.h[1] // 2*P5-5*P6+5*P7
984 uxtl v2.8h, v3.8b // P5
986 sub v3.8h, v16.8h, v2.8h // P4-P5
993 mla v5.8h, v2.8h, v0.h[1] // 2*P3-5*P4+5*P5
1020 mla v2.8h, v0.8h, v3.8h // invert d depending on clip_sign & a0_sign, or zero it if they match, and accumulate into P5
1022 sqxtun v1.8b, v2.8h
1046 movi v2.2d, #0x0000ffff00000000
1135 cmtst v17.2d, v5.2d, v2.2d // if 3rd of each group of 4 is not filtered, then none of the others in the group should be either
1143 cmtst v2.2d, v5.2d, v2.2d
1149 orr v2.16b, v7.16b, v2.16b
1153 bic v0.16b, v3.16b, v2.16b // set each d[8..15] to zero if it should not be filtered because clip[8..15] == 0 || a0[8..15] >= pq (a3 > a0 case already zeroed by saturating sub)
1157 sqxtun v2.8b, v25.8h
1161 sqxtun2 v2.16b, v18.8h
1163 st1 {v2.16b}, [x3], x1
1178 ld1 {v2.8b}, [x3], x1
1187 trn1 v16.8b, v1.8b, v2.8b // P1[0], P1[1], P3[0]...
1189 trn2 v1.8b, v1.8b, v2.8b // P2[0], P2[1], P4[0]...
1190 ld1 {v2.8b}, [x3], x1
1203 trn1 v24.8b, v2.8b, v19.8b // P1[8], P1[9], P3[8]...
1205 trn2 v2.8b, v2.8b, v19.8b // P2[8], P2[9], P4[8]...
1217 trn1 v25.4h, v2.4h, v4.4h // P2[8], P2[9], P2[10], P2[11], P6[8]...
1234 trn2 v2.4h, v2.4h, v4.4h // P4[8], P4[9], P4[10], P4[11], P8[8]...
1251 trn1 v18.2s, v2.2s, v4.2s // P4[8..15]
1262 trn2 v2.2s, v2.2s, v4.2s // P8[8..15]
1267 uxtl v2.8h, v2.8b // P8[8..15]
1289 mls v25.8h, v2.8h, v0.h[0] // 2*P5[8..15]-5*P6[8..15]+5*P7[8..15]-2*P8[8..15]
1290 srshr v2.8h, v6.8h, #3
1297 abs v2.8h, v2.8h // a2[0..7]
1301 cmhs v20.8h, v5.8h, v2.8h // test a1[0..7] >= a2[0..7]
1305 bsl v20.16b, v2.16b, v5.16b // a3[0..7]
1306 abs v2.8h, v3.8h // a0[8..15]
1311 cmhs v7.8h, v2.8h, v19.8h // test a0[8..15] >= pq
1313 uqsub v3.8h, v2.8h, v24.8h // a0[8..15] >= a3[8..15] ? a0[8..15]-a3[8..15] : 0 (a0 > a3 in all cases where filtering is enabled, so makes more sense to subtract this way round than the opposite and then taking the abs)
1314 cmhs v2.8h, v24.8h, v2.8h // test a3[8..15] >= a0[8..15]
1323 orr v2.16b, v7.16b, v2.16b // test clip[8..15] == 0 || a0[8..15] >= pq || a3[8..15] >= a0[8..15]
1326 mov w7, v2.s[1]
1327 mov w8, v2.s[3]
1330 cmhs v2.8h, v3.8h, v26.8h
1333 bsl v2.16b, v26.16b, v3.16b // FFMIN(d[8..15], clip[8..15])
1337 bic v0.16b, v2.16b, v7.16b // set each d[8..15] to zero if it should not be filtered because clip[8..15] == 0 || a0[8..15] >= pq (a3 > a0 case already zeroed by saturating sub)
1339 bic v2.16b, v5.16b, v19.16b // set each d[0..7] to zero if it should not be filtered because clip[0..7] == 0 || a0[0..7] >= pq (a3 > a0 case already zeroed by saturating sub)
1342 mls v27.8h, v2.8h, v6.8h // invert d[0..7] depending on clip_sign[0..7] & a0_sign[0..7], or zero it if they match, and accumulate into P4
1343 mla v16.8h, v2.8h, v6.8h // invert d[0..7] depending on clip_sign[0..7] & a0_sign[0..7], or zero it if they match, and accumulate into P5
1344 sqxtun v2.8b, v4.8h
1360 st2 {v2.b, v3.b}[0], [x4], x1
1361 st2 {v2.b, v3.b}[1], [x4], x1
1362 st2 {v2.b, v3.b}[2], [x4], x1
1363 st2 {v2.b, v3.b}[3], [x4]
1365 st2 {v2.b, v3.b}[4], [x6], x1
1366 st2 {v2.b, v3.b}[5], [x6], x1
1367 st2 {v2.b, v3.b}[6], [x6], x1
1368 st2 {v2.b, v3.b}[7], [x6]
1394 ld1 {v0.16b, v1.16b, v2.16b}, [x0], #48
1398 ext v29.16b, v1.16b, v2.16b, #1
1399 ext v30.16b, v1.16b, v2.16b, #2
1400 ext v31.16b, v1.16b, v2.16b, #3
1453 ld1 {v1.16b, v2.16b}, [x0], #32
1466 ext v29.16b, v1.16b, v2.16b, #1
1468 ext v30.16b, v1.16b, v2.16b, #2
1469 ext v31.16b, v1.16b, v2.16b, #3
1495 3: mov v3.16b, v2.16b