Lines Matching refs:out1
196 Outputs - out0, out1, out2, out3
198 Loads word in 'out1' from (psrc + stride)
202 #define LW4(psrc, stride, out0, out1, out2, out3) \
205 out1 = LW((psrc) + stride); \
210 #define LW2(psrc, stride, out0, out1) \
213 out1 = LW((psrc) + stride); \
219 Outputs - out0, out1
221 Loads double word in 'out1' from (psrc + stride)
223 #define LD2(psrc, stride, out0, out1) \
226 out1 = LD((psrc) + stride); \
228 #define LD4(psrc, stride, out0, out1, out2, out3) \
230 LD2((psrc), stride, out0, out1); \
267 Outputs - out0, out1
270 Loads elements in 'out1' from (psrc + stride)
272 #define LD_V2(RTYPE, psrc, stride, out0, out1) \
275 out1 = LD_V(RTYPE, (psrc) + stride); \
283 #define LD_V3(RTYPE, psrc, stride, out0, out1, out2) \
285 LD_V2(RTYPE, (psrc), stride, out0, out1); \
291 #define LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3) \
293 LD_V2(RTYPE, (psrc), stride, out0, out1); \
302 #define LD_V5(RTYPE, psrc, stride, out0, out1, out2, out3, out4) \
304 LD_V4(RTYPE, (psrc), stride, out0, out1, out2, out3); \
310 #define LD_V6(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5) \
312 LD_V4(RTYPE, (psrc), stride, out0, out1, out2, out3); \
321 out0, out1, out2, out3, out4, out5, out6) \
323 LD_V5(RTYPE, (psrc), stride, out0, out1, out2, out3, out4); \
330 out0, out1, out2, out3, out4, out5, out6, out7) \
332 LD_V4(RTYPE, (psrc), stride, out0, out1, out2, out3); \
342 out0, out1, out2, out3, out4, out5, out6, out7, \
346 out0, out1, out2, out3, out4, out5, out6, out7); \
580 Outputs - out0, out1
590 #define AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) \
593 out1 = (RTYPE) __msa_aver_u_b((v16u8) in2, (v16u8) in3); \
598 out0, out1, out2, out3) \
600 AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) \
617 #define SLDI_B2(RTYPE, d0, s0, d1, s1, slide_val, out0, out1) \
620 SLDI_B(RTYPE, d1, s1, slide_val, out1) \
628 out0, out1, out2) \
630 SLDI_B2(RTYPE, d0, s0, d1, s1, slide_val, out0, out1) \
638 slide_val, out0, out1, out2, out3) \
640 SLDI_B2(RTYPE, d0, s0, d1, s1, slide_val, out0, out1) \
649 Outputs - out0, out1
653 Selective byte elements from in2 & in3 are copied to out1 as
656 #define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
659 out1 = (RTYPE) __msa_vshf_b((v16i8) mask1, (v16i8) in3, (v16i8) in2); \
667 out0, out1, out2) \
669 VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1); \
675 out0, out1, out2, out3) \
677 VSHF_B2(RTYPE, in0, in1, in0, in1, mask0, mask1, out0, out1); \
685 Outputs - out0, out1
689 Selective halfword elements from in2 & in3 are copied to out1
692 #define VSHF_H2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
695 out1 = (RTYPE) __msa_vshf_h((v8i16) mask1, (v8i16) in3, (v8i16) in2); \
700 out0, out1, out2) \
702 VSHF_H2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1); \
709 Outputs - out0, out1
713 Selective byte elements from in2 & in3 are copied to out1 as
716 #define VSHF_W2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
719 out1 = (RTYPE) __msa_vshf_w((v4i32) mask1, (v4i32) in3, (v4i32) in2); \
726 Outputs - out0, out1
735 #define DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
738 out1 = (RTYPE) __msa_dotp_u_h((v16u8) mult1, (v16u8) cnst1); \
744 out0, out1, out2, out3) \
746 DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
754 Outputs - out0, out1
763 #define DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
766 out1 = (RTYPE) __msa_dotp_s_h((v16i8) mult1, (v16i8) cnst1); \
771 out0, out1, out2) \
773 DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
779 cnst0, cnst1, cnst2, cnst3, out0, out1, out2, out3) \
781 DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
789 Outputs - out0, out1
798 #define DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
801 out1 = (RTYPE) __msa_dotp_s_w((v8i16) mult1, (v8i16) cnst1); \
807 out0, out1, out2, out3) \
809 DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
817 Outputs - out0, out1
826 #define DPADD_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
830 out1 = (RTYPE) __msa_dpadd_s_h((v8i16) out1, \
836 cnst0, cnst1, cnst2, cnst3, out0, out1, out2, out3) \
838 DPADD_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
846 Outputs - out0, out1
855 #define DPADD_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
859 out1 = (RTYPE) __msa_dpadd_u_h((v8u16) out1, \
867 Outputs - out0, out1
876 #define DPADD_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
880 out1 = (RTYPE) __msa_dpadd_s_w((v4i32) out1, \
886 cnst0, cnst1, cnst2, cnst3, out0, out1, out2, out3) \
888 DPADD_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
1033 Outputs - out0, out1
1039 #define HADD_SB2(RTYPE, in0, in1, out0, out1) \
1042 out1 = (RTYPE) __msa_hadd_s_h((v16i8) in1, (v16i8) in1); \
1046 #define HADD_SB4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \
1048 HADD_SB2(RTYPE, in0, in1, out0, out1); \
1056 Outputs - out0, out1
1062 #define HADD_UB2(RTYPE, in0, in1, out0, out1) \
1065 out1 = (RTYPE) __msa_hadd_u_h((v16u8) in1, (v16u8) in1); \
1069 #define HADD_UB3(RTYPE, in0, in1, in2, out0, out1, out2) \
1071 HADD_UB2(RTYPE, in0, in1, out0, out1); \
1076 #define HADD_UB4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \
1078 HADD_UB2(RTYPE, in0, in1, out0, out1); \
1087 Outputs - out0, out1
1093 #define HSUB_UB2(RTYPE, in0, in1, out0, out1) \
1096 out1 = (RTYPE) __msa_hsub_u_h((v16u8) in1, (v16u8) in1); \
1101 #define HSUB_UB4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \
1103 HSUB_UB2(RTYPE, in0, in1, out0, out1); \
1176 Outputs - out0, out1
1181 elements of 'in3' are interleaved and copied to 'out1'
1183 #define ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
1186 out1 = (RTYPE) __msa_ilvev_b((v16i8) in3, (v16i8) in2); \
1195 Outputs - out0, out1
1200 elements of 'in3' are interleaved and copied to 'out1'
1202 #define ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
1205 out1 = (RTYPE) __msa_ilvev_h((v8i16) in3, (v8i16) in2); \
1213 Outputs - out0, out1
1218 elements of 'in3' are interleaved and copied to 'out1'
1220 #define ILVEV_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
1223 out1 = (RTYPE) __msa_ilvev_w((v4i32) in3, (v4i32) in2); \
1232 Outputs - out0, out1
1237 elements of 'in3' are interleaved and copied to 'out1'
1239 #define ILVEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
1242 out1 = (RTYPE) __msa_ilvev_d((v2i64) in3, (v2i64) in2); \
1250 Outputs - out0, out1
1255 elements of in3 are interleaved and copied to out1.
1257 #define ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
1260 out1 = (RTYPE) __msa_ilvl_b((v16i8) in2, (v16i8) in3); \
1268 out0, out1, out2, out3) \
1270 ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
1280 Outputs - out0, out1
1285 elements of in3 are interleaved and copied to out1.
1287 #define ILVL_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
1290 out1 = (RTYPE) __msa_ilvl_h((v8i16) in2, (v8i16) in3); \
1296 out0, out1, out2, out3) \
1298 ILVL_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
1306 Outputs - out0, out1
1311 elements of in3 are interleaved and copied to out1.
1313 #define ILVL_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
1316 out1 = (RTYPE) __msa_ilvl_w((v4i32) in2, (v4i32) in3); \
1324 Outputs - out0, out1, out2, out3
1329 elements of in3 are interleaved and copied to out1.
1332 #define ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
1335 out1 = (RTYPE) __msa_ilvr_b((v16i8) in2, (v16i8) in3); \
1343 #define ILVR_B3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
1345 ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
1354 out0, out1, out2, out3) \
1356 ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
1367 out0, out1, out2, out3, out4, out5, out6, out7) \
1370 out0, out1, out2, out3); \
1379 Outputs - out0, out1, out2, out3
1384 halfword elements of in3 are interleaved and copied to out1.
1387 #define ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
1390 out1 = (RTYPE) __msa_ilvr_h((v8i16) in2, (v8i16) in3); \
1395 #define ILVR_H3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
1397 ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
1403 out0, out1, out2, out3) \
1405 ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
1411 #define ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
1414 out1 = (RTYPE) __msa_ilvr_w((v4i32) in2, (v4i32) in3); \
1421 out0, out1, out2, out3) \
1423 ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1); \
1431 Outputs - out0, out1, out2, out3
1436 double word elements of in3 are interleaved and copied to out1.
1438 #define ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
1441 out1 = (RTYPE) __msa_ilvr_d((v2i64) in2, (v2i64) in3); \
1447 #define ILVR_D3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
1449 ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
1455 out0, out1, out2, out3) \
1457 ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
1465 Outputs - out0, out1
1470 double word elements of in3 are interleaved and copied to out1.
1472 #define ILVL_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
1475 out1 = (RTYPE) __msa_ilvl_d((v2i64) in2, (v2i64) in3); \
1483 Outputs - out0, out1
1488 interleaved and stored to 'out1'
1490 #define ILVRL_B2(RTYPE, in0, in1, out0, out1) \
1493 out1 = (RTYPE) __msa_ilvl_b((v16i8) in0, (v16i8) in1); \
1501 #define ILVRL_H2(RTYPE, in0, in1, out0, out1) \
1504 out1 = (RTYPE) __msa_ilvl_h((v8i16) in0, (v8i16) in1); \
1511 #define ILVRL_W2(RTYPE, in0, in1, out0, out1) \
1514 out1 = (RTYPE) __msa_ilvl_w((v4i32) in0, (v4i32) in1); \
1644 Outputs - out0, out1
1650 #define SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1) \
1653 out1 = (RTYPE) __msa_splati_h((v8i16) in, idx1); \
1659 out0, out1, out2) \
1661 SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1); \
1668 out0, out1, out2, out3) \
1670 SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1); \
1679 Outputs - out0, out1
1684 elements in 'out1' vector
1687 #define SPLATI_W2(RTYPE, in, stidx, out0, out1) \
1690 out1 = (RTYPE) __msa_splati_w((v4i32) in, (stidx+1)); \
1695 #define SPLATI_W4(RTYPE, in, out0, out1, out2, out3) \
1697 SPLATI_W2(RTYPE, in, 0, out0, out1); \
1705 Outputs - out0, out1
1711 out1 & even byte elements of in3 are copied to the right
1712 half of out1.
1714 #define PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
1717 out1 = (RTYPE) __msa_pckev_b((v16i8) in2, (v16i8) in3); \
1724 #define PCKEV_B3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
1726 PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
1733 out0, out1, out2, out3) \
1735 PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
1745 Outputs - out0, out1
1751 out1 & even halfword elements of in3 are copied to the right
1752 half of out1.
1754 #define PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
1757 out1 = (RTYPE) __msa_pckev_h((v8i16) in2, (v8i16) in3); \
1763 out0, out1, out2, out3) \
1765 PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
1773 Outputs - out0, out1
1779 out1 & even double elements of in3 are copied to the right
1780 half of out1.
1782 #define PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
1785 out1 = (RTYPE) __msa_pckev_d((v2i64) in2, (v2i64) in3); \
1792 out0, out1, out2, out3) \
1794 PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
1801 Outputs - out0, out1
1806 element is overwritten to index 0 and result is written to out1
1808 #define PCKOD_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
1811 out1 = (RTYPE) __msa_pckod_d((v2i64) in2, (v2i64) in3); \
1885 Outputs - out0, out1
1892 #define ADDS_SH2(RTYPE, in0, in1, in2, in3, out0, out1) \
1895 out1 = (RTYPE) __msa_adds_s_h((v8i16) in2, (v8i16) in3); \
1900 out0, out1, out2, out3) \
1902 ADDS_SH2(RTYPE, in0, in1, in2, in3, out0, out1); \
2096 Outputs - out0, out1
2101 #define MUL2(in0, in1, in2, in3, out0, out1) \
2104 out1 = in2 * in3; \
2106 #define MUL4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
2108 MUL2(in0, in1, in2, in3, out0, out1); \
2114 Outputs - out0, out1
2118 #define ADD2(in0, in1, in2, in3, out0, out1) \
2121 out1 = in2 + in3; \
2123 #define ADD4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
2125 ADD2(in0, in1, in2, in3, out0, out1); \
2131 Outputs - out0, out1
2135 #define SUB2(in0, in1, in2, in3, out0, out1) \
2138 out1 = in2 - in3; \
2140 #define SUB4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
2143 out1 = in2 - in3; \
2183 Outputs - out0, out1 (sign extended 2 halfword vectors)
2189 generate 8 signed halfword elements in 'out1'
2191 #define UNPCK_SB_SH(in, out0, out1) \
2196 ILVRL_B2_SH(tmp_m, in, out0, out1); \
2201 Outputs - out0, out1 (unsigned 2 halfword vectors)
2204 Zero extended left half of vector is returned in 'out1'
2206 #define UNPCK_UB_SH(in, out0, out1) \
2210 ILVRL_B2_SH(zero_m, in, out0, out1); \
2216 Outputs - out0, out1 (sign extended 2 word vectors)
2222 generate 4 signed word elements in 'out1'
2224 #define UNPCK_SH_SW(in, out0, out1) \
2229 ILVRL_H2_SW(tmp_m, in, out0, out1); \
2246 Outputs - out0, out1, out2, out3
2249 #define BUTTERFLY_4(in0, in1, in2, in3, out0, out1, out2, out3) \
2252 out1 = in1 + in2; \
2264 out0, out1, out2, out3, out4, out5, out6, out7) \
2267 out1 = in1 + in6; \
2284 out0, out1, out2, out3, out4, out5, out6, out7, \
2288 out1 = in1 + in14; \
2308 Outputs - out0, out1, out2, out3 (output 4x4 byte block)
2312 #define TRANSPOSE4x4_UB_UB(in0, in1, in2, in3, out0, out1, out2, out3) \
2321 out1 = (v16u8) __msa_sldi_b(zero_m, (v16i8) out0, 4); \
2322 out2 = (v16u8) __msa_sldi_b(zero_m, (v16i8) out1, 4); \
2328 Outputs - out0, out1, out2, out3 (output 4x8 byte block)
2333 out0, out1, out2, out3) \
2345 out1 = (RTYPE) __msa_ilvl_d((v2i64) out2, (v2i64) out0); \
2354 Outputs - out0, out1, out2, out3, out4, out5, out6, out7
2360 out0, out1, out2, out3, out4, out5, out6, out7) \
2373 8, out1, out3, out5, out7); \
2381 Outputs - out0, out1, out2, out3
2387 out0, out1, out2, out3) \
2392 out1 = (v16u8) __msa_ilvev_d(tmp1_m, tmp0_m); \
2403 ILVEV_B2_SD(out1, out3, tmp2_m, tmp3_m, tmp0_m, tmp1_m); \
2407 tmp0_m = (v2i64) __msa_ilvod_b((v16i8) out3, (v16i8) out1); \
2409 out1 = (v16u8) __msa_ilvev_h((v8i16) tmp1_m, (v8i16) tmp0_m); \
2416 Outputs - out0, out1, out2, out3, out4, out5, out6, out7
2422 out0, out1, out2, out3, out4, out5, out6, out7) \
2430 ILVEV_D2_UB(in6, in14, in7, in15, out1, out0); \
2438 out7 = (v16u8) __msa_ilvev_b((v16i8) out0, (v16i8) out1); \
2439 tmp7_m = (v16u8) __msa_ilvod_b((v16i8) out0, (v16i8) out1); \
2451 out1 = (v16u8) __msa_ilvev_w((v4i32) tmp3_m, (v4i32) tmp2_m); \
2462 Outputs - out0, out1, out2, out3
2466 #define TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, out0, out1, out2, out3) \
2472 out1 = (v8i16) __msa_ilvl_d((v2i64) out0, (v2i64) out0); \
2478 Outputs - out0, out1, out2, out3, out4, out5, out6, out7
2483 out0, out1, out2, out3, out4, out5, out6, out7) \
2499 out1 = (RTYPE) __msa_pckod_d((v2i64) tmp0_m, (v2i64) tmp4_m); \
2509 Outputs - out0, out1, out2, out3
2513 #define TRANSPOSE4x4_SW_SW(in0, in1, in2, in3, out0, out1, out2, out3) \
2521 out1 = (v4i32) __msa_ilvl_d((v2i64) s2_m, (v2i64) s0_m); \