1e1051a39Sopenharmony_ci/*
2e1051a39Sopenharmony_ci * Copyright 2002-2018 The OpenSSL Project Authors. All Rights Reserved.
3e1051a39Sopenharmony_ci *
4e1051a39Sopenharmony_ci * Licensed under the Apache License 2.0 (the "License").  You may not use
5e1051a39Sopenharmony_ci * this file except in compliance with the License.  You can obtain a copy
6e1051a39Sopenharmony_ci * in the file LICENSE in the source distribution or at
7e1051a39Sopenharmony_ci * https://www.openssl.org/source/license.html
8e1051a39Sopenharmony_ci */
9e1051a39Sopenharmony_ci
10e1051a39Sopenharmony_ci#include "../bn_local.h"
11e1051a39Sopenharmony_ci#if !(defined(__GNUC__) && __GNUC__>=2)
12e1051a39Sopenharmony_ci# include "../bn_asm.c"         /* kind of dirty hack for Sun Studio */
13e1051a39Sopenharmony_ci#else
14e1051a39Sopenharmony_ci/*-
15e1051a39Sopenharmony_ci * x86_64 BIGNUM accelerator version 0.1, December 2002.
16e1051a39Sopenharmony_ci *
17e1051a39Sopenharmony_ci * Implemented by Andy Polyakov <appro@openssl.org> for the OpenSSL
18e1051a39Sopenharmony_ci * project.
19e1051a39Sopenharmony_ci *
20e1051a39Sopenharmony_ci * Rights for redistribution and usage in source and binary forms are
21e1051a39Sopenharmony_ci * granted according to the License. Warranty of any kind is disclaimed.
22e1051a39Sopenharmony_ci *
23e1051a39Sopenharmony_ci * Q. Version 0.1? It doesn't sound like Andy, he used to assign real
24e1051a39Sopenharmony_ci *    versions, like 1.0...
25e1051a39Sopenharmony_ci * A. Well, that's because this code is basically a quick-n-dirty
26e1051a39Sopenharmony_ci *    proof-of-concept hack. As you can see it's implemented with
27e1051a39Sopenharmony_ci *    inline assembler, which means that you're bound to GCC and that
28e1051a39Sopenharmony_ci *    there might be enough room for further improvement.
29e1051a39Sopenharmony_ci *
30e1051a39Sopenharmony_ci * Q. Why inline assembler?
31e1051a39Sopenharmony_ci * A. x86_64 features own ABI which I'm not familiar with. This is
32e1051a39Sopenharmony_ci *    why I decided to let the compiler take care of subroutine
33e1051a39Sopenharmony_ci *    prologue/epilogue as well as register allocation. For reference.
34e1051a39Sopenharmony_ci *    Win64 implements different ABI for AMD64, different from Linux.
35e1051a39Sopenharmony_ci *
36e1051a39Sopenharmony_ci * Q. How much faster does it get?
37e1051a39Sopenharmony_ci * A. 'apps/openssl speed rsa dsa' output with no-asm:
38e1051a39Sopenharmony_ci *
39e1051a39Sopenharmony_ci *                        sign    verify    sign/s verify/s
40e1051a39Sopenharmony_ci *      rsa  512 bits   0.0006s   0.0001s   1683.8  18456.2
41e1051a39Sopenharmony_ci *      rsa 1024 bits   0.0028s   0.0002s    356.0   6407.0
42e1051a39Sopenharmony_ci *      rsa 2048 bits   0.0172s   0.0005s     58.0   1957.8
43e1051a39Sopenharmony_ci *      rsa 4096 bits   0.1155s   0.0018s      8.7    555.6
44e1051a39Sopenharmony_ci *                        sign    verify    sign/s verify/s
45e1051a39Sopenharmony_ci *      dsa  512 bits   0.0005s   0.0006s   2100.8   1768.3
46e1051a39Sopenharmony_ci *      dsa 1024 bits   0.0014s   0.0018s    692.3    559.2
47e1051a39Sopenharmony_ci *      dsa 2048 bits   0.0049s   0.0061s    204.7    165.0
48e1051a39Sopenharmony_ci *
49e1051a39Sopenharmony_ci *    'apps/openssl speed rsa dsa' output with this module:
50e1051a39Sopenharmony_ci *
51e1051a39Sopenharmony_ci *                        sign    verify    sign/s verify/s
52e1051a39Sopenharmony_ci *      rsa  512 bits   0.0004s   0.0000s   2767.1  33297.9
53e1051a39Sopenharmony_ci *      rsa 1024 bits   0.0012s   0.0001s    867.4  14674.7
54e1051a39Sopenharmony_ci *      rsa 2048 bits   0.0061s   0.0002s    164.0   5270.0
55e1051a39Sopenharmony_ci *      rsa 4096 bits   0.0384s   0.0006s     26.1   1650.8
56e1051a39Sopenharmony_ci *                        sign    verify    sign/s verify/s
57e1051a39Sopenharmony_ci *      dsa  512 bits   0.0002s   0.0003s   4442.2   3786.3
58e1051a39Sopenharmony_ci *      dsa 1024 bits   0.0005s   0.0007s   1835.1   1497.4
59e1051a39Sopenharmony_ci *      dsa 2048 bits   0.0016s   0.0020s    620.4    504.6
60e1051a39Sopenharmony_ci *
61e1051a39Sopenharmony_ci *    For the reference. IA-32 assembler implementation performs
62e1051a39Sopenharmony_ci *    very much like 64-bit code compiled with no-asm on the same
63e1051a39Sopenharmony_ci *    machine.
64e1051a39Sopenharmony_ci */
65e1051a39Sopenharmony_ci
66e1051a39Sopenharmony_ci# undef mul
67e1051a39Sopenharmony_ci# undef mul_add
68e1051a39Sopenharmony_ci
/*-
 * "m"(a), "+m"(r)      is the way to favor DirectPath µ-code;
 * "g"(0)               lets the compiler decide where it wants
 *                      to keep the value of zero;
 */
/*
 * mul_add(r,a,word,carry): r += a * word + carry, with the word that
 * overflows out of r left in carry. r is read and written as a memory
 * operand and a is read as a memory operand, so both must be lvalues;
 * carry must be a modifiable lvalue.
 */
# define mul_add(r,a,word,carry) do {                   \
        register BN_ULONG prod_hi, prod_lo;             \
        /* prod_hi:prod_lo = a * word */                \
        asm ("mulq %3"                                  \
                : "=a"(prod_lo), "=d"(prod_hi)          \
                : "a"(word), "m"(a)                     \
                : "cc");                                \
        /* carry += prod_lo, overflow goes to prod_hi */\
        asm ("addq %2,%0; adcq %3,%1"                   \
                : "+r"(carry), "+d"(prod_hi)            \
                : "a"(prod_lo), "g"(0)                  \
                : "cc");                                \
        /* r += carry, overflow goes to prod_hi */      \
        asm ("addq %2,%0; adcq %3,%1"                   \
                : "+m"(r), "+d"(prod_hi)                \
                : "r"(carry), "g"(0)                    \
                : "cc");                                \
        carry = prod_hi;                                \
        } while (0)
90e1051a39Sopenharmony_ci
/*
 * mul(r,a,word,carry): r = low word of (a * word + carry); the high word
 * of that sum is left in carry. r and carry must be modifiable lvalues.
 *
 * The multiplicand uses the "rm" constraint rather than "g": "g" would
 * also permit an immediate operand, which mulq cannot encode, so a
 * compile-time constant argument could produce unassemblable output.
 */
# define mul(r,a,word,carry) do {       \
        register BN_ULONG high,low;     \
        /* high:low = a * word */       \
        asm ("mulq %3"                  \
                : "=a"(low),"=d"(high)  \
                : "a"(word),"rm"(a)     \
                : "cc");                \
        /* carry += low, overflow goes to high */ \
        asm ("addq %2,%0; adcq %3,%1"   \
                : "+r"(carry),"+d"(high)\
                : "a"(low),"g"(0)       \
                : "cc");                \
        (r)=carry, carry=high;          \
        } while (0)
# undef sqr
/*
 * sqr(r0,r1,a): (r1:r0) = a * a, i.e. r0 receives the low word and r1
 * the high word of the square. r0 and r1 must be modifiable lvalues.
 *
 * Wrapped in do { } while (0) without a trailing semicolon so that the
 * macro behaves as a single statement (safe in unbraced if/else), the
 * same way mul() and mul_add() are written.
 */
# define sqr(r0,r1,a) do {              \
        asm ("mulq %2"                  \
                : "=a"(r0),"=d"(r1)     \
                : "a"(a)                \
                : "cc");                \
        } while (0)
109e1051a39Sopenharmony_ci
110e1051a39Sopenharmony_ciBN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
111e1051a39Sopenharmony_ci                          BN_ULONG w)
112e1051a39Sopenharmony_ci{
113e1051a39Sopenharmony_ci    BN_ULONG c1 = 0;
114e1051a39Sopenharmony_ci
115e1051a39Sopenharmony_ci    if (num <= 0)
116e1051a39Sopenharmony_ci        return c1;
117e1051a39Sopenharmony_ci
118e1051a39Sopenharmony_ci    while (num & ~3) {
119e1051a39Sopenharmony_ci        mul_add(rp[0], ap[0], w, c1);
120e1051a39Sopenharmony_ci        mul_add(rp[1], ap[1], w, c1);
121e1051a39Sopenharmony_ci        mul_add(rp[2], ap[2], w, c1);
122e1051a39Sopenharmony_ci        mul_add(rp[3], ap[3], w, c1);
123e1051a39Sopenharmony_ci        ap += 4;
124e1051a39Sopenharmony_ci        rp += 4;
125e1051a39Sopenharmony_ci        num -= 4;
126e1051a39Sopenharmony_ci    }
127e1051a39Sopenharmony_ci    if (num) {
128e1051a39Sopenharmony_ci        mul_add(rp[0], ap[0], w, c1);
129e1051a39Sopenharmony_ci        if (--num == 0)
130e1051a39Sopenharmony_ci            return c1;
131e1051a39Sopenharmony_ci        mul_add(rp[1], ap[1], w, c1);
132e1051a39Sopenharmony_ci        if (--num == 0)
133e1051a39Sopenharmony_ci            return c1;
134e1051a39Sopenharmony_ci        mul_add(rp[2], ap[2], w, c1);
135e1051a39Sopenharmony_ci        return c1;
136e1051a39Sopenharmony_ci    }
137e1051a39Sopenharmony_ci
138e1051a39Sopenharmony_ci    return c1;
139e1051a39Sopenharmony_ci}
140e1051a39Sopenharmony_ci
141e1051a39Sopenharmony_ciBN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
142e1051a39Sopenharmony_ci{
143e1051a39Sopenharmony_ci    BN_ULONG c1 = 0;
144e1051a39Sopenharmony_ci
145e1051a39Sopenharmony_ci    if (num <= 0)
146e1051a39Sopenharmony_ci        return c1;
147e1051a39Sopenharmony_ci
148e1051a39Sopenharmony_ci    while (num & ~3) {
149e1051a39Sopenharmony_ci        mul(rp[0], ap[0], w, c1);
150e1051a39Sopenharmony_ci        mul(rp[1], ap[1], w, c1);
151e1051a39Sopenharmony_ci        mul(rp[2], ap[2], w, c1);
152e1051a39Sopenharmony_ci        mul(rp[3], ap[3], w, c1);
153e1051a39Sopenharmony_ci        ap += 4;
154e1051a39Sopenharmony_ci        rp += 4;
155e1051a39Sopenharmony_ci        num -= 4;
156e1051a39Sopenharmony_ci    }
157e1051a39Sopenharmony_ci    if (num) {
158e1051a39Sopenharmony_ci        mul(rp[0], ap[0], w, c1);
159e1051a39Sopenharmony_ci        if (--num == 0)
160e1051a39Sopenharmony_ci            return c1;
161e1051a39Sopenharmony_ci        mul(rp[1], ap[1], w, c1);
162e1051a39Sopenharmony_ci        if (--num == 0)
163e1051a39Sopenharmony_ci            return c1;
164e1051a39Sopenharmony_ci        mul(rp[2], ap[2], w, c1);
165e1051a39Sopenharmony_ci    }
166e1051a39Sopenharmony_ci    return c1;
167e1051a39Sopenharmony_ci}
168e1051a39Sopenharmony_ci
169e1051a39Sopenharmony_civoid bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
170e1051a39Sopenharmony_ci{
171e1051a39Sopenharmony_ci    if (n <= 0)
172e1051a39Sopenharmony_ci        return;
173e1051a39Sopenharmony_ci
174e1051a39Sopenharmony_ci    while (n & ~3) {
175e1051a39Sopenharmony_ci        sqr(r[0], r[1], a[0]);
176e1051a39Sopenharmony_ci        sqr(r[2], r[3], a[1]);
177e1051a39Sopenharmony_ci        sqr(r[4], r[5], a[2]);
178e1051a39Sopenharmony_ci        sqr(r[6], r[7], a[3]);
179e1051a39Sopenharmony_ci        a += 4;
180e1051a39Sopenharmony_ci        r += 8;
181e1051a39Sopenharmony_ci        n -= 4;
182e1051a39Sopenharmony_ci    }
183e1051a39Sopenharmony_ci    if (n) {
184e1051a39Sopenharmony_ci        sqr(r[0], r[1], a[0]);
185e1051a39Sopenharmony_ci        if (--n == 0)
186e1051a39Sopenharmony_ci            return;
187e1051a39Sopenharmony_ci        sqr(r[2], r[3], a[1]);
188e1051a39Sopenharmony_ci        if (--n == 0)
189e1051a39Sopenharmony_ci            return;
190e1051a39Sopenharmony_ci        sqr(r[4], r[5], a[2]);
191e1051a39Sopenharmony_ci    }
192e1051a39Sopenharmony_ci}
193e1051a39Sopenharmony_ci
194e1051a39Sopenharmony_ciBN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
195e1051a39Sopenharmony_ci{
196e1051a39Sopenharmony_ci    BN_ULONG ret, waste;
197e1051a39Sopenharmony_ci
198e1051a39Sopenharmony_ci asm("divq      %4":"=a"(ret), "=d"(waste)
199e1051a39Sopenharmony_ci :     "a"(l), "d"(h), "r"(d)
200e1051a39Sopenharmony_ci :     "cc");
201e1051a39Sopenharmony_ci
202e1051a39Sopenharmony_ci    return ret;
203e1051a39Sopenharmony_ci}
204e1051a39Sopenharmony_ci
205e1051a39Sopenharmony_ciBN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
206e1051a39Sopenharmony_ci                      int n)
207e1051a39Sopenharmony_ci{
208e1051a39Sopenharmony_ci    BN_ULONG ret;
209e1051a39Sopenharmony_ci    size_t i = 0;
210e1051a39Sopenharmony_ci
211e1051a39Sopenharmony_ci    if (n <= 0)
212e1051a39Sopenharmony_ci        return 0;
213e1051a39Sopenharmony_ci
214e1051a39Sopenharmony_ci    asm volatile ("       subq    %0,%0           \n" /* clear carry */
215e1051a39Sopenharmony_ci                  "       jmp     1f              \n"
216e1051a39Sopenharmony_ci                  ".p2align 4                     \n"
217e1051a39Sopenharmony_ci                  "1:     movq    (%4,%2,8),%0    \n"
218e1051a39Sopenharmony_ci                  "       adcq    (%5,%2,8),%0    \n"
219e1051a39Sopenharmony_ci                  "       movq    %0,(%3,%2,8)    \n"
220e1051a39Sopenharmony_ci                  "       lea     1(%2),%2        \n"
221e1051a39Sopenharmony_ci                  "       dec     %1              \n"
222e1051a39Sopenharmony_ci                  "       jnz     1b              \n"
223e1051a39Sopenharmony_ci                  "       sbbq    %0,%0           \n"
224e1051a39Sopenharmony_ci                  :"=&r" (ret), "+c"(n), "+r"(i)
225e1051a39Sopenharmony_ci                  :"r"(rp), "r"(ap), "r"(bp)
226e1051a39Sopenharmony_ci                  :"cc", "memory");
227e1051a39Sopenharmony_ci
228e1051a39Sopenharmony_ci    return ret & 1;
229e1051a39Sopenharmony_ci}
230e1051a39Sopenharmony_ci
231e1051a39Sopenharmony_ci# ifndef SIMICS
232e1051a39Sopenharmony_ciBN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
233e1051a39Sopenharmony_ci                      int n)
234e1051a39Sopenharmony_ci{
235e1051a39Sopenharmony_ci    BN_ULONG ret;
236e1051a39Sopenharmony_ci    size_t i = 0;
237e1051a39Sopenharmony_ci
238e1051a39Sopenharmony_ci    if (n <= 0)
239e1051a39Sopenharmony_ci        return 0;
240e1051a39Sopenharmony_ci
241e1051a39Sopenharmony_ci    asm volatile ("       subq    %0,%0           \n" /* clear borrow */
242e1051a39Sopenharmony_ci                  "       jmp     1f              \n"
243e1051a39Sopenharmony_ci                  ".p2align 4                     \n"
244e1051a39Sopenharmony_ci                  "1:     movq    (%4,%2,8),%0    \n"
245e1051a39Sopenharmony_ci                  "       sbbq    (%5,%2,8),%0    \n"
246e1051a39Sopenharmony_ci                  "       movq    %0,(%3,%2,8)    \n"
247e1051a39Sopenharmony_ci                  "       lea     1(%2),%2        \n"
248e1051a39Sopenharmony_ci                  "       dec     %1              \n"
249e1051a39Sopenharmony_ci                  "       jnz     1b              \n"
250e1051a39Sopenharmony_ci                  "       sbbq    %0,%0           \n"
251e1051a39Sopenharmony_ci                  :"=&r" (ret), "+c"(n), "+r"(i)
252e1051a39Sopenharmony_ci                  :"r"(rp), "r"(ap), "r"(bp)
253e1051a39Sopenharmony_ci                  :"cc", "memory");
254e1051a39Sopenharmony_ci
255e1051a39Sopenharmony_ci    return ret & 1;
256e1051a39Sopenharmony_ci}
257e1051a39Sopenharmony_ci# else
258e1051a39Sopenharmony_ci/* Simics 1.4<7 has buggy sbbq:-( */
259e1051a39Sopenharmony_ci#  define BN_MASK2 0xffffffffffffffffL
260e1051a39Sopenharmony_ciBN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
261e1051a39Sopenharmony_ci{
262e1051a39Sopenharmony_ci    BN_ULONG t1, t2;
263e1051a39Sopenharmony_ci    int c = 0;
264e1051a39Sopenharmony_ci
265e1051a39Sopenharmony_ci    if (n <= 0)
266e1051a39Sopenharmony_ci        return (BN_ULONG)0;
267e1051a39Sopenharmony_ci
268e1051a39Sopenharmony_ci    for (;;) {
269e1051a39Sopenharmony_ci        t1 = a[0];
270e1051a39Sopenharmony_ci        t2 = b[0];
271e1051a39Sopenharmony_ci        r[0] = (t1 - t2 - c) & BN_MASK2;
272e1051a39Sopenharmony_ci        if (t1 != t2)
273e1051a39Sopenharmony_ci            c = (t1 < t2);
274e1051a39Sopenharmony_ci        if (--n <= 0)
275e1051a39Sopenharmony_ci            break;
276e1051a39Sopenharmony_ci
277e1051a39Sopenharmony_ci        t1 = a[1];
278e1051a39Sopenharmony_ci        t2 = b[1];
279e1051a39Sopenharmony_ci        r[1] = (t1 - t2 - c) & BN_MASK2;
280e1051a39Sopenharmony_ci        if (t1 != t2)
281e1051a39Sopenharmony_ci            c = (t1 < t2);
282e1051a39Sopenharmony_ci        if (--n <= 0)
283e1051a39Sopenharmony_ci            break;
284e1051a39Sopenharmony_ci
285e1051a39Sopenharmony_ci        t1 = a[2];
286e1051a39Sopenharmony_ci        t2 = b[2];
287e1051a39Sopenharmony_ci        r[2] = (t1 - t2 - c) & BN_MASK2;
288e1051a39Sopenharmony_ci        if (t1 != t2)
289e1051a39Sopenharmony_ci            c = (t1 < t2);
290e1051a39Sopenharmony_ci        if (--n <= 0)
291e1051a39Sopenharmony_ci            break;
292e1051a39Sopenharmony_ci
293e1051a39Sopenharmony_ci        t1 = a[3];
294e1051a39Sopenharmony_ci        t2 = b[3];
295e1051a39Sopenharmony_ci        r[3] = (t1 - t2 - c) & BN_MASK2;
296e1051a39Sopenharmony_ci        if (t1 != t2)
297e1051a39Sopenharmony_ci            c = (t1 < t2);
298e1051a39Sopenharmony_ci        if (--n <= 0)
299e1051a39Sopenharmony_ci            break;
300e1051a39Sopenharmony_ci
301e1051a39Sopenharmony_ci        a += 4;
302e1051a39Sopenharmony_ci        b += 4;
303e1051a39Sopenharmony_ci        r += 4;
304e1051a39Sopenharmony_ci    }
305e1051a39Sopenharmony_ci    return c;
306e1051a39Sopenharmony_ci}
307e1051a39Sopenharmony_ci# endif
308e1051a39Sopenharmony_ci
309e1051a39Sopenharmony_ci/* mul_add_c(a,b,c0,c1,c2)  -- c+=a*b for three word number c=(c2,c1,c0) */
310e1051a39Sopenharmony_ci/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
311e1051a39Sopenharmony_ci/* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
/*
 * sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number
 * c=(c2,c1,c0)
 */
316e1051a39Sopenharmony_ci
317e1051a39Sopenharmony_ci/*
318e1051a39Sopenharmony_ci * Keep in mind that carrying into high part of multiplication result
319e1051a39Sopenharmony_ci * can not overflow, because it cannot be all-ones.
320e1051a39Sopenharmony_ci */
321e1051a39Sopenharmony_ci# if 0
/*
 * Original C macros, kept for reference purposes only; this branch is
 * compiled out in favour of the inline-assembler versions.
 */
/* c += a*b for the three-word accumulator c = (c2,c1,c0) */
#  define mul_add_c(a,b,c0,c1,c2)       do {    \
        BN_ULONG ta = (a), tb = (b);            \
        BN_ULONG lo, hi;                        \
        BN_UMULT_LOHI(lo,hi,ta,tb);             \
        c0 += lo;                               \
        hi += (c0 < lo);                        \
        c1 += hi;                               \
        c2 += (c1 < hi);                        \
        } while(0)

/* c += 2*a*b: the product is added into the accumulator twice */
#  define mul_add_c2(a,b,c0,c1,c2)      do {    \
        BN_ULONG ta = (a), tb = (b);            \
        BN_ULONG lo, hi, tt;                    \
        BN_UMULT_LOHI(lo,hi,ta,tb);             \
        c0 += lo;                               \
        tt = hi + (c0 < lo);                    \
        c1 += tt;                               \
        c2 += (c1 < tt);                        \
        c0 += lo;                               \
        hi += (c0 < lo);                        \
        c1 += hi;                               \
        c2 += (c1 < hi);                        \
        } while(0)

/* c += a[i]^2 for the three-word accumulator c = (c2,c1,c0) */
#  define sqr_add_c(a,i,c0,c1,c2)       do {    \
        BN_ULONG ta = (a)[i];                   \
        BN_ULONG lo, hi;                        \
        BN_UMULT_LOHI(lo,hi,ta,ta);             \
        c0 += lo;                               \
        hi += (c0 < lo);                        \
        c1 += hi;                               \
        c2 += (c1 < hi);                        \
        } while(0)
348e1051a39Sopenharmony_ci# else
/*
 * mul_add_c(a,b,c0,c1,c2): add the double-word product a*b into the
 * three-word accumulator (c2,c1,c0). b is consumed as a memory operand
 * and therefore has to be an lvalue.
 */
#  define mul_add_c(a,b,c0,c1,c2) do {                  \
        BN_ULONG prod_lo, prod_hi;                      \
        /* prod_hi:prod_lo = a * b */                   \
        asm ("mulq %3"                                  \
                : "=a"(prod_lo), "=d"(prod_hi)          \
                : "a"(a), "m"(b)                        \
                : "cc");                                \
        /* ripple the product through c0, c1, c2 */     \
        asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"       \
                : "+r"(c0), "+r"(c1), "+r"(c2)          \
                : "r"(prod_lo), "r"(prod_hi), "g"(0)    \
                : "cc");                                \
        } while (0)
360e1051a39Sopenharmony_ci
/*
 * sqr_add_c(a,i,c0,c1,c2): add the double-word square of a[i] into the
 * three-word accumulator (c2,c1,c0).
 */
#  define sqr_add_c(a,i,c0,c1,c2) do {                  \
        BN_ULONG prod_lo, prod_hi;                      \
        /* prod_hi:prod_lo = a[i] * a[i] */             \
        asm ("mulq %2"                                  \
                : "=a"(prod_lo), "=d"(prod_hi)          \
                : "a"(a[i])                             \
                : "cc");                                \
        /* ripple the square through c0, c1, c2 */      \
        asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"       \
                : "+r"(c0), "+r"(c1), "+r"(c2)          \
                : "r"(prod_lo), "r"(prod_hi), "g"(0)    \
                : "cc");                                \
        } while (0)
372e1051a39Sopenharmony_ci
/*
 * mul_add_c2(a,b,c0,c1,c2): add twice the double-word product a*b into
 * the three-word accumulator (c2,c1,c0). The product is computed once and
 * accumulated in two identical passes. b is consumed as a memory operand
 * and therefore has to be an lvalue.
 */
#  define mul_add_c2(a,b,c0,c1,c2) do {                 \
        BN_ULONG prod_lo, prod_hi;                      \
        /* prod_hi:prod_lo = a * b */                   \
        asm ("mulq %3"                                  \
                : "=a"(prod_lo), "=d"(prod_hi)          \
                : "a"(a), "m"(b)                        \
                : "cc");                                \
        /* first accumulation */                        \
        asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"       \
                : "+r"(c0), "+r"(c1), "+r"(c2)          \
                : "r"(prod_lo), "r"(prod_hi), "g"(0)    \
                : "cc");                                \
        /* second accumulation */                       \
        asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"       \
                : "+r"(c0), "+r"(c1), "+r"(c2)          \
                : "r"(prod_lo), "r"(prod_hi), "g"(0)    \
                : "cc");                                \
        } while (0)
388e1051a39Sopenharmony_ci# endif
389e1051a39Sopenharmony_ci
/*
 * sqr_add_c2(a,i,j,c0,c1,c2): add 2*a[i]*a[j] into the three-word
 * accumulator (c2,c1,c0); a thin wrapper around mul_add_c2().
 */
# define sqr_add_c2(a,i,j,c0,c1,c2) \
        mul_add_c2((a)[i], (a)[j], c0, c1, c2)
392e1051a39Sopenharmony_ci
393e1051a39Sopenharmony_civoid bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
394e1051a39Sopenharmony_ci{
395e1051a39Sopenharmony_ci    BN_ULONG c1, c2, c3;
396e1051a39Sopenharmony_ci
397e1051a39Sopenharmony_ci    c1 = 0;
398e1051a39Sopenharmony_ci    c2 = 0;
399e1051a39Sopenharmony_ci    c3 = 0;
400e1051a39Sopenharmony_ci    mul_add_c(a[0], b[0], c1, c2, c3);
401e1051a39Sopenharmony_ci    r[0] = c1;
402e1051a39Sopenharmony_ci    c1 = 0;
403e1051a39Sopenharmony_ci    mul_add_c(a[0], b[1], c2, c3, c1);
404e1051a39Sopenharmony_ci    mul_add_c(a[1], b[0], c2, c3, c1);
405e1051a39Sopenharmony_ci    r[1] = c2;
406e1051a39Sopenharmony_ci    c2 = 0;
407e1051a39Sopenharmony_ci    mul_add_c(a[2], b[0], c3, c1, c2);
408e1051a39Sopenharmony_ci    mul_add_c(a[1], b[1], c3, c1, c2);
409e1051a39Sopenharmony_ci    mul_add_c(a[0], b[2], c3, c1, c2);
410e1051a39Sopenharmony_ci    r[2] = c3;
411e1051a39Sopenharmony_ci    c3 = 0;
412e1051a39Sopenharmony_ci    mul_add_c(a[0], b[3], c1, c2, c3);
413e1051a39Sopenharmony_ci    mul_add_c(a[1], b[2], c1, c2, c3);
414e1051a39Sopenharmony_ci    mul_add_c(a[2], b[1], c1, c2, c3);
415e1051a39Sopenharmony_ci    mul_add_c(a[3], b[0], c1, c2, c3);
416e1051a39Sopenharmony_ci    r[3] = c1;
417e1051a39Sopenharmony_ci    c1 = 0;
418e1051a39Sopenharmony_ci    mul_add_c(a[4], b[0], c2, c3, c1);
419e1051a39Sopenharmony_ci    mul_add_c(a[3], b[1], c2, c3, c1);
420e1051a39Sopenharmony_ci    mul_add_c(a[2], b[2], c2, c3, c1);
421e1051a39Sopenharmony_ci    mul_add_c(a[1], b[3], c2, c3, c1);
422e1051a39Sopenharmony_ci    mul_add_c(a[0], b[4], c2, c3, c1);
423e1051a39Sopenharmony_ci    r[4] = c2;
424e1051a39Sopenharmony_ci    c2 = 0;
425e1051a39Sopenharmony_ci    mul_add_c(a[0], b[5], c3, c1, c2);
426e1051a39Sopenharmony_ci    mul_add_c(a[1], b[4], c3, c1, c2);
427e1051a39Sopenharmony_ci    mul_add_c(a[2], b[3], c3, c1, c2);
428e1051a39Sopenharmony_ci    mul_add_c(a[3], b[2], c3, c1, c2);
429e1051a39Sopenharmony_ci    mul_add_c(a[4], b[1], c3, c1, c2);
430e1051a39Sopenharmony_ci    mul_add_c(a[5], b[0], c3, c1, c2);
431e1051a39Sopenharmony_ci    r[5] = c3;
432e1051a39Sopenharmony_ci    c3 = 0;
433e1051a39Sopenharmony_ci    mul_add_c(a[6], b[0], c1, c2, c3);
434e1051a39Sopenharmony_ci    mul_add_c(a[5], b[1], c1, c2, c3);
435e1051a39Sopenharmony_ci    mul_add_c(a[4], b[2], c1, c2, c3);
436e1051a39Sopenharmony_ci    mul_add_c(a[3], b[3], c1, c2, c3);
437e1051a39Sopenharmony_ci    mul_add_c(a[2], b[4], c1, c2, c3);
438e1051a39Sopenharmony_ci    mul_add_c(a[1], b[5], c1, c2, c3);
439e1051a39Sopenharmony_ci    mul_add_c(a[0], b[6], c1, c2, c3);
440e1051a39Sopenharmony_ci    r[6] = c1;
441e1051a39Sopenharmony_ci    c1 = 0;
442e1051a39Sopenharmony_ci    mul_add_c(a[0], b[7], c2, c3, c1);
443e1051a39Sopenharmony_ci    mul_add_c(a[1], b[6], c2, c3, c1);
444e1051a39Sopenharmony_ci    mul_add_c(a[2], b[5], c2, c3, c1);
445e1051a39Sopenharmony_ci    mul_add_c(a[3], b[4], c2, c3, c1);
446e1051a39Sopenharmony_ci    mul_add_c(a[4], b[3], c2, c3, c1);
447e1051a39Sopenharmony_ci    mul_add_c(a[5], b[2], c2, c3, c1);
448e1051a39Sopenharmony_ci    mul_add_c(a[6], b[1], c2, c3, c1);
449e1051a39Sopenharmony_ci    mul_add_c(a[7], b[0], c2, c3, c1);
450e1051a39Sopenharmony_ci    r[7] = c2;
451e1051a39Sopenharmony_ci    c2 = 0;
452e1051a39Sopenharmony_ci    mul_add_c(a[7], b[1], c3, c1, c2);
453e1051a39Sopenharmony_ci    mul_add_c(a[6], b[2], c3, c1, c2);
454e1051a39Sopenharmony_ci    mul_add_c(a[5], b[3], c3, c1, c2);
455e1051a39Sopenharmony_ci    mul_add_c(a[4], b[4], c3, c1, c2);
456e1051a39Sopenharmony_ci    mul_add_c(a[3], b[5], c3, c1, c2);
457e1051a39Sopenharmony_ci    mul_add_c(a[2], b[6], c3, c1, c2);
458e1051a39Sopenharmony_ci    mul_add_c(a[1], b[7], c3, c1, c2);
459e1051a39Sopenharmony_ci    r[8] = c3;
460e1051a39Sopenharmony_ci    c3 = 0;
461e1051a39Sopenharmony_ci    mul_add_c(a[2], b[7], c1, c2, c3);
462e1051a39Sopenharmony_ci    mul_add_c(a[3], b[6], c1, c2, c3);
463e1051a39Sopenharmony_ci    mul_add_c(a[4], b[5], c1, c2, c3);
464e1051a39Sopenharmony_ci    mul_add_c(a[5], b[4], c1, c2, c3);
465e1051a39Sopenharmony_ci    mul_add_c(a[6], b[3], c1, c2, c3);
466e1051a39Sopenharmony_ci    mul_add_c(a[7], b[2], c1, c2, c3);
467e1051a39Sopenharmony_ci    r[9] = c1;
468e1051a39Sopenharmony_ci    c1 = 0;
469e1051a39Sopenharmony_ci    mul_add_c(a[7], b[3], c2, c3, c1);
470e1051a39Sopenharmony_ci    mul_add_c(a[6], b[4], c2, c3, c1);
471e1051a39Sopenharmony_ci    mul_add_c(a[5], b[5], c2, c3, c1);
472e1051a39Sopenharmony_ci    mul_add_c(a[4], b[6], c2, c3, c1);
473e1051a39Sopenharmony_ci    mul_add_c(a[3], b[7], c2, c3, c1);
474e1051a39Sopenharmony_ci    r[10] = c2;
475e1051a39Sopenharmony_ci    c2 = 0;
476e1051a39Sopenharmony_ci    mul_add_c(a[4], b[7], c3, c1, c2);
477e1051a39Sopenharmony_ci    mul_add_c(a[5], b[6], c3, c1, c2);
478e1051a39Sopenharmony_ci    mul_add_c(a[6], b[5], c3, c1, c2);
479e1051a39Sopenharmony_ci    mul_add_c(a[7], b[4], c3, c1, c2);
480e1051a39Sopenharmony_ci    r[11] = c3;
481e1051a39Sopenharmony_ci    c3 = 0;
482e1051a39Sopenharmony_ci    mul_add_c(a[7], b[5], c1, c2, c3);
483e1051a39Sopenharmony_ci    mul_add_c(a[6], b[6], c1, c2, c3);
484e1051a39Sopenharmony_ci    mul_add_c(a[5], b[7], c1, c2, c3);
485e1051a39Sopenharmony_ci    r[12] = c1;
486e1051a39Sopenharmony_ci    c1 = 0;
487e1051a39Sopenharmony_ci    mul_add_c(a[6], b[7], c2, c3, c1);
488e1051a39Sopenharmony_ci    mul_add_c(a[7], b[6], c2, c3, c1);
489e1051a39Sopenharmony_ci    r[13] = c2;
490e1051a39Sopenharmony_ci    c2 = 0;
491e1051a39Sopenharmony_ci    mul_add_c(a[7], b[7], c3, c1, c2);
492e1051a39Sopenharmony_ci    r[14] = c3;
493e1051a39Sopenharmony_ci    r[15] = c1;
494e1051a39Sopenharmony_ci}
495e1051a39Sopenharmony_ci
496e1051a39Sopenharmony_civoid bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
497e1051a39Sopenharmony_ci{
498e1051a39Sopenharmony_ci    BN_ULONG c1, c2, c3;
499e1051a39Sopenharmony_ci
500e1051a39Sopenharmony_ci    c1 = 0;
501e1051a39Sopenharmony_ci    c2 = 0;
502e1051a39Sopenharmony_ci    c3 = 0;
503e1051a39Sopenharmony_ci    mul_add_c(a[0], b[0], c1, c2, c3);
504e1051a39Sopenharmony_ci    r[0] = c1;
505e1051a39Sopenharmony_ci    c1 = 0;
506e1051a39Sopenharmony_ci    mul_add_c(a[0], b[1], c2, c3, c1);
507e1051a39Sopenharmony_ci    mul_add_c(a[1], b[0], c2, c3, c1);
508e1051a39Sopenharmony_ci    r[1] = c2;
509e1051a39Sopenharmony_ci    c2 = 0;
510e1051a39Sopenharmony_ci    mul_add_c(a[2], b[0], c3, c1, c2);
511e1051a39Sopenharmony_ci    mul_add_c(a[1], b[1], c3, c1, c2);
512e1051a39Sopenharmony_ci    mul_add_c(a[0], b[2], c3, c1, c2);
513e1051a39Sopenharmony_ci    r[2] = c3;
514e1051a39Sopenharmony_ci    c3 = 0;
515e1051a39Sopenharmony_ci    mul_add_c(a[0], b[3], c1, c2, c3);
516e1051a39Sopenharmony_ci    mul_add_c(a[1], b[2], c1, c2, c3);
517e1051a39Sopenharmony_ci    mul_add_c(a[2], b[1], c1, c2, c3);
518e1051a39Sopenharmony_ci    mul_add_c(a[3], b[0], c1, c2, c3);
519e1051a39Sopenharmony_ci    r[3] = c1;
520e1051a39Sopenharmony_ci    c1 = 0;
521e1051a39Sopenharmony_ci    mul_add_c(a[3], b[1], c2, c3, c1);
522e1051a39Sopenharmony_ci    mul_add_c(a[2], b[2], c2, c3, c1);
523e1051a39Sopenharmony_ci    mul_add_c(a[1], b[3], c2, c3, c1);
524e1051a39Sopenharmony_ci    r[4] = c2;
525e1051a39Sopenharmony_ci    c2 = 0;
526e1051a39Sopenharmony_ci    mul_add_c(a[2], b[3], c3, c1, c2);
527e1051a39Sopenharmony_ci    mul_add_c(a[3], b[2], c3, c1, c2);
528e1051a39Sopenharmony_ci    r[5] = c3;
529e1051a39Sopenharmony_ci    c3 = 0;
530e1051a39Sopenharmony_ci    mul_add_c(a[3], b[3], c1, c2, c3);
531e1051a39Sopenharmony_ci    r[6] = c1;
532e1051a39Sopenharmony_ci    r[7] = c2;
533e1051a39Sopenharmony_ci}
534e1051a39Sopenharmony_ci
535e1051a39Sopenharmony_civoid bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
536e1051a39Sopenharmony_ci{
537e1051a39Sopenharmony_ci    BN_ULONG c1, c2, c3;
538e1051a39Sopenharmony_ci
539e1051a39Sopenharmony_ci    c1 = 0;
540e1051a39Sopenharmony_ci    c2 = 0;
541e1051a39Sopenharmony_ci    c3 = 0;
542e1051a39Sopenharmony_ci    sqr_add_c(a, 0, c1, c2, c3);
543e1051a39Sopenharmony_ci    r[0] = c1;
544e1051a39Sopenharmony_ci    c1 = 0;
545e1051a39Sopenharmony_ci    sqr_add_c2(a, 1, 0, c2, c3, c1);
546e1051a39Sopenharmony_ci    r[1] = c2;
547e1051a39Sopenharmony_ci    c2 = 0;
548e1051a39Sopenharmony_ci    sqr_add_c(a, 1, c3, c1, c2);
549e1051a39Sopenharmony_ci    sqr_add_c2(a, 2, 0, c3, c1, c2);
550e1051a39Sopenharmony_ci    r[2] = c3;
551e1051a39Sopenharmony_ci    c3 = 0;
552e1051a39Sopenharmony_ci    sqr_add_c2(a, 3, 0, c1, c2, c3);
553e1051a39Sopenharmony_ci    sqr_add_c2(a, 2, 1, c1, c2, c3);
554e1051a39Sopenharmony_ci    r[3] = c1;
555e1051a39Sopenharmony_ci    c1 = 0;
556e1051a39Sopenharmony_ci    sqr_add_c(a, 2, c2, c3, c1);
557e1051a39Sopenharmony_ci    sqr_add_c2(a, 3, 1, c2, c3, c1);
558e1051a39Sopenharmony_ci    sqr_add_c2(a, 4, 0, c2, c3, c1);
559e1051a39Sopenharmony_ci    r[4] = c2;
560e1051a39Sopenharmony_ci    c2 = 0;
561e1051a39Sopenharmony_ci    sqr_add_c2(a, 5, 0, c3, c1, c2);
562e1051a39Sopenharmony_ci    sqr_add_c2(a, 4, 1, c3, c1, c2);
563e1051a39Sopenharmony_ci    sqr_add_c2(a, 3, 2, c3, c1, c2);
564e1051a39Sopenharmony_ci    r[5] = c3;
565e1051a39Sopenharmony_ci    c3 = 0;
566e1051a39Sopenharmony_ci    sqr_add_c(a, 3, c1, c2, c3);
567e1051a39Sopenharmony_ci    sqr_add_c2(a, 4, 2, c1, c2, c3);
568e1051a39Sopenharmony_ci    sqr_add_c2(a, 5, 1, c1, c2, c3);
569e1051a39Sopenharmony_ci    sqr_add_c2(a, 6, 0, c1, c2, c3);
570e1051a39Sopenharmony_ci    r[6] = c1;
571e1051a39Sopenharmony_ci    c1 = 0;
572e1051a39Sopenharmony_ci    sqr_add_c2(a, 7, 0, c2, c3, c1);
573e1051a39Sopenharmony_ci    sqr_add_c2(a, 6, 1, c2, c3, c1);
574e1051a39Sopenharmony_ci    sqr_add_c2(a, 5, 2, c2, c3, c1);
575e1051a39Sopenharmony_ci    sqr_add_c2(a, 4, 3, c2, c3, c1);
576e1051a39Sopenharmony_ci    r[7] = c2;
577e1051a39Sopenharmony_ci    c2 = 0;
578e1051a39Sopenharmony_ci    sqr_add_c(a, 4, c3, c1, c2);
579e1051a39Sopenharmony_ci    sqr_add_c2(a, 5, 3, c3, c1, c2);
580e1051a39Sopenharmony_ci    sqr_add_c2(a, 6, 2, c3, c1, c2);
581e1051a39Sopenharmony_ci    sqr_add_c2(a, 7, 1, c3, c1, c2);
582e1051a39Sopenharmony_ci    r[8] = c3;
583e1051a39Sopenharmony_ci    c3 = 0;
584e1051a39Sopenharmony_ci    sqr_add_c2(a, 7, 2, c1, c2, c3);
585e1051a39Sopenharmony_ci    sqr_add_c2(a, 6, 3, c1, c2, c3);
586e1051a39Sopenharmony_ci    sqr_add_c2(a, 5, 4, c1, c2, c3);
587e1051a39Sopenharmony_ci    r[9] = c1;
588e1051a39Sopenharmony_ci    c1 = 0;
589e1051a39Sopenharmony_ci    sqr_add_c(a, 5, c2, c3, c1);
590e1051a39Sopenharmony_ci    sqr_add_c2(a, 6, 4, c2, c3, c1);
591e1051a39Sopenharmony_ci    sqr_add_c2(a, 7, 3, c2, c3, c1);
592e1051a39Sopenharmony_ci    r[10] = c2;
593e1051a39Sopenharmony_ci    c2 = 0;
594e1051a39Sopenharmony_ci    sqr_add_c2(a, 7, 4, c3, c1, c2);
595e1051a39Sopenharmony_ci    sqr_add_c2(a, 6, 5, c3, c1, c2);
596e1051a39Sopenharmony_ci    r[11] = c3;
597e1051a39Sopenharmony_ci    c3 = 0;
598e1051a39Sopenharmony_ci    sqr_add_c(a, 6, c1, c2, c3);
599e1051a39Sopenharmony_ci    sqr_add_c2(a, 7, 5, c1, c2, c3);
600e1051a39Sopenharmony_ci    r[12] = c1;
601e1051a39Sopenharmony_ci    c1 = 0;
602e1051a39Sopenharmony_ci    sqr_add_c2(a, 7, 6, c2, c3, c1);
603e1051a39Sopenharmony_ci    r[13] = c2;
604e1051a39Sopenharmony_ci    c2 = 0;
605e1051a39Sopenharmony_ci    sqr_add_c(a, 7, c3, c1, c2);
606e1051a39Sopenharmony_ci    r[14] = c3;
607e1051a39Sopenharmony_ci    r[15] = c1;
608e1051a39Sopenharmony_ci}
609e1051a39Sopenharmony_ci
610e1051a39Sopenharmony_civoid bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
611e1051a39Sopenharmony_ci{
612e1051a39Sopenharmony_ci    BN_ULONG c1, c2, c3;
613e1051a39Sopenharmony_ci
614e1051a39Sopenharmony_ci    c1 = 0;
615e1051a39Sopenharmony_ci    c2 = 0;
616e1051a39Sopenharmony_ci    c3 = 0;
617e1051a39Sopenharmony_ci    sqr_add_c(a, 0, c1, c2, c3);
618e1051a39Sopenharmony_ci    r[0] = c1;
619e1051a39Sopenharmony_ci    c1 = 0;
620e1051a39Sopenharmony_ci    sqr_add_c2(a, 1, 0, c2, c3, c1);
621e1051a39Sopenharmony_ci    r[1] = c2;
622e1051a39Sopenharmony_ci    c2 = 0;
623e1051a39Sopenharmony_ci    sqr_add_c(a, 1, c3, c1, c2);
624e1051a39Sopenharmony_ci    sqr_add_c2(a, 2, 0, c3, c1, c2);
625e1051a39Sopenharmony_ci    r[2] = c3;
626e1051a39Sopenharmony_ci    c3 = 0;
627e1051a39Sopenharmony_ci    sqr_add_c2(a, 3, 0, c1, c2, c3);
628e1051a39Sopenharmony_ci    sqr_add_c2(a, 2, 1, c1, c2, c3);
629e1051a39Sopenharmony_ci    r[3] = c1;
630e1051a39Sopenharmony_ci    c1 = 0;
631e1051a39Sopenharmony_ci    sqr_add_c(a, 2, c2, c3, c1);
632e1051a39Sopenharmony_ci    sqr_add_c2(a, 3, 1, c2, c3, c1);
633e1051a39Sopenharmony_ci    r[4] = c2;
634e1051a39Sopenharmony_ci    c2 = 0;
635e1051a39Sopenharmony_ci    sqr_add_c2(a, 3, 2, c3, c1, c2);
636e1051a39Sopenharmony_ci    r[5] = c3;
637e1051a39Sopenharmony_ci    c3 = 0;
638e1051a39Sopenharmony_ci    sqr_add_c(a, 3, c1, c2, c3);
639e1051a39Sopenharmony_ci    r[6] = c1;
640e1051a39Sopenharmony_ci    r[7] = c2;
641e1051a39Sopenharmony_ci}
642e1051a39Sopenharmony_ci#endif
643