1# Copyright (c) 2021-2022 Huawei Device Co., Ltd.
2# Licensed under the Apache License, Version 2.0 (the "License");
3# you may not use this file except in compliance with the License.
4# You may obtain a copy of the License at
5#
6# http://www.apache.org/licenses/LICENSE-2.0
7#
8# Unless required by applicable law or agreed to in writing, software
9# distributed under the License is distributed on an "AS IS" BASIS,
10# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11# See the License for the specific language governing permissions and
12# limitations under the License.
13
14definitions: []
15tests:
16  - file-name: "fp_calc"
17    isa:
18      title: Two address floating-point binary operation on accumulator
19      description: >
20        Perform specified floating-point binary operation on accumulator and register and store result into accumulator.
21        The results of instructions correspond IEEE-754 arithmetic rules.
22      exceptions:
23          - x_none
24    commands:
25      - file-name: "sin_cheb7"
26        isa:
27          instructions:
28            - sig: fadd2.64 v:in:f64
29              acc: inout:f64
30              format: [op_v_8]
31            - sig: fmul2.64 v:in:f64
32              acc: inout:f64
33              format: [op_v_8]
34        check-type: empty
35        tags: ['tsan', 'irtoc_ignore']
36        code-template: |
          # Entry-point body; the enclosing function header is not part of this template
          # (presumably emitted by the test generator - confirm).
          # The two placeholders are presumably filled from each case's values in order
          # (input, expected) - confirm against the generator.
              fmovi.64 v0, %s # v0 = argument x
              call.short get_approx, v0 # acc = get_approx(x)

              sta.64 v0 # v0 = computed approximation
              fldai.64 %s # acc = expected value
              fcmpg.64 v0 # compare expected with computed; the comparison result stays in acc
              return # returns with the comparison result in acc
          }
46
47          .function  f64 pow (i32 a0, f64 a1) {
48              mov.64 v2, a1  # v2 - value for calculation
49              fmovi.64 v1, 1.0 # v1 - initial value
50              mov v3, a0 # v3 - iteration counter
51          pow_iter:
52              ldai 0
53              jeq v3, exit
54              lda.64 v1
55              fmul2.64 v2
56              sta.64 v1
57              inci v3, -1
58              jmp pow_iter
59          exit:
60              lda.64 v1
61              return.64
62          }
63
64          .function f64 get_approx(f64 a0) {
65              # sin(PI * x / 2) approximation using polynom A1*x + A3*x^3 + A5*x^5 + A7*x^7
66              # Where A[i] is coefficients of Chebyshev polynomial approximation of sin(PI * x / 2) on [-1; 1]
67              fmovi.64 v0, 0.0 # Results
68              mov.64 v1, a0 # v1 = x
69              lda.64 a0 # x
70              fmovi.64 v2, 1.5707909877360093 # A1 coefficient
71              fmul2.64 v2 # x*A1
72              sta.64 v0 # v0 = A1 * x
73              movi v2, 3
74              call.short pow, v2, v1 # acc = x^3
75              fmovi.64 v2, -0.6458926627154391 # A3
76              fmul2.64 v2 # acc = A3 * x^3
77              fadd2.64 v0
78              sta.64 v0 # v0 = A1*x + A3*x^3
79              movi v2,5
80              call.short pow, v2, v1 # acc = x^5
81              fmovi.64 v2, 0.07943397081887174
82              fmul2.64 v2 # A5 * x^5
83              fadd2.64 v0
84              sta.64 v0 # v0 = A1*x + A3*x^3 + A5*x^5
85              movi v2,7
86              call.short pow, v2, v1 # acc = x^7
87              fmovi.64 v2, -0.0043328816450923634
88              fmul2.64 v2
89              fadd2.64 v0 #  acc = A1*x + A3*x^3 + A5*x^5 + A7*x^7
90              return.64
91          }
92        cases:
93          - values:
94              - "0.0"
95              - "0.0"
96          - values:
97              - "0.01"
98              - "0.015707263992640733"
99          - values:
100              - "0.1"
101              - "0.1564340000173055"
102          - values:
103              - "0.123456789"
104              - "0.19271172682380572"
105          - values:
106              - "0.14"
107              - "0.21814267639869997"
108          - values:
109              - "0.4"
110              - "0.5877855695485135"
111          - values:
112              - "0.7"
113              - "0.8910061441444742"
114          - values:
115              - "0.9"
116              - "0.9876886994127743"
117          - values:
118              - "0.99"
119              - "0.9998764601533426"
120          - values:
121              - "1.0"
122              - "0.9999994141943497"
123          - values:
124              - "-0.0"
125              - "0.0"
126          - values:
127              - "-0.01"
128              - "-0.015707263992640733"
129          - values:
130              - "-0.1"
131              - "-0.1564340000173055"
132          - values:
133              - "-0.123456789"
134              - "-0.19271172682380572"
135          - values:
136              - "-0.14"
137              - "-0.21814267639869997"
138          - values:
139              - "-0.4"
140              - "-0.5877855695485135"
141          - values:
142              - "-0.7"
143              - "-0.8910061441444742"
144          - values:
145              - "-0.9"
146              - "-0.9876886994127743"
147          - values:
148              - "-0.99"
149              - "-0.9998764601533426"
150          - values:
151              - "-1.0"
152              - "-0.9999994141943497"
153
154      - file-name: "cheb3"
155        isa:
156          instructions:
157            - sig: fadd2.64 v:in:f64
158              acc: inout:f64
159              format: [op_v_8]
160            - sig: fmul2.64 v:in:f64
161              acc: inout:f64
162              format: [op_v_8]
163        tags: ['tsan', 'irtoc_ignore']
164        check-type: empty
165        code-template: |
          # Entry-point body; the enclosing function header is not part of this template
          # (presumably emitted by the test generator - confirm).
          # The two placeholders are presumably filled from each case's values in order
          # (input, expected) - confirm against the generator.
              fmovi.64 v0, %s # v0 = argument x
              call.short get_value, v0 # acc = get_value(x)
              sta.64 v0 # v0 = computed value
              fldai.64 %s # acc = expected value
              fcmpg.64 v0 # compare expected with computed; the comparison result stays in acc
              return # returns with the comparison result in acc
          }
174
175          .function f64 get_value(f64 a0) {
176
177              # Calculate Chebyshev polynom value iteratively
178              # Since on [-1; 1] result value is limited by [-1; 1] iteration is possible
179              movi v0, 100
180              mov.64 v1, a0
181          loop:
182              ldai 0
183              jeq  v0, exit
184
185              lda.64 v1
186              fmul2.64 v1
187              fmul2.64 v1
188              fmovi.64 v2, 4.0
189              fmul2.64 v2 # 4 * x^3
190
191              sta.64 v3
192
193              lda.64 v1
194              fmovi.64 v2, -3.0
195              fmul2.64 v2
196              fadd2.64 v3
197              sta.64 v1
198
199           #  4*x*x*x - 3*x
200
201              inci v0, -1
202              jmp loop
203          exit:
204              lda.64 v1
205              return.64
206          }
207        cases:
208          - values:
209              - "0.0"
210              - "0.0"
211          - values:
212              - "0.3333333333333333"
213              - "0.9711499729865962"
214          - values:
215              - "0.9"
216              - "0.6562849760306007"
217          - values:
218              - "0.12"
219              - "0.7565938935844838"
220          - values:
221              - "0.55"
222              - "-0.6575967480875675"
223          - values:
224              - "0.5"
225              - "-1.0"
226          - values:
227              - "0.66666"
228              - "0.9388838883478421"
229          - values:
230              - "0.87654321"
231              - "0.02200409867255329"
232          - values:
233              - "1.0"
234              - "1.0"
235          - values:
236              - "-0.0"
237              - "0.0"
238          - values:
239              - "-0.3333333333333333"
240              - "-0.9711499729865962"
241          - values:
242              - "-0.9"
243              - "-0.6562849760306007"
244          - values:
245              - "-0.12"
246              - "-0.7565938935844838"
247          - values:
248              - "-0.55"
249              - "0.6575967480875675"
250          - values:
251              - "-0.5"
252              - "1.0"
253          - values:
254              - "-0.66666"
255              - "-0.9388838883478421"
256          - values:
257              - "-0.87654321"
258              - "-0.02200409867255329"
259          - values:
260              - "-1.0"
261              - "-1.0"
262
263      - file-name: "cheb11"
264        isa:
265          instructions:
266            - sig: fadd2.64 v:in:f64
267              acc: inout:f64
268              format: [op_v_8]
269            - sig: fmul2.64 v:in:f64
270              acc: inout:f64
271              format: [op_v_8]
272        tags: ['tsan', 'irtoc_ignore']
273        check-type: empty
274        code-template: |
          # Entry-point body; the enclosing function header is not part of this template
          # (presumably emitted by the test generator - confirm).
          # The two placeholders are presumably filled from each case's values in order
          # (input, expected) - confirm against the generator.
              fmovi.64 v0, %s # v0 = argument x
              call.short get_value, v0 # acc = get_value(x)
              sta.64 v0 # v0 = computed value
              fldai.64 %s # acc = expected value
              fcmpg.64 v0 # compare expected with computed; the comparison result stays in acc
              return # returns with the comparison result in acc
          }
283
284          .function f64 pow_mul(i32 a0, f64 a1, f64 a2) {
285              mov v0, a0
286              mov.64 v1, a1
287              mov.64 v2, a2
288              call.short pow, v0, v1
289              fmul2.64 v2
290              return.64
291          }
292
293          .function  f64 pow (i32 a0, f64 a1) {
294              mov.64 v2, a1  # v2 - value for calculation
295              fmovi.64 v1, 1.0 # v1 - initial value
296              mov v3, a0 # v3 - iteration counter
297          pow_iter:
298              ldai 0
299              jeq v3, exit
300              lda.64 v1
301              fmul2.64 v2
302              sta.64 v1
303              inci v3, -1
304              jmp pow_iter
305          exit:
306              lda.64 v1
307              return.64
308          }
309
310          .function f64 get_value(f64 a0) {
311
312              # Calculate Chebyshev polynom value iteratively
313              # x = 1024 * Math.pow(x, 11) - 2816 * Math.pow(x,9) + 2816 * Math.pow(x,7) - 1232 * Math.pow(x,5) + 220*Math.pow(x,3) - 11 *x;
314              # Since on [-1; 1] result value is limited by [-1; 1] iteration is possible
315              # Repeat 100 times
316
317              movi v0, 100
318              mov.64 v4, a0
319
320          loop:
321              mov.64 v1, v4
322              ldai 0
323              jeq  v0, exit
324
325              movi v2, 11
326              fmovi.64 v3, 1024.0
327              call pow_mul, v2, v1, v3 # acc = v1^11*1024
328              sta.64 v4
329
330              movi v2, 9
331              fmovi.64 v3, -2816.0
332              call pow_mul, v2, v1, v3 # acc = -v1^9*2816
333              fadd2.64 v4
334              sta.64 v4
335
336              movi v2, 7
337              fmovi.64 v3, 2816.0
338              call pow_mul, v2, v1, v3 # acc = v1^7*2816
339              fadd2.64 v4
340              sta.64 v4
341
342              movi v2, 5
343              fmovi.64 v3, -1232.0
344              call pow_mul, v2, v1, v3 # acc = -v1^5*1232
345              fadd2.64 v4
346              sta.64 v4
347
348              movi v2, 3
349              fmovi.64 v3, 220.0
350              call pow_mul, v2, v1, v3 # acc = v1^3*220
351              fadd2.64 v4
352              sta.64 v4
353
354              movi v2, 1
355              fmovi.64 v3, -11.0
356              call pow_mul, v2, v1, v3 # acc = -v1^1*11
357              fadd2.64 v4
358              sta.64 v4
359
360              inci v0, -1
361              jmp loop
362          exit:
363              lda.64 v4
364              return.64
365          }
366        cases:
367        - values:
368              - "0.0"
369              - "0.0"
370        - values:
371              - "0.3333333333333333"
372              - "0.7868241150060133"
373        - values:
374              - "0.9"
375              - "-0.22919667772362332"
376        - values:
377              - "0.12"
378              - "0.05719322440846497"
379        - values:
380              - "0.55"
381              - "-0.7222928963047401"
382        - values:
383              - "0.5"
384              - "0.5"
385        - values:
386              - "0.66666"
387              - "-0.9376142114702866"
388        - values:
389              - "0.87654321"
390              - "-0.2508041890501467"
391        - values:
392              - "1.0"
393              - "1.0"
394        - values:
395            - "-0.0"
396            - "0.0"
397        - values:
398            - "-0.3333333333333333"
399            - "-0.7868241150060133"
400        - values:
401            - "-0.9"
402            - "0.22919667772362332"
403        - values:
404            - "-0.12"
405            - "-0.05719322440846497"
406        - values:
407            - "-0.55"
408            - "0.7222928963047401"
409        - values:
410            - "-0.5"
411            - "-0.5"
412        - values:
413            - "-0.66666"
414            - "0.9376142114702866"
415        - values:
416            - "-0.87654321"
417            - "0.2508041890501467"
418        - values:
419            - "-1.0"
420            - "-1.0"
421
422      - file-name: "sin_taylor"
423        isa:
424          instructions:
425            - sig: fadd2.64 v:in:f64
426              acc: inout:f64
427              format: [op_v_8]
428            - sig: fmul2.64 v:in:f64
429              acc: inout:f64
430              format: [op_v_8]
431            - sig: fdiv2.64 v:in:f64
432              acc: inout:f64
433              format: [op_v_8]
434        tags: ['tsan', 'irtoc_ignore']
435        check-type: empty
436        code-template: |
437          #   Calculate sin(x) using Taylor series for small x
438          #   Check relative deviation
439              fmovi.64 v0, %s
440              call.short get_value, v0
441              sta.64 v0
442              fldai.64 %s
443              fsub2.64 v0
444              fdiv2.64 v0
445              sta.64 v0
446              fldai.64 0.0
447              fcmpg.64 v0
448              jeqz l1
449              lda.64 v0
450              fneg.64
451              sta.64 v0
452          l1:
453              fldai.64 0.0001
454              fcmpg.64 v0
455              jltz err
456
457              ldai 0
458              return
459          err:
460              ldai 1
461              return
462          }
463
464          .function f64 fact(i32 a0) {
465              mov v0, a0
466              fmovi.64 v1, 1.0
467          loop:
468              ldai 1
469              jeq v0, exit
470
471              lda v0
472              i32tof64
473              fmul2.64 v1
474              sta.64 v1
475              inci v0, -1
476              jmp loop
477
478          exit:
479              lda.64 v1
480              return.64
481          }
482
483          .function f64 pow_div(i32 a0, f64 a1, f64 a2) {
484              mov v0, a0
485              mov.64 v1, a1
486              mov.64 v2, a2
487              call.short pow, v0, v1
488              fdiv2.64 v2
489              return.64
490          }
491
492          .function  f64 pow (i32 a0, f64 a1) {
493              mov.64 v2, a1  # v2 - value for calculation
494              fmovi.64 v1, 1.0 # v1 - initial value
495              mov v3, a0 # v3 - iteration counter
496          pow_iter:
497              ldai 0
498              jeq v3, exit
499              lda.64 v1
500              fmul2.64 v2
501              sta.64 v1
502              inci v3, -1
503              jmp pow_iter
504          exit:
505              lda.64 v1
506              return.64
507          }
508
509          .function f64 get_value(f64 a0) {
510
511              # Taylor series for sin(x)
512              movi v0, 1 # n
513              fmovi.64 v1, 1.0 # sign
514              fmovi.64 v2, 0.0 # result
515              mov.64 v3, a0
516
517          loop:
518
519              ldai 20
520              jlt  v0, exit
521
522
523              call fact, v0 # acc = v0!
524              sta.64 v4 # v4 = n!
525
526              call pow_div, v0, v3, v4 # x^n/n!
527              fmul2.64 v1 # *sign
528              fadd2.64 v2
529              sta.64 v2 # result
530
531              fldai.64 -1.0
532              fmul2.64 v1 # flip sign
533              sta.64 v1
534
535              inci v0, 2
536              jmp loop
537
538          exit:
539              lda.64 v2
540              return.64
541          }
542        cases:
543          - values:
544              - "1.0e-4"
545              - "9.999999950000001e-5"
546          - values:
547              - "0.001"
548              - "9.999995000000417e-4"
549          - values:
550              - "0.0015"
551              - "0.0014999983125003164"
552          - values:
553              - "0.005"
554              - "0.004999937500130208"
555          - values:
556              - "0.01"
557              - "0.009999500004166653"
558          - values:
559              - "0.0123456789"
560              - "0.012344738073763926"
561          - values:
562              - "0.015"
563              - "0.014998312531640386"
564          - values:
565              - "-1.0e-4"
566              - "-9.999999950000001e-5"
567          - values:
568              - "-0.001"
569              - "-9.999995000000417e-4"
570          - values:
571              - "-0.0015"
572              - "-0.0014999983125003164"
573          - values:
574              - "-0.005"
575              - "-0.004999937500130208"
576          - values:
577              - "-0.01"
578              - "-0.009999500004166653"
579          - values:
580              - "-0.0123456789"
581              - "-0.012344738073763926"
582          - values:
583              - "-0.015"
584              - "-0.014998312531640386"
585