#!/bin/bash
#
# Test various instructions to check whether half<->full widening/narrowing
# works.  The basic premise is to perform the same instruction with and
# without the widening/narrowing folded in and check if the results match.
#
# Note this doesn't currently differentiate between signed/unsigned/bool,
# and just assumes int is signed (since unsigned is basically(ish) like
# signed but without sign extension)
#
# TODO probably good to pick numeric src values that are better at
# triggering edge cases, while still not losing precision in a
# full->half->full sequence.. but some instructions like absneg don't even
# appear to be subtly wrong when you try to fold in a precision conversion.
#
# The tests are run through './computerator', so the script expects that
# binary in the current directory.
#
# add '-v' arg to see the result values

set -e

#
# Templates for float->float instructions:
#
# Each entry is a single-quoted template, so $dst/$src1/$src2/$src3 stay
# literal here and are only substituted when expand() evals the template.
# That eval is also why the parentheses of the (neg)/(abs) modifiers are
# backslash-escaped.
#
f2f_instrs=(
	'add.f $dst, $src1, $src2'
	'min.f $dst, $src1, $src2'
	'min.f $dst, $src2, $src1'
	'max.f $dst, $src1, $src2'
	'max.f $dst, $src2, $src1'
	'mul.f $dst, $src1, $src2'
	'sign.f $dst, $src1'
	'absneg.f $dst, \(neg\)$src1'
	'absneg.f $dst, \(abs\)$src1'
	'floor.f $dst, $src1'
	'ceil.f $dst, $src1'
	'rndne.f $dst, $src1'
	'rndaz.f $dst, $src1'
	'trunc.f $dst, $src1'
)

#
# Templates for float->int instructions:
#
# Same template format as the other *_instrs arrays: $dst/$src1/$src2 are
# kept literal by the single quotes and substituted later by expand().
#
f2i_instrs=(
	'cmps.f.gt $dst, $src1, $src2'
	'cmps.f.lt $dst, $src1, $src2'
	'cmpv.f.gt $dst, $src1, $src2'
	'cmpv.f.lt $dst, $src1, $src2'
)

#
# Templates for int->int instructions:
#
# Same template format as the other *_instrs arrays: $dst/$src1/$src2/$src3
# are kept literal by the single quotes and substituted later by expand().
# Which of the three sources an entry uses differs per instruction.
#
# NOTE(review): the two cmps.f.* entries also appear in the float->int list
# and are run here with the integer setup -- confirm that is intended.
#
i2i_instrs=(
	'add.u $dst, $src1, $src2'
	'add.s $dst, $src1, $src2'
	'sub.u $dst, $src1, $src2'
	'sub.s $dst, $src1, $src2'
	'cmps.f.gt $dst, $src1, $src2'
	'cmps.f.lt $dst, $src1, $src2'
	'min.u $dst, $src1, $src2'
	'min.u $dst, $src2, $src1'
	'min.s $dst, $src1, $src2'
	'min.s $dst, $src2, $src1'
	'max.u $dst, $src1, $src2'
	'max.u $dst, $src2, $src1'
	'max.s $dst, $src1, $src2'
	'max.s $dst, $src2, $src1'
	'absneg.s $dst, \(neg\)$src1'
	'absneg.s $dst, \(abs\)$src1'
	'and.b $dst, $src2, $src3'
	'or.b $dst, $src1, $src2'
	'not.b $dst, $src1'
	'xor.b $dst, $src1, $src2'
	'cmpv.u.gt $dst, $src1, $src2'
	'cmpv.u.lt $dst, $src1, $src2'
	'cmpv.s.gt $dst, $src1, $src2'
	'cmpv.s.lt $dst, $src1, $src2'
	'mul.u24 $dst, $src1, $src2'
	'mul.s24 $dst, $src1, $src2'
	'mull.u $dst, $src1, $src2'
	'bfrev.b $dst, $src1'
	'clz.s $dst, $src2'
	'clz.b $dst, $src2'
	'shl.b $dst, $src1, $src2'
	'shr.b $dst, $src3, $src1'
	'ashr.b $dst, $src3, $src1'
	'mgen.b $dst, $src1, $src2'
	'getbit.b $dst, $src3, $src2'
	'setrm $dst, $src1'
	'cbits.b $dst, $src3'
	'shb $dst, $src1, $src2'
	'msad $dst, $src1, $src2'
)

#
# Helper to expand instruction template:
#
#   $1     - instruction template (an entry of one of the *_instrs arrays)
#   $2     - register substituted for $dst
#   $3..$5 - registers substituted for $src1, $src2 and $src3
#
# Prints the expanded instruction on stdout.
#
expand() {
	# Keep these local: they are only read by the eval below, and 'instr'
	# is also used as a variable name elsewhere in the script.
	# shellcheck disable=SC2034
	local instr=$1 dst=$2 src1=$3 src2=$4 src3=$5

	# The eval is deliberate: it substitutes $dst/$srcN and strips the
	# backslash escapes in the template.  Templates are trusted, in-file
	# strings; never pass external input through here.
	eval "echo $instr"
}

#
# Print the four variants of one instruction template, followed by a
# "(rpt5)nop":
#
#   control, half->half:  hr1.x <- hr0.x, hr0.y, hr0.z
#   test,    full->half:  hr1.y <- r1.x,  r1.y,  r1.z
#   control, full->full:  r2.x  <- r1.x,  r1.y,  r1.z
#   test,    half->full:  r2.y  <- hr0.x, hr0.y, hr0.z
#
#   $1 - instruction template
#
expand_test() {
	local instr=$1

	# The template contains spaces, so it must be quoted to reach expand
	# as a single argument regardless of the current IFS.
	echo '; control, half->half:'
	expand "$instr" "hr1.x" "hr0.x" "hr0.y" "hr0.z"
	echo '; test, full->half:'
	expand "$instr" "hr1.y" "r1.x" "r1.y" "r1.z"

	echo '; control, full->full:'
	expand "$instr" "r2.x" "r1.x" "r1.y" "r1.z"
	echo '; test, half->full:'
	expand "$instr" "r2.y" "hr0.x" "hr0.y" "hr0.z"

	echo "(rpt5)nop"
}

#
# Helpers to construct test program assembly:
#

# Print the directives that start every generated test program: a 1x1x1
# local size and one buffer, g[0], declared with "@buf 4".
# NOTE(review): the 4 is presumably the buffer size in dwords (footer_asm
# stores four results) -- confirm against the computerator syntax.
header_asm() {
	# Quoted delimiter: the text is emitted literally, nothing is expanded.
	cat <<'EOF'
@localsize 1, 1, 1
@buf 4  ; g[0]
EOF
}

# Print the tail of every generated test program: load the destination
# offsets 0..3 into r3.x..r3.w, store r2.x..r2.w at those offsets and end
# the program.  The resulting buffer order is:
#   0: control full->full   1: test half->full
#   2: control half->half   3: test full->half
footer_asm() {
	# Quoted delimiter: the text is emitted literally, nothing is expanded.
	cat <<'EOF'
; dest offsets:
mov.u32u32 r3.x, 0
mov.u32u32 r3.y, 1
mov.u32u32 r3.z, 2
mov.u32u32 r3.w, 3
(rpt5)nop

; and store results:
stib.untyped.1d.u32.1 r2.x, r3.x, 0   ; control: full->full
stib.untyped.1d.u32.1 r2.y, r3.y, 0   ; test:    half->full
stib.untyped.1d.u32.1 r2.z, r3.z, 0   ; control: half->half
stib.untyped.1d.u32.1 r2.w, r3.w, 0   ; test:    full->half
(sy)nop
end
EOF
}

# Print the register setup for the float tests: the same three source
# values (1.0, 2.0, 3.0) are loaded into the half registers hr0.x/y/z and
# into the full registers r1.x/y/z.
setup_asm_float() {
	# Quoted delimiter: the text is emitted literally, nothing is expanded.
	cat <<'EOF'
; hr0->hr1 (r0) avail for half, hr0 for src, hr1 for dst
; r1->r2 avail for full, r1 for src, r2 for dst
cov.f32f16 hr0.x, (1.0)
cov.f32f16 hr0.y, (2.0)
cov.f32f16 hr0.z, (3.0)
mov.f32f32 r1.x,  (1.0)
mov.f32f32 r1.y,  (2.0)
mov.f32f32 r1.z,  (3.0)
(rpt5)nop
EOF
}

# Print the register setup for the integer tests: the same three source
# values (1, -2, 3) are loaded into the half registers hr0.x/y/z and into
# the full registers r1.x/y/z.
setup_asm_int() {
	# Quoted delimiter: the text is emitted literally, nothing is expanded.
	cat <<'EOF'
; hr0->hr1 (r0) avail for half, hr0 for src, hr1 for dst
; r1->r2 avail for full, r1 for src, r2 for dst
cov.s32s16 hr0.x,  1
cov.s32s16 hr0.y, -2
cov.s32s16 hr0.z,  3
mov.s32s32 r1.x,   1
mov.s32s32 r1.y,  -2
mov.s32s32 r1.z,   3
(rpt5)nop
EOF
}

#
# Generate assembly code to test float->float opcode
#
#   $1 - instruction template
#
# Prints the complete test program on stdout: header, float setup, the
# four expanded variants, conversion of the half results (hr1.x, hr1.y)
# to full floats in r2.z/r2.w, and the footer.
#
f2f_asm() {
	local instr=$1

	header_asm
	setup_asm_float
	# Quoted so the template (which contains spaces) stays one argument.
	expand_test "$instr"

	cat <<'EOF'
; convert half results back to full:
cov.f16f32 r2.z, hr1.x
cov.f16f32 r2.w, hr1.y
EOF

	footer_asm
}

#
# Generate assembly code to test float->int opcode
#
#   $1 - instruction template
#
# Prints the complete test program on stdout: header, float setup, the
# four expanded variants, conversion of the half results (hr1.x, hr1.y)
# to full integers in r2.z/r2.w, and the footer.
#
f2i_asm() {
	local instr=$1

	header_asm
	setup_asm_float
	# Quoted so the template (which contains spaces) stays one argument.
	expand_test "$instr"

	cat <<'EOF'
; convert half results back to full:
cov.s16s32 r2.z, hr1.x
cov.s16s32 r2.w, hr1.y
EOF

	footer_asm
}

#
# Generate assembly code to test int->int opcode
#
#   $1 - instruction template
#
# Prints the complete test program on stdout: header, integer setup, the
# four expanded variants, conversion of the half results (hr1.x, hr1.y)
# to full integers in r2.z/r2.w, and the footer.
#
i2i_asm() {
	local instr=$1

	header_asm
	setup_asm_int
	# Quoted so the template (which contains spaces) stays one argument.
	expand_test "$instr"

	cat <<'EOF'
; convert half results back to full:
cov.s16s32 r2.z, hr1.x
cov.s16s32 r2.w, hr1.y
EOF

	footer_asm
}

#
# Helper to parse computerator output and print results:
#
# Reads the output on stdin and treats the first line containing a tab as
# the result buffer.  Its first four whitespace-separated values are, in
# the order footer_asm stores them:
#
#   1: control, full->full      2: test, half->full
#   3: control, half->half      4: test, full->half
#
# With verbose=true the values are echoed first.  Every mismatch, and a
# result line with fewer than four values, is reported as a "FAIL" on
# stdout.  Failures do not change the exit status, so the remaining tests
# still run under 'set -e'.
#
check_results() {
	# The test loops run with IFS="", so restore normal splitting locally:
	# 'read' must split on blanks and "${vals[*]}" must join with a space.
	local IFS=$' \t\n'
	local line cf tf ch th
	local -a vals=()

	line=$(grep $'\t' | head -n 1) || true
	read -r -a vals <<<"$line" || true

	if [[ "${verbose:-}" == "true" ]]; then
		printf '%s\n' "${vals[*]}"
	fi

	if (( ${#vals[@]} < 4 )); then
		# Without this check an empty or truncated result would either
		# compare equal (and silently pass) or break the comparisons.
		echo "    FAIL: expected 4 result values but got ${#vals[@]}!"
	else
		# Split components of result buffer:
		cf=${vals[0]}
		tf=${vals[1]}
		ch=${vals[2]}
		th=${vals[3]}

		# Sanity test, make sure the control results match:
		if [[ "$cf" != "$ch" ]]; then
			echo "    FAIL: control results do not match!  Half vs full op is not equivalent!"
			echo "    full=$cf half=$ch"
		fi

		# Compare test (with conversion folded) to control:
		if [[ "$cf" != "$tf" ]]; then
			echo "    FAIL: half -> full widening result does not match control!"
			echo "    control=$cf result=$tf"
		fi
		if [[ "$ch" != "$th" ]]; then
			echo "    FAIL: full -> half narrowing result does not match control!"
			echo "    control=$ch result=$th"
		fi
	fi

	# HACK without a delay different invocations
	# of computerator seem to somehow clobber each
	# other.. which isn't great..
	sleep 0.1
}

#
# Run the tests!
#

if [[ "${1:-}" == "-v" ]]; then
	verbose="true"
fi

#
# Run every template of one category: print a "TEST:" line, generate the
# program, pipe it through ./computerator and check the output.
#
#   $1  - generator function (f2f_asm, f2i_asm or i2i_asm)
#   $2+ - instruction templates
#
run_tests() {
	local gen=$1
	shift
	local instr

	for instr in "$@"; do
		echo "TEST: $instr"
		"$gen" "$instr" | ./computerator -g 1,1,1 | check_results
	done
}

# Word splitting stays disabled while the tests run, so that a template
# (which contains spaces) is never split even where it is passed unquoted.
IFS=""
run_tests f2f_asm "${f2f_instrs[@]}"
run_tests f2i_asm "${f2i_instrs[@]}"
run_tests i2i_asm "${i2i_instrs[@]}"