1bf215546Sopenharmony_ci#!/bin/bash
2bf215546Sopenharmony_ci
3bf215546Sopenharmony_ci# This tests for the size of the register file. We do this by launching a
4bf215546Sopenharmony_ci# lot of workgroups with only one invocation, which causes the GPU to be
5bf215546Sopenharmony_ci# saturated with in-flight waves. Each thread records its wave id using "getwid"
6bf215546Sopenharmony_ci# (only available in a6xx+!) and stores it in the buffer. We then vary the
7bf215546Sopenharmony_ci# register footprint by introducing uses of higher and higher registers. This
8bf215546Sopenharmony_ci# lets us determine:
9bf215546Sopenharmony_ci# 1. The total number of waves available (always 16 for known models)
10bf215546Sopenharmony_ci# 2. The wave granularity (how many waves are always launched together, always 2
11bf215546Sopenharmony_ci# for known models).
12bf215546Sopenharmony_ci# 3. The total size of the register file that is divvied up between the waves.
13bf215546Sopenharmony_ci
# Abort on the first failing command. pipefail extends that to pipelines:
# every ./computerator run in this script is piped into tee, and without
# pipefail the pipeline's status would be tee's, hiding a failed run.
set -eo pipefail
15bf215546Sopenharmony_ci
# Print the probe shader's assembly source on stdout.
#
# Arguments:
#   $1 - index N of the highest register the shader should touch. It is
#        substituted into the "add.f" line as rN.w, which is the only part
#        of the shader text that varies between calls.
# Outputs:
#   The shader text (directives plus instructions) on stdout, intended to
#   be piped into ./computerator.
gen_shader() {
	# Keep the index local: the driver loop uses a variable with the same
	# name, and a call made in the current shell must not overwrite it.
	local n=$1
	# Unquoted delimiter on purpose: $n has to be expanded inside the text.
	cat <<EOF
@localsize 1, 1, 1
@buf 128  ; g[0]
@wgid(r48.x)
getwid.u32 r1.x
mov.u32u32 r0.x, r48.x

; busy loop to make sure it actually uses all possible waves
mov.u32u32 r0.y, 16
(rpt2)nop
loop:
cmps.u.gt p0.x, r0.y, 0
sub.u r0.y, r0.y, 1
(rpt5)nop
br p0.x, #loop
add.f r1.y, r1.x, r$n.w

(ss)(sy)(rpt5)nop
stib.b.untyped.1d.u32.1.imm r1.x, r0.x, 0
end
nop
EOF
}
41bf215546Sopenharmony_ci
# Generate the reference: run the shader with the smallest register index
# (r1) and keep its output; every later run is compared against this log.
gen_shader 1 | ./computerator -g 128,1,1 | tee reference.log

# Raise the highest register touched one step at a time and stop at the
# first index whose output no longer matches the reference.
for n in {2..32}; do
	echo "Trying max reg: r$n"
	gen_shader "$n" | ./computerator -g 128,1,1 | tee new.log
	# diff must be tested inside the condition: run as a bare command, its
	# non-zero "files differ" status would trip `set -e` and terminate the
	# script before the report below could be printed.
	if ! diff reference.log new.log; then
		echo "Changes at r$n"
		break
	fi
done
54