xref: /third_party/mesa3d/src/panfrost/util/lcra.c (revision bf215546)
1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2019 Collabora, Ltd.
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci * Authors (Collabora):
24bf215546Sopenharmony_ci *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25bf215546Sopenharmony_ci */
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include <stdio.h>
28bf215546Sopenharmony_ci#include <assert.h>
29bf215546Sopenharmony_ci#include <stdlib.h>
30bf215546Sopenharmony_ci#include <string.h>
31bf215546Sopenharmony_ci#include <limits.h>
32bf215546Sopenharmony_ci#include "util/macros.h"
33bf215546Sopenharmony_ci#include "util/u_math.h"
34bf215546Sopenharmony_ci#include "lcra.h"
35bf215546Sopenharmony_ci
36bf215546Sopenharmony_ci/* This module is the reference implementation of "Linearly Constrained
37bf215546Sopenharmony_ci * Register Allocation". The paper is available in PDF form
38bf215546Sopenharmony_ci * (https://people.collabora.com/~alyssa/LCRA.pdf) as well as Markdown+LaTeX
39bf215546Sopenharmony_ci * (https://gitlab.freedesktop.org/alyssa/lcra/blob/master/LCRA.md)
40bf215546Sopenharmony_ci */
41bf215546Sopenharmony_ci
42bf215546Sopenharmony_cistruct lcra_state *
43bf215546Sopenharmony_cilcra_alloc_equations(
44bf215546Sopenharmony_ci                unsigned node_count, unsigned class_count)
45bf215546Sopenharmony_ci{
46bf215546Sopenharmony_ci        struct lcra_state *l = calloc(1, sizeof(*l));
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_ci        l->node_count = node_count;
49bf215546Sopenharmony_ci        l->class_count = class_count;
50bf215546Sopenharmony_ci
51bf215546Sopenharmony_ci        l->alignment = calloc(sizeof(l->alignment[0]), node_count);
52bf215546Sopenharmony_ci        l->linear = calloc(sizeof(l->linear[0]), node_count * node_count);
53bf215546Sopenharmony_ci        l->modulus = calloc(sizeof(l->modulus[0]), node_count);
54bf215546Sopenharmony_ci        l->class = calloc(sizeof(l->class[0]), node_count);
55bf215546Sopenharmony_ci        l->class_start = calloc(sizeof(l->class_start[0]), class_count);
56bf215546Sopenharmony_ci        l->class_disjoint = calloc(sizeof(l->class_disjoint[0]), class_count * class_count);
57bf215546Sopenharmony_ci        l->class_size = calloc(sizeof(l->class_size[0]), class_count);
58bf215546Sopenharmony_ci        l->spill_cost = calloc(sizeof(l->spill_cost[0]), node_count);
59bf215546Sopenharmony_ci        l->solutions = calloc(sizeof(l->solutions[0]), node_count);
60bf215546Sopenharmony_ci
61bf215546Sopenharmony_ci        memset(l->solutions, ~0, sizeof(l->solutions[0]) * node_count);
62bf215546Sopenharmony_ci
63bf215546Sopenharmony_ci        return l;
64bf215546Sopenharmony_ci}
65bf215546Sopenharmony_ci
66bf215546Sopenharmony_civoid
67bf215546Sopenharmony_cilcra_free(struct lcra_state *l)
68bf215546Sopenharmony_ci{
69bf215546Sopenharmony_ci        if (!l)
70bf215546Sopenharmony_ci                return;
71bf215546Sopenharmony_ci
72bf215546Sopenharmony_ci        free(l->alignment);
73bf215546Sopenharmony_ci        free(l->linear);
74bf215546Sopenharmony_ci        free(l->modulus);
75bf215546Sopenharmony_ci        free(l->class);
76bf215546Sopenharmony_ci        free(l->class_start);
77bf215546Sopenharmony_ci        free(l->class_disjoint);
78bf215546Sopenharmony_ci        free(l->class_size);
79bf215546Sopenharmony_ci        free(l->spill_cost);
80bf215546Sopenharmony_ci        free(l->solutions);
81bf215546Sopenharmony_ci
82bf215546Sopenharmony_ci        free(l);
83bf215546Sopenharmony_ci}
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_civoid
86bf215546Sopenharmony_cilcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2, unsigned bound)
87bf215546Sopenharmony_ci{
88bf215546Sopenharmony_ci        l->alignment[node] = (align_log2 + 1) | (bound << 16);
89bf215546Sopenharmony_ci}
90bf215546Sopenharmony_ci
91bf215546Sopenharmony_civoid
92bf215546Sopenharmony_cilcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2)
93bf215546Sopenharmony_ci{
94bf215546Sopenharmony_ci        l->class_disjoint[(c1 * l->class_count) + c2] = true;
95bf215546Sopenharmony_ci        l->class_disjoint[(c2 * l->class_count) + c1] = true;
96bf215546Sopenharmony_ci}
97bf215546Sopenharmony_ci
98bf215546Sopenharmony_civoid
99bf215546Sopenharmony_cilcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len)
100bf215546Sopenharmony_ci{
101bf215546Sopenharmony_ci        if (node < l->node_count && l->alignment[node]) {
102bf215546Sopenharmony_ci                unsigned BA = l->alignment[node];
103bf215546Sopenharmony_ci                unsigned alignment = (BA & 0xffff) - 1;
104bf215546Sopenharmony_ci                unsigned bound = BA >> 16;
105bf215546Sopenharmony_ci                l->modulus[node] = DIV_ROUND_UP(bound - len + 1, 1 << alignment);
106bf215546Sopenharmony_ci        }
107bf215546Sopenharmony_ci}
108bf215546Sopenharmony_ci
109bf215546Sopenharmony_civoid
110bf215546Sopenharmony_cilcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i, unsigned j, unsigned cmask_j)
111bf215546Sopenharmony_ci{
112bf215546Sopenharmony_ci        if (i == j)
113bf215546Sopenharmony_ci                return;
114bf215546Sopenharmony_ci
115bf215546Sopenharmony_ci        if (l->class_disjoint[(l->class[i] * l->class_count) + l->class[j]])
116bf215546Sopenharmony_ci                return;
117bf215546Sopenharmony_ci
118bf215546Sopenharmony_ci        uint32_t constraint_fw = 0;
119bf215546Sopenharmony_ci        uint32_t constraint_bw = 0;
120bf215546Sopenharmony_ci
121bf215546Sopenharmony_ci        for (unsigned D = 0; D < 16; ++D) {
122bf215546Sopenharmony_ci                if (cmask_i & (cmask_j << D)) {
123bf215546Sopenharmony_ci                        constraint_bw |= (1 << (15 + D));
124bf215546Sopenharmony_ci                        constraint_fw |= (1 << (15 - D));
125bf215546Sopenharmony_ci                }
126bf215546Sopenharmony_ci
127bf215546Sopenharmony_ci                if (cmask_i & (cmask_j >> D)) {
128bf215546Sopenharmony_ci                        constraint_fw |= (1 << (15 + D));
129bf215546Sopenharmony_ci                        constraint_bw |= (1 << (15 - D));
130bf215546Sopenharmony_ci                }
131bf215546Sopenharmony_ci        }
132bf215546Sopenharmony_ci
133bf215546Sopenharmony_ci        l->linear[j * l->node_count + i] |= constraint_fw;
134bf215546Sopenharmony_ci        l->linear[i * l->node_count + j] |= constraint_bw;
135bf215546Sopenharmony_ci}
136bf215546Sopenharmony_ci
137bf215546Sopenharmony_cistatic bool
138bf215546Sopenharmony_cilcra_test_linear(struct lcra_state *l, unsigned *solutions, unsigned i)
139bf215546Sopenharmony_ci{
140bf215546Sopenharmony_ci        unsigned *row = &l->linear[i * l->node_count];
141bf215546Sopenharmony_ci        signed constant = solutions[i];
142bf215546Sopenharmony_ci
143bf215546Sopenharmony_ci        for (unsigned j = 0; j < l->node_count; ++j) {
144bf215546Sopenharmony_ci                if (solutions[j] == ~0) continue;
145bf215546Sopenharmony_ci
146bf215546Sopenharmony_ci                signed lhs = solutions[j] - constant;
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_ci                if (lhs < -15 || lhs > 15)
149bf215546Sopenharmony_ci                        continue;
150bf215546Sopenharmony_ci
151bf215546Sopenharmony_ci                if (row[j] & (1 << (lhs + 15)))
152bf215546Sopenharmony_ci                        return false;
153bf215546Sopenharmony_ci        }
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_ci        return true;
156bf215546Sopenharmony_ci}
157bf215546Sopenharmony_ci
158bf215546Sopenharmony_cibool
159bf215546Sopenharmony_cilcra_solve(struct lcra_state *l)
160bf215546Sopenharmony_ci{
161bf215546Sopenharmony_ci        for (unsigned step = 0; step < l->node_count; ++step) {
162bf215546Sopenharmony_ci                if (l->solutions[step] != ~0) continue;
163bf215546Sopenharmony_ci                if (l->alignment[step] == 0) continue;
164bf215546Sopenharmony_ci
165bf215546Sopenharmony_ci                unsigned _class = l->class[step];
166bf215546Sopenharmony_ci                unsigned class_start = l->class_start[_class];
167bf215546Sopenharmony_ci
168bf215546Sopenharmony_ci                unsigned BA = l->alignment[step];
169bf215546Sopenharmony_ci                unsigned shift = (BA & 0xffff) - 1;
170bf215546Sopenharmony_ci                unsigned bound = BA >> 16;
171bf215546Sopenharmony_ci
172bf215546Sopenharmony_ci                unsigned P = bound >> shift;
173bf215546Sopenharmony_ci                unsigned Q = l->modulus[step];
174bf215546Sopenharmony_ci                unsigned r_max = l->class_size[_class];
175bf215546Sopenharmony_ci                unsigned k_max = r_max >> shift;
176bf215546Sopenharmony_ci                unsigned m_max = k_max / P;
177bf215546Sopenharmony_ci                bool succ = false;
178bf215546Sopenharmony_ci
179bf215546Sopenharmony_ci                for (unsigned m = 0; m < m_max; ++m) {
180bf215546Sopenharmony_ci                        for (unsigned n = 0; n < Q; ++n) {
181bf215546Sopenharmony_ci                                l->solutions[step] = ((m * P + n) << shift) + class_start;
182bf215546Sopenharmony_ci                                succ = lcra_test_linear(l, l->solutions, step);
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_ci                                if (succ) break;
185bf215546Sopenharmony_ci                        }
186bf215546Sopenharmony_ci
187bf215546Sopenharmony_ci                        if (succ) break;
188bf215546Sopenharmony_ci                }
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ci                /* Out of registers - prepare to spill */
191bf215546Sopenharmony_ci                if (!succ) {
192bf215546Sopenharmony_ci                        l->spill_class = l->class[step];
193bf215546Sopenharmony_ci                        return false;
194bf215546Sopenharmony_ci                }
195bf215546Sopenharmony_ci        }
196bf215546Sopenharmony_ci
197bf215546Sopenharmony_ci        return true;
198bf215546Sopenharmony_ci}
199bf215546Sopenharmony_ci
200bf215546Sopenharmony_ci/* Register spilling is implemented with a cost-benefit system. Costs are set
201bf215546Sopenharmony_ci * by the user. Benefits are calculated from the constraints. */
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_civoid
204bf215546Sopenharmony_cilcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost)
205bf215546Sopenharmony_ci{
206bf215546Sopenharmony_ci        if (node < l->node_count)
207bf215546Sopenharmony_ci                l->spill_cost[node] = cost;
208bf215546Sopenharmony_ci}
209bf215546Sopenharmony_ci
210bf215546Sopenharmony_cistatic unsigned
211bf215546Sopenharmony_cilcra_count_constraints(struct lcra_state *l, unsigned i)
212bf215546Sopenharmony_ci{
213bf215546Sopenharmony_ci        unsigned count = 0;
214bf215546Sopenharmony_ci        unsigned *constraints = &l->linear[i * l->node_count];
215bf215546Sopenharmony_ci
216bf215546Sopenharmony_ci        for (unsigned j = 0; j < l->node_count; ++j)
217bf215546Sopenharmony_ci                count += util_bitcount(constraints[j]);
218bf215546Sopenharmony_ci
219bf215546Sopenharmony_ci        return count;
220bf215546Sopenharmony_ci}
221bf215546Sopenharmony_ci
222bf215546Sopenharmony_cisigned
223bf215546Sopenharmony_cilcra_get_best_spill_node(struct lcra_state *l)
224bf215546Sopenharmony_ci{
225bf215546Sopenharmony_ci        /* If there are no constraints on a node, do not pick it to spill under
226bf215546Sopenharmony_ci         * any circumstance, or else we would hang rather than fail RA */
227bf215546Sopenharmony_ci        float best_benefit = 0.0;
228bf215546Sopenharmony_ci        signed best_node = -1;
229bf215546Sopenharmony_ci
230bf215546Sopenharmony_ci        for (unsigned i = 0; i < l->node_count; ++i) {
231bf215546Sopenharmony_ci                /* Find spillable nodes */
232bf215546Sopenharmony_ci                if (l->class[i] != l->spill_class) continue;
233bf215546Sopenharmony_ci                if (l->spill_cost[i] < 0) continue;
234bf215546Sopenharmony_ci
235bf215546Sopenharmony_ci                /* Adapted from Chaitin's heuristic */
236bf215546Sopenharmony_ci                float constraints = lcra_count_constraints(l, i);
237bf215546Sopenharmony_ci                float cost = (l->spill_cost[i] + 1);
238bf215546Sopenharmony_ci                float benefit = constraints / cost;
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_ci                if (benefit > best_benefit) {
241bf215546Sopenharmony_ci                        best_benefit = benefit;
242bf215546Sopenharmony_ci                        best_node = i;
243bf215546Sopenharmony_ci                }
244bf215546Sopenharmony_ci        }
245bf215546Sopenharmony_ci
246bf215546Sopenharmony_ci        return best_node;
247bf215546Sopenharmony_ci}
248