1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2019 Collabora, Ltd. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors (Collabora): 24bf215546Sopenharmony_ci * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include <stdio.h> 28bf215546Sopenharmony_ci#include <assert.h> 29bf215546Sopenharmony_ci#include <stdlib.h> 30bf215546Sopenharmony_ci#include <string.h> 31bf215546Sopenharmony_ci#include <limits.h> 32bf215546Sopenharmony_ci#include "util/macros.h" 33bf215546Sopenharmony_ci#include "util/u_math.h" 34bf215546Sopenharmony_ci#include "lcra.h" 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_ci/* This module is the reference implementation of "Linearly Constrained 37bf215546Sopenharmony_ci * Register Allocation". The paper is available in PDF form 38bf215546Sopenharmony_ci * (https://people.collabora.com/~alyssa/LCRA.pdf) as well as Markdown+LaTeX 39bf215546Sopenharmony_ci * (https://gitlab.freedesktop.org/alyssa/lcra/blob/master/LCRA.md) 40bf215546Sopenharmony_ci */ 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_cistruct lcra_state * 43bf215546Sopenharmony_cilcra_alloc_equations( 44bf215546Sopenharmony_ci unsigned node_count, unsigned class_count) 45bf215546Sopenharmony_ci{ 46bf215546Sopenharmony_ci struct lcra_state *l = calloc(1, sizeof(*l)); 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci l->node_count = node_count; 49bf215546Sopenharmony_ci l->class_count = class_count; 50bf215546Sopenharmony_ci 51bf215546Sopenharmony_ci l->alignment = calloc(sizeof(l->alignment[0]), node_count); 52bf215546Sopenharmony_ci l->linear = calloc(sizeof(l->linear[0]), node_count * node_count); 53bf215546Sopenharmony_ci l->modulus = calloc(sizeof(l->modulus[0]), node_count); 54bf215546Sopenharmony_ci l->class = calloc(sizeof(l->class[0]), node_count); 55bf215546Sopenharmony_ci l->class_start = calloc(sizeof(l->class_start[0]), class_count); 56bf215546Sopenharmony_ci l->class_disjoint = calloc(sizeof(l->class_disjoint[0]), class_count * class_count); 57bf215546Sopenharmony_ci l->class_size = calloc(sizeof(l->class_size[0]), class_count); 58bf215546Sopenharmony_ci l->spill_cost = calloc(sizeof(l->spill_cost[0]), node_count); 59bf215546Sopenharmony_ci l->solutions = calloc(sizeof(l->solutions[0]), node_count); 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_ci memset(l->solutions, ~0, sizeof(l->solutions[0]) * node_count); 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_ci return l; 64bf215546Sopenharmony_ci} 65bf215546Sopenharmony_ci 66bf215546Sopenharmony_civoid 67bf215546Sopenharmony_cilcra_free(struct lcra_state *l) 68bf215546Sopenharmony_ci{ 69bf215546Sopenharmony_ci if (!l) 70bf215546Sopenharmony_ci return; 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci free(l->alignment); 73bf215546Sopenharmony_ci free(l->linear); 74bf215546Sopenharmony_ci free(l->modulus); 75bf215546Sopenharmony_ci free(l->class); 76bf215546Sopenharmony_ci free(l->class_start); 77bf215546Sopenharmony_ci free(l->class_disjoint); 78bf215546Sopenharmony_ci free(l->class_size); 79bf215546Sopenharmony_ci free(l->spill_cost); 80bf215546Sopenharmony_ci free(l->solutions); 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_ci free(l); 83bf215546Sopenharmony_ci} 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_civoid 86bf215546Sopenharmony_cilcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2, unsigned bound) 87bf215546Sopenharmony_ci{ 88bf215546Sopenharmony_ci l->alignment[node] = (align_log2 + 1) | (bound << 16); 89bf215546Sopenharmony_ci} 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_civoid 92bf215546Sopenharmony_cilcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2) 93bf215546Sopenharmony_ci{ 94bf215546Sopenharmony_ci l->class_disjoint[(c1 * l->class_count) + c2] = true; 95bf215546Sopenharmony_ci l->class_disjoint[(c2 * l->class_count) + c1] = true; 96bf215546Sopenharmony_ci} 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_civoid 99bf215546Sopenharmony_cilcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len) 100bf215546Sopenharmony_ci{ 101bf215546Sopenharmony_ci if (node < l->node_count && l->alignment[node]) { 102bf215546Sopenharmony_ci unsigned BA = l->alignment[node]; 103bf215546Sopenharmony_ci unsigned alignment = (BA & 0xffff) - 1; 104bf215546Sopenharmony_ci unsigned bound = BA >> 16; 105bf215546Sopenharmony_ci l->modulus[node] = DIV_ROUND_UP(bound - len + 1, 1 << alignment); 106bf215546Sopenharmony_ci } 107bf215546Sopenharmony_ci} 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_civoid 110bf215546Sopenharmony_cilcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i, unsigned j, unsigned cmask_j) 111bf215546Sopenharmony_ci{ 112bf215546Sopenharmony_ci if (i == j) 113bf215546Sopenharmony_ci return; 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci if (l->class_disjoint[(l->class[i] * l->class_count) + l->class[j]]) 116bf215546Sopenharmony_ci return; 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_ci uint32_t constraint_fw = 0; 119bf215546Sopenharmony_ci uint32_t constraint_bw = 0; 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ci for (unsigned D = 0; D < 16; ++D) { 122bf215546Sopenharmony_ci if (cmask_i & (cmask_j << D)) { 123bf215546Sopenharmony_ci constraint_bw |= (1 << (15 + D)); 124bf215546Sopenharmony_ci constraint_fw |= (1 << (15 - D)); 125bf215546Sopenharmony_ci } 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci if (cmask_i & (cmask_j >> D)) { 128bf215546Sopenharmony_ci constraint_fw |= (1 << (15 + D)); 129bf215546Sopenharmony_ci constraint_bw |= (1 << (15 - D)); 130bf215546Sopenharmony_ci } 131bf215546Sopenharmony_ci } 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci l->linear[j * l->node_count + i] |= constraint_fw; 134bf215546Sopenharmony_ci l->linear[i * l->node_count + j] |= constraint_bw; 135bf215546Sopenharmony_ci} 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_cistatic bool 138bf215546Sopenharmony_cilcra_test_linear(struct lcra_state *l, unsigned *solutions, unsigned i) 139bf215546Sopenharmony_ci{ 140bf215546Sopenharmony_ci unsigned *row = &l->linear[i * l->node_count]; 141bf215546Sopenharmony_ci signed constant = solutions[i]; 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci for (unsigned j = 0; j < l->node_count; ++j) { 144bf215546Sopenharmony_ci if (solutions[j] == ~0) continue; 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci signed lhs = solutions[j] - constant; 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci if (lhs < -15 || lhs > 15) 149bf215546Sopenharmony_ci continue; 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_ci if (row[j] & (1 << (lhs + 15))) 152bf215546Sopenharmony_ci return false; 153bf215546Sopenharmony_ci } 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci return true; 156bf215546Sopenharmony_ci} 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_cibool 159bf215546Sopenharmony_cilcra_solve(struct lcra_state *l) 160bf215546Sopenharmony_ci{ 161bf215546Sopenharmony_ci for (unsigned step = 0; step < l->node_count; ++step) { 162bf215546Sopenharmony_ci if (l->solutions[step] != ~0) continue; 163bf215546Sopenharmony_ci if (l->alignment[step] == 0) continue; 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_ci unsigned _class = l->class[step]; 166bf215546Sopenharmony_ci unsigned class_start = l->class_start[_class]; 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_ci unsigned BA = l->alignment[step]; 169bf215546Sopenharmony_ci unsigned shift = (BA & 0xffff) - 1; 170bf215546Sopenharmony_ci unsigned bound = BA >> 16; 171bf215546Sopenharmony_ci 172bf215546Sopenharmony_ci unsigned P = bound >> shift; 173bf215546Sopenharmony_ci unsigned Q = l->modulus[step]; 174bf215546Sopenharmony_ci unsigned r_max = l->class_size[_class]; 175bf215546Sopenharmony_ci unsigned k_max = r_max >> shift; 176bf215546Sopenharmony_ci unsigned m_max = k_max / P; 177bf215546Sopenharmony_ci bool succ = false; 178bf215546Sopenharmony_ci 179bf215546Sopenharmony_ci for (unsigned m = 0; m < m_max; ++m) { 180bf215546Sopenharmony_ci for (unsigned n = 0; n < Q; ++n) { 181bf215546Sopenharmony_ci l->solutions[step] = ((m * P + n) << shift) + class_start; 182bf215546Sopenharmony_ci succ = lcra_test_linear(l, l->solutions, step); 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_ci if (succ) break; 185bf215546Sopenharmony_ci } 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_ci if (succ) break; 188bf215546Sopenharmony_ci } 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci /* Out of registers - prepare to spill */ 191bf215546Sopenharmony_ci if (!succ) { 192bf215546Sopenharmony_ci l->spill_class = l->class[step]; 193bf215546Sopenharmony_ci return false; 194bf215546Sopenharmony_ci } 195bf215546Sopenharmony_ci } 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci return true; 198bf215546Sopenharmony_ci} 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci/* Register spilling is implemented with a cost-benefit system. Costs are set 201bf215546Sopenharmony_ci * by the user. Benefits are calculated from the constraints. */ 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_civoid 204bf215546Sopenharmony_cilcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost) 205bf215546Sopenharmony_ci{ 206bf215546Sopenharmony_ci if (node < l->node_count) 207bf215546Sopenharmony_ci l->spill_cost[node] = cost; 208bf215546Sopenharmony_ci} 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_cistatic unsigned 211bf215546Sopenharmony_cilcra_count_constraints(struct lcra_state *l, unsigned i) 212bf215546Sopenharmony_ci{ 213bf215546Sopenharmony_ci unsigned count = 0; 214bf215546Sopenharmony_ci unsigned *constraints = &l->linear[i * l->node_count]; 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_ci for (unsigned j = 0; j < l->node_count; ++j) 217bf215546Sopenharmony_ci count += util_bitcount(constraints[j]); 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_ci return count; 220bf215546Sopenharmony_ci} 221bf215546Sopenharmony_ci 222bf215546Sopenharmony_cisigned 223bf215546Sopenharmony_cilcra_get_best_spill_node(struct lcra_state *l) 224bf215546Sopenharmony_ci{ 225bf215546Sopenharmony_ci /* If there are no constraints on a node, do not pick it to spill under 226bf215546Sopenharmony_ci * any circumstance, or else we would hang rather than fail RA */ 227bf215546Sopenharmony_ci float best_benefit = 0.0; 228bf215546Sopenharmony_ci signed best_node = -1; 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci for (unsigned i = 0; i < l->node_count; ++i) { 231bf215546Sopenharmony_ci /* Find spillable nodes */ 232bf215546Sopenharmony_ci if (l->class[i] != l->spill_class) continue; 233bf215546Sopenharmony_ci if (l->spill_cost[i] < 0) continue; 234bf215546Sopenharmony_ci 235bf215546Sopenharmony_ci /* Adapted from Chaitin's heuristic */ 236bf215546Sopenharmony_ci float constraints = lcra_count_constraints(l, i); 237bf215546Sopenharmony_ci float cost = (l->spill_cost[i] + 1); 238bf215546Sopenharmony_ci float benefit = constraints / cost; 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_ci if (benefit > best_benefit) { 241bf215546Sopenharmony_ci best_benefit = benefit; 242bf215546Sopenharmony_ci best_node = i; 243bf215546Sopenharmony_ci } 244bf215546Sopenharmony_ci } 245bf215546Sopenharmony_ci 246bf215546Sopenharmony_ci return best_node; 247bf215546Sopenharmony_ci} 248