162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * SImple Tiler Allocator (SiTA): 2D and 1D allocation(reservation) algorithm 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Authors: Ravi Ramachandra <r.ramachandra@ti.com>, 662306a36Sopenharmony_ci * Lajos Molnar <molnar@ti.com> 762306a36Sopenharmony_ci * Andy Gross <andy.gross@ti.com> 862306a36Sopenharmony_ci * 962306a36Sopenharmony_ci * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com/ 1062306a36Sopenharmony_ci */ 1162306a36Sopenharmony_ci#include <linux/init.h> 1262306a36Sopenharmony_ci#include <linux/module.h> 1362306a36Sopenharmony_ci#include <linux/errno.h> 1462306a36Sopenharmony_ci#include <linux/sched.h> 1562306a36Sopenharmony_ci#include <linux/wait.h> 1662306a36Sopenharmony_ci#include <linux/bitmap.h> 1762306a36Sopenharmony_ci#include <linux/slab.h> 1862306a36Sopenharmony_ci#include "tcm.h" 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_cistatic unsigned long mask[8]; 2162306a36Sopenharmony_ci/* 2262306a36Sopenharmony_ci * pos position in bitmap 2362306a36Sopenharmony_ci * w width in slots 2462306a36Sopenharmony_ci * h height in slots 2562306a36Sopenharmony_ci * map ptr to bitmap 2662306a36Sopenharmony_ci * stride slots in a row 2762306a36Sopenharmony_ci */ 2862306a36Sopenharmony_cistatic void free_slots(unsigned long pos, u16 w, u16 h, 2962306a36Sopenharmony_ci unsigned long *map, u16 stride) 3062306a36Sopenharmony_ci{ 3162306a36Sopenharmony_ci int i; 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci for (i = 0; i < h; i++, pos += stride) 3462306a36Sopenharmony_ci bitmap_clear(map, pos, w); 3562306a36Sopenharmony_ci} 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_ci/* 3862306a36Sopenharmony_ci * w width in slots 3962306a36Sopenharmony_ci * pos ptr to position 4062306a36Sopenharmony_ci * map ptr to bitmap 4162306a36Sopenharmony_ci * num_bits number of bits in bitmap 4262306a36Sopenharmony_ci */ 4362306a36Sopenharmony_cistatic int r2l_b2t_1d(u16 w, unsigned long *pos, unsigned long *map, 4462306a36Sopenharmony_ci size_t num_bits) 4562306a36Sopenharmony_ci{ 4662306a36Sopenharmony_ci unsigned long search_count = 0; 4762306a36Sopenharmony_ci unsigned long bit; 4862306a36Sopenharmony_ci bool area_found = false; 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_ci *pos = num_bits - w; 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ci while (search_count < num_bits) { 5362306a36Sopenharmony_ci bit = find_next_bit(map, num_bits, *pos); 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_ci if (bit - *pos >= w) { 5662306a36Sopenharmony_ci /* found a long enough free area */ 5762306a36Sopenharmony_ci bitmap_set(map, *pos, w); 5862306a36Sopenharmony_ci area_found = true; 5962306a36Sopenharmony_ci break; 6062306a36Sopenharmony_ci } 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ci search_count = num_bits - bit + w; 6362306a36Sopenharmony_ci *pos = bit - w; 6462306a36Sopenharmony_ci } 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci return (area_found) ? 0 : -ENOMEM; 6762306a36Sopenharmony_ci} 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci/* 7062306a36Sopenharmony_ci * w = width in slots 7162306a36Sopenharmony_ci * h = height in slots 7262306a36Sopenharmony_ci * a = align in slots (mask, 2^n-1, 0 is unaligned) 7362306a36Sopenharmony_ci * offset = offset in bytes from 4KiB 7462306a36Sopenharmony_ci * pos = position in bitmap for buffer 7562306a36Sopenharmony_ci * map = bitmap ptr 7662306a36Sopenharmony_ci * num_bits = size of bitmap 7762306a36Sopenharmony_ci * stride = bits in one row of container 7862306a36Sopenharmony_ci */ 7962306a36Sopenharmony_cistatic int l2r_t2b(u16 w, u16 h, u16 a, s16 offset, 8062306a36Sopenharmony_ci unsigned long *pos, unsigned long slot_bytes, 8162306a36Sopenharmony_ci unsigned long *map, size_t num_bits, size_t slot_stride) 8262306a36Sopenharmony_ci{ 8362306a36Sopenharmony_ci int i; 8462306a36Sopenharmony_ci unsigned long index; 8562306a36Sopenharmony_ci bool area_free = false; 8662306a36Sopenharmony_ci unsigned long slots_per_band = PAGE_SIZE / slot_bytes; 8762306a36Sopenharmony_ci unsigned long bit_offset = (offset > 0) ? offset / slot_bytes : 0; 8862306a36Sopenharmony_ci unsigned long curr_bit = bit_offset; 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci /* reset alignment to 1 if we are matching a specific offset */ 9162306a36Sopenharmony_ci /* adjust alignment - 1 to get to the format expected in bitmaps */ 9262306a36Sopenharmony_ci a = (offset > 0) ? 0 : a - 1; 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci /* FIXME Return error if slots_per_band > stride */ 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_ci while (curr_bit < num_bits) { 9762306a36Sopenharmony_ci *pos = bitmap_find_next_zero_area(map, num_bits, curr_bit, w, 9862306a36Sopenharmony_ci a); 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_ci /* skip forward if we are not at right offset */ 10162306a36Sopenharmony_ci if (bit_offset > 0 && (*pos % slots_per_band != bit_offset)) { 10262306a36Sopenharmony_ci curr_bit = ALIGN(*pos, slots_per_band) + bit_offset; 10362306a36Sopenharmony_ci continue; 10462306a36Sopenharmony_ci } 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_ci /* skip forward to next row if we overlap end of row */ 10762306a36Sopenharmony_ci if ((*pos % slot_stride) + w > slot_stride) { 10862306a36Sopenharmony_ci curr_bit = ALIGN(*pos, slot_stride) + bit_offset; 10962306a36Sopenharmony_ci continue; 11062306a36Sopenharmony_ci } 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci /* TODO: Handle overlapping 4K boundaries */ 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci /* break out of look if we will go past end of container */ 11562306a36Sopenharmony_ci if ((*pos + slot_stride * h) > num_bits) 11662306a36Sopenharmony_ci break; 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_ci /* generate mask that represents out matching pattern */ 11962306a36Sopenharmony_ci bitmap_clear(mask, 0, slot_stride); 12062306a36Sopenharmony_ci bitmap_set(mask, (*pos % BITS_PER_LONG), w); 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_ci /* assume the area is free until we find an overlap */ 12362306a36Sopenharmony_ci area_free = true; 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci /* check subsequent rows to see if complete area is free */ 12662306a36Sopenharmony_ci for (i = 1; i < h; i++) { 12762306a36Sopenharmony_ci index = *pos / BITS_PER_LONG + i * 8; 12862306a36Sopenharmony_ci if (bitmap_intersects(&map[index], mask, 12962306a36Sopenharmony_ci (*pos % BITS_PER_LONG) + w)) { 13062306a36Sopenharmony_ci area_free = false; 13162306a36Sopenharmony_ci break; 13262306a36Sopenharmony_ci } 13362306a36Sopenharmony_ci } 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ci if (area_free) 13662306a36Sopenharmony_ci break; 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci /* go forward past this match */ 13962306a36Sopenharmony_ci if (bit_offset > 0) 14062306a36Sopenharmony_ci curr_bit = ALIGN(*pos, slots_per_band) + bit_offset; 14162306a36Sopenharmony_ci else 14262306a36Sopenharmony_ci curr_bit = *pos + a + 1; 14362306a36Sopenharmony_ci } 14462306a36Sopenharmony_ci 14562306a36Sopenharmony_ci if (area_free) { 14662306a36Sopenharmony_ci /* set area as in-use. iterate over rows */ 14762306a36Sopenharmony_ci for (i = 0, index = *pos; i < h; i++, index += slot_stride) 14862306a36Sopenharmony_ci bitmap_set(map, index, w); 14962306a36Sopenharmony_ci } 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci return (area_free) ? 0 : -ENOMEM; 15262306a36Sopenharmony_ci} 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_cistatic s32 sita_reserve_1d(struct tcm *tcm, u32 num_slots, 15562306a36Sopenharmony_ci struct tcm_area *area) 15662306a36Sopenharmony_ci{ 15762306a36Sopenharmony_ci unsigned long pos; 15862306a36Sopenharmony_ci int ret; 15962306a36Sopenharmony_ci 16062306a36Sopenharmony_ci spin_lock(&(tcm->lock)); 16162306a36Sopenharmony_ci ret = r2l_b2t_1d(num_slots, &pos, tcm->bitmap, tcm->map_size); 16262306a36Sopenharmony_ci if (!ret) { 16362306a36Sopenharmony_ci area->p0.x = pos % tcm->width; 16462306a36Sopenharmony_ci area->p0.y = pos / tcm->width; 16562306a36Sopenharmony_ci area->p1.x = (pos + num_slots - 1) % tcm->width; 16662306a36Sopenharmony_ci area->p1.y = (pos + num_slots - 1) / tcm->width; 16762306a36Sopenharmony_ci } 16862306a36Sopenharmony_ci spin_unlock(&(tcm->lock)); 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_ci return ret; 17162306a36Sopenharmony_ci} 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_cistatic s32 sita_reserve_2d(struct tcm *tcm, u16 h, u16 w, u16 align, 17462306a36Sopenharmony_ci s16 offset, u16 slot_bytes, 17562306a36Sopenharmony_ci struct tcm_area *area) 17662306a36Sopenharmony_ci{ 17762306a36Sopenharmony_ci unsigned long pos; 17862306a36Sopenharmony_ci int ret; 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci spin_lock(&(tcm->lock)); 18162306a36Sopenharmony_ci ret = l2r_t2b(w, h, align, offset, &pos, slot_bytes, tcm->bitmap, 18262306a36Sopenharmony_ci tcm->map_size, tcm->width); 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci if (!ret) { 18562306a36Sopenharmony_ci area->p0.x = pos % tcm->width; 18662306a36Sopenharmony_ci area->p0.y = pos / tcm->width; 18762306a36Sopenharmony_ci area->p1.x = area->p0.x + w - 1; 18862306a36Sopenharmony_ci area->p1.y = area->p0.y + h - 1; 18962306a36Sopenharmony_ci } 19062306a36Sopenharmony_ci spin_unlock(&(tcm->lock)); 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci return ret; 19362306a36Sopenharmony_ci} 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_cistatic void sita_deinit(struct tcm *tcm) 19662306a36Sopenharmony_ci{ 19762306a36Sopenharmony_ci kfree(tcm); 19862306a36Sopenharmony_ci} 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_cistatic s32 sita_free(struct tcm *tcm, struct tcm_area *area) 20162306a36Sopenharmony_ci{ 20262306a36Sopenharmony_ci unsigned long pos; 20362306a36Sopenharmony_ci u16 w, h; 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci pos = area->p0.x + area->p0.y * tcm->width; 20662306a36Sopenharmony_ci if (area->is2d) { 20762306a36Sopenharmony_ci w = area->p1.x - area->p0.x + 1; 20862306a36Sopenharmony_ci h = area->p1.y - area->p0.y + 1; 20962306a36Sopenharmony_ci } else { 21062306a36Sopenharmony_ci w = area->p1.x + area->p1.y * tcm->width - pos + 1; 21162306a36Sopenharmony_ci h = 1; 21262306a36Sopenharmony_ci } 21362306a36Sopenharmony_ci 21462306a36Sopenharmony_ci spin_lock(&(tcm->lock)); 21562306a36Sopenharmony_ci free_slots(pos, w, h, tcm->bitmap, tcm->width); 21662306a36Sopenharmony_ci spin_unlock(&(tcm->lock)); 21762306a36Sopenharmony_ci return 0; 21862306a36Sopenharmony_ci} 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_cistruct tcm *sita_init(u16 width, u16 height) 22162306a36Sopenharmony_ci{ 22262306a36Sopenharmony_ci struct tcm *tcm; 22362306a36Sopenharmony_ci size_t map_size = BITS_TO_LONGS(width*height) * sizeof(unsigned long); 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_ci if (width == 0 || height == 0) 22662306a36Sopenharmony_ci return NULL; 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci tcm = kzalloc(sizeof(*tcm) + map_size, GFP_KERNEL); 22962306a36Sopenharmony_ci if (!tcm) 23062306a36Sopenharmony_ci goto error; 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci /* Updating the pointers to SiTA implementation APIs */ 23362306a36Sopenharmony_ci tcm->height = height; 23462306a36Sopenharmony_ci tcm->width = width; 23562306a36Sopenharmony_ci tcm->reserve_2d = sita_reserve_2d; 23662306a36Sopenharmony_ci tcm->reserve_1d = sita_reserve_1d; 23762306a36Sopenharmony_ci tcm->free = sita_free; 23862306a36Sopenharmony_ci tcm->deinit = sita_deinit; 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci spin_lock_init(&tcm->lock); 24162306a36Sopenharmony_ci tcm->bitmap = (unsigned long *)(tcm + 1); 24262306a36Sopenharmony_ci bitmap_clear(tcm->bitmap, 0, width*height); 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ci tcm->map_size = width*height; 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_ci return tcm; 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_cierror: 24962306a36Sopenharmony_ci return NULL; 25062306a36Sopenharmony_ci} 251