/*
 * Copyright © 2016-2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>

#include "aub_mem.h"
#include "util/anon_file.h"

struct bo_map {
   struct list_head link;
   struct intel_batch_decode_bo bo;
   bool unmap_after_use;
   bool ppgtt;
};

struct ggtt_entry {
   struct rb_node node;
   uint64_t virt_addr;
   uint64_t phys_addr;
};

struct phys_mem {
   struct rb_node node;
   uint64_t fd_offset;
   uint64_t phys_addr;
   uint8_t *data;
   const uint8_t *aub_data;
};

static void
add_gtt_bo_map(struct aub_mem *mem, struct intel_batch_decode_bo bo, bool ppgtt, bool unmap_after_use)
{
   struct bo_map *m = calloc(1, sizeof(*m));

   m->ppgtt = ppgtt;
   m->bo = bo;
   m->unmap_after_use = unmap_after_use;
   list_add(&m->link, &mem->maps);
}

void
aub_mem_clear_bo_maps(struct aub_mem *mem)
{
   list_for_each_entry_safe(struct bo_map, i, &mem->maps, link) {
      if (i->unmap_after_use)
         munmap((void *)i->bo.map, i->bo.size);
      list_del(&i->link);
      free(i);
   }
}

static inline struct ggtt_entry *
ggtt_entry_next(struct ggtt_entry *entry)
{
   if (!entry)
      return NULL;
   struct rb_node *node = rb_node_next(&entry->node);
   if (!node)
      return NULL;
   return rb_node_data(struct ggtt_entry, node, node);
}

static inline int
cmp_uint64(uint64_t a, uint64_t b)
{
   if (a < b)
      return 1;
   if (a > b)
      return -1;
   return 0;
}

static inline int
cmp_ggtt_entry(const struct rb_node *node, const void *addr)
{
   struct ggtt_entry *entry = rb_node_data(struct ggtt_entry, node, node);
   return cmp_uint64(entry->virt_addr, *(const uint64_t *)addr);
}

static struct ggtt_entry *
ensure_ggtt_entry(struct aub_mem *mem, uint64_t virt_addr)
{
   struct rb_node *node = rb_tree_search_sloppy(&mem->ggtt, &virt_addr,
                                                cmp_ggtt_entry);
   int cmp = 0;
   if (!node || (cmp = cmp_ggtt_entry(node, &virt_addr))) {
      struct ggtt_entry *new_entry = calloc(1, sizeof(*new_entry));
      new_entry->virt_addr = virt_addr;
      rb_tree_insert_at(&mem->ggtt, node, &new_entry->node, cmp < 0);
      node = &new_entry->node;
   }

   return rb_node_data(struct ggtt_entry, node, node);
}

static struct ggtt_entry *
search_ggtt_entry(struct aub_mem *mem, uint64_t virt_addr)
{
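   /* GGTT entries are tracked at 4 KiB page granularity, so look up the
    * page-aligned address.
    */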
   virt_addr &= ~0xfff;

   struct rb_node *node = rb_tree_search(&mem->ggtt, &virt_addr, cmp_ggtt_entry);

   if (!node)
      return NULL;

   return rb_node_data(struct ggtt_entry, node, node);
}

static inline int
cmp_phys_mem(const struct rb_node *node, const void *addr)
{
   struct phys_mem *mem = rb_node_data(struct phys_mem, node, node);
   return cmp_uint64(mem->phys_addr, *(uint64_t *)addr);
}

static void
check_mmap_result(const void *res)
{
   if (res != MAP_FAILED)
      return;

   if (errno == ENOMEM) {
      fprintf(stderr,
              "Not enough memory available or maximum number of mappings reached. "
              "Consider increasing sysctl vm.max_map_count.\n");
   } else {
      perror("mmap");
   }

   abort();
}

static struct phys_mem *
ensure_phys_mem(struct aub_mem *mem, uint64_t phys_addr)
{
   struct rb_node *node = rb_tree_search_sloppy(&mem->mem, &phys_addr, cmp_phys_mem);
   int cmp = 0;
   if (!node || (cmp = cmp_phys_mem(node, &phys_addr))) {
      struct phys_mem *new_mem = calloc(1, sizeof(*new_mem));
      new_mem->phys_addr = phys_addr;
      new_mem->fd_offset = mem->mem_fd_len;

      ASSERTED int ftruncate_res = ftruncate(mem->mem_fd, mem->mem_fd_len += 4096);
      assert(ftruncate_res == 0);

      new_mem->data = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
                           mem->mem_fd, new_mem->fd_offset);
      check_mmap_result(new_mem->data);

      rb_tree_insert_at(&mem->mem, node, &new_mem->node, cmp < 0);
      node = &new_mem->node;
   }

   return rb_node_data(struct phys_mem, node, node);
}

static struct phys_mem *
search_phys_mem(struct aub_mem *mem, uint64_t phys_addr)
{
   phys_addr &= ~0xfff;

   struct rb_node *node = rb_tree_search(&mem->mem, &phys_addr, cmp_phys_mem);

   if (!node)
      return NULL;

   return rb_node_data(struct phys_mem, node, node);
}

void
aub_mem_local_write(void *_mem, uint64_t address,
                    const void *data, uint32_t size)
{
   struct aub_mem *mem = _mem;
   struct intel_batch_decode_bo bo = {
      .map = data,
      .addr = address,
      .size = size,
   };
   add_gtt_bo_map(mem, bo, false, false);
}

void
aub_mem_ggtt_entry_write(void *_mem, uint64_t address,
                         const void *_data, uint32_t _size)
{
   struct aub_mem *mem = _mem;
   uint64_t virt_addr = (address / sizeof(uint64_t)) << 12;
   const uint64_t *data = _data;
   size_t size = _size / sizeof(*data);
   for (const uint64_t *entry = data;
        entry < data + size;
        entry++, virt_addr += 4096) {
      struct ggtt_entry *pt = ensure_ggtt_entry(mem, virt_addr);
      pt->phys_addr = *entry;
   }
}

void
aub_mem_phys_write(void *_mem, uint64_t phys_address,
                   const void *data, uint32_t size)
{
   struct aub_mem *mem = _mem;
   uint32_t to_write = size;
   for (uint64_t page = phys_address & ~0xfff; page < phys_address + size; page += 4096) {
      struct phys_mem *pmem = ensure_phys_mem(mem, page);
      uint64_t offset = MAX2(page, phys_address) - page;
      uint32_t size_this_page = MIN2(to_write, 4096 - offset);
      to_write -= size_this_page;
      memcpy(pmem->data + offset, data, size_this_page);
      pmem->aub_data = data - offset;
      data = (const uint8_t *)data + size_this_page;
   }
}

void
aub_mem_ggtt_write(void *_mem, uint64_t virt_address,
                   const void *data, uint32_t size)
{
   struct aub_mem *mem = _mem;
   uint32_t to_write = size;
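   /* Copy one 4 KiB page at a time, translating each GGTT page to the
    * physical page that backs it.
    */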
   for (uint64_t page = virt_address & ~0xfff; page < virt_address + size; page += 4096) {
      struct ggtt_entry *entry = search_ggtt_entry(mem, page);
      assert(entry && entry->phys_addr & 0x1);

      uint64_t offset = MAX2(page, virt_address) - page;
      uint32_t size_this_page = MIN2(to_write, 4096 - offset);
      to_write -= size_this_page;

      uint64_t phys_page = entry->phys_addr & ~0xfff; /* Clear the validity bits. */
      aub_mem_phys_write(mem, phys_page + offset, data, size_this_page);
      data = (const uint8_t *)data + size_this_page;
   }
}

struct intel_batch_decode_bo
aub_mem_get_ggtt_bo(void *_mem, uint64_t address)
{
   struct aub_mem *mem = _mem;
   struct intel_batch_decode_bo bo = {0};

   list_for_each_entry(struct bo_map, i, &mem->maps, link)
      if (!i->ppgtt && i->bo.addr <= address && i->bo.addr + i->bo.size > address)
         return i->bo;

   address &= ~0xfff;

   struct ggtt_entry *start =
      (struct ggtt_entry *)rb_tree_search_sloppy(&mem->ggtt, &address,
                                                 cmp_ggtt_entry);
   if (start && start->virt_addr < address)
      start = ggtt_entry_next(start);
   if (!start)
      return bo;

   struct ggtt_entry *last = start;
   for (struct ggtt_entry *i = ggtt_entry_next(last);
        i && last->virt_addr + 4096 == i->virt_addr;
        last = i, i = ggtt_entry_next(last))
      ;

   bo.addr = MIN2(address, start->virt_addr);
   bo.size = last->virt_addr - bo.addr + 4096;
   bo.map = mmap(NULL, bo.size, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
   check_mmap_result(bo.map);

   for (struct ggtt_entry *i = start;
        i;
        i = i == last ? NULL : ggtt_entry_next(i)) {
      uint64_t phys_addr = i->phys_addr & ~0xfff;
      struct phys_mem *phys_mem = search_phys_mem(mem, phys_addr);

      if (!phys_mem)
         continue;

      uint32_t map_offset = i->virt_addr - address;
      void *res = mmap((uint8_t *)bo.map + map_offset, 4096, PROT_READ,
                       MAP_SHARED | MAP_FIXED, mem->mem_fd, phys_mem->fd_offset);
      check_mmap_result(res);
   }

   add_gtt_bo_map(mem, bo, false, true);

   return bo;
}

static struct phys_mem *
ppgtt_walk(struct aub_mem *mem, uint64_t pml4, uint64_t address)
{
   uint64_t shift = 39;
   uint64_t addr = pml4;
   for (int level = 4; level > 0; level--) {
      struct phys_mem *table = search_phys_mem(mem, addr);
      if (!table)
         return NULL;
      int index = (address >> shift) & 0x1ff;
      uint64_t entry = ((uint64_t *)table->data)[index];
      if (!(entry & 1))
         return NULL;
      addr = entry & ~0xfff;
      shift -= 9;
   }
   return search_phys_mem(mem, addr);
}

static bool
ppgtt_mapped(struct aub_mem *mem, uint64_t pml4, uint64_t address)
{
   return ppgtt_walk(mem, pml4, address) != NULL;
}

struct intel_batch_decode_bo
aub_mem_get_ppgtt_bo(void *_mem, uint64_t address)
{
   struct aub_mem *mem = _mem;
   struct intel_batch_decode_bo bo = {0};

   list_for_each_entry(struct bo_map, i, &mem->maps, link)
      if (i->ppgtt && i->bo.addr <= address && i->bo.addr + i->bo.size > address)
         return i->bo;

   address &= ~0xfff;

   if (!ppgtt_mapped(mem, mem->pml4, address))
      return bo;

   /* Map everything until the first gap since we don't know how much the
    * decoder actually needs.
    */
   uint64_t end = address;
   while (ppgtt_mapped(mem, mem->pml4, end))
      end += 4096;

   bo.addr = address;
   bo.size = end - address;
   bo.map = mmap(NULL, bo.size, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
   check_mmap_result(bo.map);

   for (uint64_t page = address; page < end; page += 4096) {
      struct phys_mem *phys_mem = ppgtt_walk(mem, mem->pml4, page);

      void *res = mmap((uint8_t *)bo.map + (page - bo.addr), 4096, PROT_READ,
                       MAP_SHARED | MAP_FIXED, mem->mem_fd, phys_mem->fd_offset);
      check_mmap_result(res);
   }

   add_gtt_bo_map(mem, bo, true, true);

   return bo;
}

bool
aub_mem_init(struct aub_mem *mem)
{
   memset(mem, 0, sizeof(*mem));

   list_inithead(&mem->maps);

   mem->mem_fd = os_create_anonymous_file(0, "phys memory");

   return mem->mem_fd != -1;
}

void
aub_mem_fini(struct aub_mem *mem)
{
   if (mem->mem_fd == -1)
      return;

   aub_mem_clear_bo_maps(mem);

   rb_tree_foreach_safe(struct ggtt_entry, entry, &mem->ggtt, node) {
      rb_tree_remove(&mem->ggtt, &entry->node);
      free(entry);
   }
   rb_tree_foreach_safe(struct phys_mem, entry, &mem->mem, node) {
      rb_tree_remove(&mem->mem, &entry->node);
      free(entry);
   }

   close(mem->mem_fd);
   mem->mem_fd = -1;
}

struct intel_batch_decode_bo
aub_mem_get_phys_addr_data(struct aub_mem *mem, uint64_t phys_addr)
{
   struct phys_mem *page = search_phys_mem(mem, phys_addr);
   return page ?
      (struct intel_batch_decode_bo) { .map = page->data, .addr = page->phys_addr, .size = 4096 } :
      (struct intel_batch_decode_bo) {};
}

struct intel_batch_decode_bo
aub_mem_get_ppgtt_addr_data(struct aub_mem *mem, uint64_t virt_addr)
{
   struct phys_mem *page = ppgtt_walk(mem, mem->pml4, virt_addr);
   return page ?
      (struct intel_batch_decode_bo) { .map = page->data, .addr = virt_addr & ~((1ULL << 12) - 1), .size = 4096 } :
      (struct intel_batch_decode_bo) {};
}

struct intel_batch_decode_bo
aub_mem_get_ppgtt_addr_aub_data(struct aub_mem *mem, uint64_t virt_addr)
{
   struct phys_mem *page = ppgtt_walk(mem, mem->pml4, virt_addr);
   return page ?
      (struct intel_batch_decode_bo) { .map = page->aub_data, .addr = virt_addr & ~((1ULL << 12) - 1), .size = 4096 } :
      (struct intel_batch_decode_bo) {};
}