1/*
2 * Copyright © 2019 Raspberry Pi Ltd
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "v3dv_private.h"
25
26#include <errno.h>
27#include <sys/mman.h>
28
29#include "drm-uapi/v3d_drm.h"
30#include "util/u_memory.h"
31
/* Default max size of the bo cache, in MB.
 *
 * FIXME: this value was chosen after testing some apps on an rpi4 with 4GB
 * of RAM, but it should really depend on the total amount of RAM. Tuning it
 * would require testing on real hw with different amounts of RAM, so we use
 * this value for now.
 */
#define DEFAULT_MAX_BO_CACHE_SIZE 512

/* Compile-time switch that makes this file print BO allocation and cache
 * statistics to stderr.
 *
 * We decided against using a V3D_DEBUG option for this, as that would mean
 * adding a run-time check to most of the calls.
 */
static const bool dump_stats = false;
44
45static void
46bo_dump_stats(struct v3dv_device *device)
47{
48   struct v3dv_bo_cache *cache = &device->bo_cache;
49
50   fprintf(stderr, "  BOs allocated:   %d\n", device->bo_count);
51   fprintf(stderr, "  BOs size:        %dkb\n", device->bo_size / 1024);
52   fprintf(stderr, "  BOs cached:      %d\n", cache->cache_count);
53   fprintf(stderr, "  BOs cached size: %dkb\n", cache->cache_size / 1024);
54
55   if (!list_is_empty(&cache->time_list)) {
56      struct v3dv_bo *first = list_first_entry(&cache->time_list,
57                                              struct v3dv_bo,
58                                              time_list);
59      struct v3dv_bo *last = list_last_entry(&cache->time_list,
60                                            struct v3dv_bo,
61                                            time_list);
62
63      fprintf(stderr, "  oldest cache time: %ld\n",
64              (long)first->free_time);
65      fprintf(stderr, "  newest cache time: %ld\n",
66              (long)last->free_time);
67
68      struct timespec time;
69      clock_gettime(CLOCK_MONOTONIC, &time);
70      fprintf(stderr, "  now:               %lld\n",
71              (long long)time.tv_sec);
72   }
73
74   if (cache->size_list_size) {
75      uint32_t empty_size_list = 0;
76      for (uint32_t i = 0; i < cache->size_list_size; i++) {
77         if (list_is_empty(&cache->size_list[i]))
78            empty_size_list++;
79      }
80      fprintf(stderr, "  Empty size_list lists: %d\n", empty_size_list);
81   }
82}
83
84static void
85bo_remove_from_cache(struct v3dv_bo_cache *cache, struct v3dv_bo *bo)
86{
87   list_del(&bo->time_list);
88   list_del(&bo->size_list);
89
90   cache->cache_count--;
91   cache->cache_size -= bo->size;
92}
93
94static struct v3dv_bo *
95bo_from_cache(struct v3dv_device *device, uint32_t size, const char *name)
96{
97   struct v3dv_bo_cache *cache = &device->bo_cache;
98   uint32_t page_index = size / 4096 - 1;
99
100   if (cache->size_list_size <= page_index)
101      return NULL;
102
103   struct v3dv_bo *bo = NULL;
104
105   mtx_lock(&cache->lock);
106   if (!list_is_empty(&cache->size_list[page_index])) {
107      bo = list_first_entry(&cache->size_list[page_index],
108                            struct v3dv_bo, size_list);
109
110      /* Check that the BO has gone idle.  If not, then we want to
111       * allocate something new instead, since we assume that the
112       * user will proceed to CPU map it and fill it with stuff.
113       */
114      if (!v3dv_bo_wait(device, bo, 0)) {
115         mtx_unlock(&cache->lock);
116         return NULL;
117      }
118
119      bo_remove_from_cache(cache, bo);
120      bo->name = name;
121      p_atomic_set(&bo->refcnt, 1);
122   }
123   mtx_unlock(&cache->lock);
124   return bo;
125}
126
127static bool
128bo_free(struct v3dv_device *device,
129        struct v3dv_bo *bo)
130{
131   if (!bo)
132      return true;
133
134   assert(p_atomic_read(&bo->refcnt) == 0);
135   assert(bo->map == NULL);
136
137   struct drm_gem_close c;
138   memset(&c, 0, sizeof(c));
139   c.handle = bo->handle;
140   int ret = v3dv_ioctl(device->pdevice->render_fd, DRM_IOCTL_GEM_CLOSE, &c);
141   if (ret != 0)
142      fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno));
143
144   device->bo_count--;
145   device->bo_size -= bo->size;
146
147   if (dump_stats) {
148      fprintf(stderr, "Freed %s%s%dkb:\n",
149              bo->name ? bo->name : "",
150              bo->name ? " " : "",
151              bo->size / 1024);
152      bo_dump_stats(device);
153   }
154
155   /* Our BO structs are stored in a sparse array in the physical device,
156    * so we don't want to free the BO pointer, instead we want to reset it
157    * to 0, to signal that array entry as being free.
158    */
159   memset(bo, 0, sizeof(*bo));
160
161   return ret == 0;
162}
163
164static void
165bo_cache_free_all(struct v3dv_device *device,
166                       bool with_lock)
167{
168   struct v3dv_bo_cache *cache = &device->bo_cache;
169
170   if (with_lock)
171      mtx_lock(&cache->lock);
172   list_for_each_entry_safe(struct v3dv_bo, bo, &cache->time_list,
173                            time_list) {
174      bo_remove_from_cache(cache, bo);
175      bo_free(device, bo);
176   }
177   if (with_lock)
178      mtx_unlock(&cache->lock);
179
180}
181
182void
183v3dv_bo_init(struct v3dv_bo *bo,
184             uint32_t handle,
185             uint32_t size,
186             uint32_t offset,
187             const char *name,
188             bool private)
189{
190   p_atomic_set(&bo->refcnt, 1);
191   bo->handle = handle;
192   bo->handle_bit = 1ull << (handle % 64);
193   bo->size = size;
194   bo->offset = offset;
195   bo->map = NULL;
196   bo->map_size = 0;
197   bo->name = name;
198   bo->private = private;
199   bo->dumb_handle = -1;
200   list_inithead(&bo->list_link);
201}
202
203struct v3dv_bo *
204v3dv_bo_alloc(struct v3dv_device *device,
205              uint32_t size,
206              const char *name,
207              bool private)
208{
209   struct v3dv_bo *bo;
210
211   const uint32_t page_align = 4096; /* Always allocate full pages */
212   size = align(size, page_align);
213
214   if (private) {
215      bo = bo_from_cache(device, size, name);
216      if (bo) {
217         if (dump_stats) {
218            fprintf(stderr, "Allocated %s %dkb from cache:\n",
219                    name, size / 1024);
220            bo_dump_stats(device);
221         }
222         return bo;
223      }
224   }
225
226 retry:
227   ;
228
229   bool cleared_and_retried = false;
230   struct drm_v3d_create_bo create = {
231      .size = size
232   };
233
234   int ret = v3dv_ioctl(device->pdevice->render_fd,
235                        DRM_IOCTL_V3D_CREATE_BO, &create);
236   if (ret != 0) {
237      if (!list_is_empty(&device->bo_cache.time_list) &&
238          !cleared_and_retried) {
239         cleared_and_retried = true;
240         bo_cache_free_all(device, true);
241         goto retry;
242      }
243
244      fprintf(stderr, "Failed to allocate device memory for BO\n");
245      return NULL;
246   }
247
248   assert(create.offset % page_align == 0);
249   assert((create.offset & 0xffffffff) == create.offset);
250
251   bo = v3dv_device_lookup_bo(device->pdevice, create.handle);
252   assert(bo && bo->handle == 0);
253
254   v3dv_bo_init(bo, create.handle, size, create.offset, name, private);
255
256   device->bo_count++;
257   device->bo_size += bo->size;
258   if (dump_stats) {
259      fprintf(stderr, "Allocated %s %dkb:\n", name, size / 1024);
260      bo_dump_stats(device);
261   }
262
263   return bo;
264}
265
266bool
267v3dv_bo_map_unsynchronized(struct v3dv_device *device,
268                           struct v3dv_bo *bo,
269                           uint32_t size)
270{
271   assert(bo != NULL && size <= bo->size);
272
273   if (bo->map)
274      return bo->map;
275
276   struct drm_v3d_mmap_bo map;
277   memset(&map, 0, sizeof(map));
278   map.handle = bo->handle;
279   int ret = v3dv_ioctl(device->pdevice->render_fd,
280                        DRM_IOCTL_V3D_MMAP_BO, &map);
281   if (ret != 0) {
282      fprintf(stderr, "map ioctl failure\n");
283      return false;
284   }
285
286   bo->map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
287                  device->pdevice->render_fd, map.offset);
288   if (bo->map == MAP_FAILED) {
289      fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n",
290              bo->handle, (long long)map.offset, (uint32_t)bo->size);
291      return false;
292   }
293   VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, false));
294
295   bo->map_size = size;
296
297   return true;
298}
299
300bool
301v3dv_bo_wait(struct v3dv_device *device,
302             struct v3dv_bo *bo,
303             uint64_t timeout_ns)
304{
305   struct drm_v3d_wait_bo wait = {
306      .handle = bo->handle,
307      .timeout_ns = timeout_ns,
308   };
309   return v3dv_ioctl(device->pdevice->render_fd,
310                     DRM_IOCTL_V3D_WAIT_BO, &wait) == 0;
311}
312
313bool
314v3dv_bo_map(struct v3dv_device *device, struct v3dv_bo *bo, uint32_t size)
315{
316   assert(bo && size <= bo->size);
317
318   bool ok = v3dv_bo_map_unsynchronized(device, bo, size);
319   if (!ok)
320      return false;
321
322   ok = v3dv_bo_wait(device, bo, PIPE_TIMEOUT_INFINITE);
323   if (!ok) {
324      fprintf(stderr, "memory wait for map failed\n");
325      return false;
326   }
327
328   return true;
329}
330
331void
332v3dv_bo_unmap(struct v3dv_device *device, struct v3dv_bo *bo)
333{
334   assert(bo && bo->map && bo->map_size > 0);
335
336   munmap(bo->map, bo->map_size);
337   VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0));
338   bo->map = NULL;
339   bo->map_size = 0;
340}
341
342static boolean
343reallocate_size_list(struct v3dv_bo_cache *cache,
344                     struct v3dv_device *device,
345                     uint32_t size)
346{
347   struct list_head *new_list =
348      vk_alloc(&device->vk.alloc, sizeof(struct list_head) * size, 8,
349               VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
350
351   if (!new_list) {
352      fprintf(stderr, "Failed to allocate host memory for cache bo list\n");
353      return false;
354   }
355   struct list_head *old_list = cache->size_list;
356
357   /* Move old list contents over (since the array has moved, and
358    * therefore the pointers to the list heads have to change).
359    */
360   for (int i = 0; i < cache->size_list_size; i++) {
361      struct list_head *old_head = &cache->size_list[i];
362      if (list_is_empty(old_head)) {
363         list_inithead(&new_list[i]);
364      } else {
365         new_list[i].next = old_head->next;
366         new_list[i].prev = old_head->prev;
367         new_list[i].next->prev = &new_list[i];
368         new_list[i].prev->next = &new_list[i];
369      }
370   }
371   for (int i = cache->size_list_size; i < size; i++)
372      list_inithead(&new_list[i]);
373
374   cache->size_list = new_list;
375   cache->size_list_size = size;
376   vk_free(&device->vk.alloc, old_list);
377
378   return true;
379}
380
381void
382v3dv_bo_cache_init(struct v3dv_device *device)
383{
384   device->bo_size = 0;
385   device->bo_count = 0;
386   list_inithead(&device->bo_cache.time_list);
387   /* FIXME: perhaps set a initial size for the size-list, to avoid run-time
388    * reallocations
389    */
390   device->bo_cache.size_list_size = 0;
391
392   const char *max_cache_size_str = getenv("V3DV_MAX_BO_CACHE_SIZE");
393   if (max_cache_size_str == NULL)
394      device->bo_cache.max_cache_size = DEFAULT_MAX_BO_CACHE_SIZE;
395   else
396      device->bo_cache.max_cache_size = atoll(max_cache_size_str);
397
398   if (dump_stats) {
399      fprintf(stderr, "MAX BO CACHE SIZE: %iMB\n", device->bo_cache.max_cache_size);
400   }
401
402   device->bo_cache.max_cache_size *= 1024 * 1024;
403   device->bo_cache.cache_count = 0;
404   device->bo_cache.cache_size = 0;
405}
406
407void
408v3dv_bo_cache_destroy(struct v3dv_device *device)
409{
410   bo_cache_free_all(device, true);
411   vk_free(&device->vk.alloc, device->bo_cache.size_list);
412
413   if (dump_stats) {
414      fprintf(stderr, "BO stats after screen destroy:\n");
415      bo_dump_stats(device);
416   }
417}
418
419
420static void
421free_stale_bos(struct v3dv_device *device,
422               time_t time)
423{
424   struct v3dv_bo_cache *cache = &device->bo_cache;
425   bool freed_any = false;
426
427   list_for_each_entry_safe(struct v3dv_bo, bo, &cache->time_list,
428                            time_list) {
429      /* If it's more than a second old, free it. */
430      if (time - bo->free_time > 2) {
431         if (dump_stats && !freed_any) {
432            fprintf(stderr, "Freeing stale BOs:\n");
433            bo_dump_stats(device);
434            freed_any = true;
435         }
436
437         bo_remove_from_cache(cache, bo);
438         bo_free(device, bo);
439      } else {
440         break;
441      }
442   }
443
444   if (dump_stats && freed_any) {
445      fprintf(stderr, "Freed stale BOs:\n");
446      bo_dump_stats(device);
447   }
448}
449
450bool
451v3dv_bo_free(struct v3dv_device *device,
452             struct v3dv_bo *bo)
453{
454   if (!bo)
455      return true;
456
457   if (!p_atomic_dec_zero(&bo->refcnt))
458      return true;
459
460   if (bo->map)
461      v3dv_bo_unmap(device, bo);
462
463   struct timespec time;
464   struct v3dv_bo_cache *cache = &device->bo_cache;
465   uint32_t page_index = bo->size / 4096 - 1;
466
467   if (bo->private &&
468       bo->size > cache->max_cache_size - cache->cache_size) {
469      clock_gettime(CLOCK_MONOTONIC, &time);
470      mtx_lock(&cache->lock);
471      free_stale_bos(device, time.tv_sec);
472      mtx_unlock(&cache->lock);
473   }
474
475   if (!bo->private ||
476       bo->size > cache->max_cache_size - cache->cache_size) {
477      return bo_free(device, bo);
478   }
479
480   clock_gettime(CLOCK_MONOTONIC, &time);
481   mtx_lock(&cache->lock);
482
483   if (cache->size_list_size <= page_index) {
484      if (!reallocate_size_list(cache, device, page_index + 1)) {
485         bool outcome = bo_free(device, bo);
486         /* If the reallocation failed, it usually means that we are out of
487          * memory, so we also free all the bo cache. We need to call it to
488          * not use the cache lock, as we are already under it.
489          */
490         bo_cache_free_all(device, false);
491         mtx_unlock(&cache->lock);
492         return outcome;
493      }
494   }
495
496   bo->free_time = time.tv_sec;
497   list_addtail(&bo->size_list, &cache->size_list[page_index]);
498   list_addtail(&bo->time_list, &cache->time_list);
499
500   cache->cache_count++;
501   cache->cache_size += bo->size;
502
503   if (dump_stats) {
504      fprintf(stderr, "Freed %s %dkb to cache:\n",
505              bo->name, bo->size / 1024);
506      bo_dump_stats(device);
507   }
508   bo->name = NULL;
509
510   free_stale_bos(device, time.tv_sec);
511
512   mtx_unlock(&cache->lock);
513
514   return true;
515}
516