// SPDX-License-Identifier: GPL-2.0-only
/*
 * Tegra host1x driver
 *
 * Copyright (c) 2010-2013, NVIDIA Corporation.
 */

#include <linux/clk.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/of_device.h>
#include <linux/of.h>
#include <linux/slab.h>

#define CREATE_TRACE_POINTS
#include <trace/events/host1x.h>
#undef CREATE_TRACE_POINTS

#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
#include <asm/dma-iommu.h>
#endif

#include "bus.h"
#include "channel.h"
#include "debug.h"
#include "dev.h"
#include "intr.h"

#include "hw/host1x01.h"
#include "hw/host1x02.h"
#include "hw/host1x04.h"
#include "hw/host1x05.h"
#include "hw/host1x06.h"
#include "hw/host1x07.h"

void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r)
{
	writel(v, host1x->hv_regs + r);
}

u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r)
{
	return readl(host1x->hv_regs + r);
}

void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r)
{
	void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset;

	writel(v, sync_regs + r);
}

u32 host1x_sync_readl(struct host1x *host1x, u32 r)
{
	void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset;

	return readl(sync_regs + r);
}

void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r)
{
	writel(v, ch->regs + r);
}

u32 host1x_ch_readl(struct host1x_channel *ch, u32 r)
{
	return readl(ch->regs + r);
}

static const struct host1x_info host1x01_info = {
	.nb_channels = 8,
	.nb_pts = 32,
	.nb_mlocks = 16,
	.nb_bases = 8,
	.init = host1x01_init,
	.sync_offset = 0x3000,
	.dma_mask = DMA_BIT_MASK(32),
	.has_wide_gather = false,
	.has_hypervisor = false,
	.num_sid_entries = 0,
	.sid_table = NULL,
};

static const struct host1x_info host1x02_info = {
	.nb_channels = 9,
	.nb_pts = 32,
	.nb_mlocks = 16,
	.nb_bases = 12,
	.init = host1x02_init,
	.sync_offset = 0x3000,
	.dma_mask = DMA_BIT_MASK(32),
	.has_wide_gather = false,
	.has_hypervisor = false,
	.num_sid_entries = 0,
	.sid_table = NULL,
};

static const struct host1x_info host1x04_info = {
	.nb_channels = 12,
	.nb_pts = 192,
	.nb_mlocks = 16,
	.nb_bases = 64,
	.init = host1x04_init,
	.sync_offset = 0x2100,
	.dma_mask = DMA_BIT_MASK(34),
	.has_wide_gather = false,
	.has_hypervisor = false,
	.num_sid_entries = 0,
	.sid_table = NULL,
};

static const struct host1x_info host1x05_info = {
	.nb_channels = 14,
	.nb_pts = 192,
	.nb_mlocks = 16,
	.nb_bases = 64,
	.init = host1x05_init,
	.sync_offset = 0x2100,
	.dma_mask = DMA_BIT_MASK(34),
	.has_wide_gather = false,
	.has_hypervisor = false,
	.num_sid_entries = 0,
	.sid_table = NULL,
};

static const struct host1x_sid_entry tegra186_sid_table[] = {
	{
		/* VIC */
		.base = 0x1af0,
		.offset = 0x30,
		.limit = 0x34
	},
};

static const struct host1x_info host1x06_info = {
	.nb_channels = 63,
	.nb_pts = 576,
	.nb_mlocks = 24,
	.nb_bases = 16,
	.init = host1x06_init,
	.sync_offset = 0x0,
	.dma_mask = DMA_BIT_MASK(40),
	.has_wide_gather = true,
	.has_hypervisor = true,
	.num_sid_entries = ARRAY_SIZE(tegra186_sid_table),
	.sid_table = tegra186_sid_table,
};

static const struct host1x_sid_entry tegra194_sid_table[] = {
	{
		/* VIC */
		.base = 0x1af0,
		.offset = 0x30,
		.limit = 0x34
	},
};

static const struct host1x_info host1x07_info = {
	.nb_channels = 63,
	.nb_pts = 704,
	.nb_mlocks = 32,
	.nb_bases = 0,
	.init = host1x07_init,
	.sync_offset = 0x0,
	.dma_mask = DMA_BIT_MASK(40),
	.has_wide_gather = true,
	.has_hypervisor = true,
	.num_sid_entries = ARRAY_SIZE(tegra194_sid_table),
	.sid_table = tegra194_sid_table,
};

static const struct of_device_id host1x_of_match[] = {
	{ .compatible = "nvidia,tegra194-host1x", .data = &host1x07_info, },
	{ .compatible = "nvidia,tegra186-host1x", .data = &host1x06_info, },
	{ .compatible = "nvidia,tegra210-host1x", .data = &host1x05_info, },
	{ .compatible = "nvidia,tegra124-host1x", .data = &host1x04_info, },
	{ .compatible = "nvidia,tegra114-host1x", .data = &host1x02_info, },
	{ .compatible = "nvidia,tegra30-host1x", .data = &host1x01_info, },
	{ .compatible = "nvidia,tegra20-host1x", .data = &host1x01_info, },
	{ },
};
MODULE_DEVICE_TABLE(of, host1x_of_match);

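/*
 * Program the per-SoC stream ID (SID) table into the hypervisor register
 * space: each entry's offset value is written to the register at its base
 * address and its limit to base + 4. This is only used on SoCs whose
 * host1x_info sets has_hypervisor (Tegra186 and later).
 */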
static void host1x_setup_sid_table(struct host1x *host)
{
	const struct host1x_info *info = host->info;
	unsigned int i;

	for (i = 0; i < info->num_sid_entries; i++) {
		const struct host1x_sid_entry *entry = &info->sid_table[i];

		host1x_hypervisor_writel(host, entry->offset, entry->base);
		host1x_hypervisor_writel(host, entry->limit, entry->base + 4);
	}
}

static bool host1x_wants_iommu(struct host1x *host1x)
{
	/* Our IOMMU usage policy doesn't currently play well with GART */
	if (of_machine_is_compatible("nvidia,tegra20"))
		return false;

	/*
	 * If we support addressing a maximum of 32 bits of physical memory
	 * and if the host1x firewall is enabled, there's no need to enable
	 * IOMMU support. This can happen for example on Tegra20, Tegra30
	 * and Tegra114.
	 *
	 * Tegra124 and later can address up to 34 bits of physical memory and
	 * many platforms come equipped with more than 2 GiB of system memory,
	 * which requires crossing the 4 GiB boundary. But there's a catch: on
	 * SoCs before Tegra186 (i.e. Tegra124 and Tegra210), the host1x can
	 * only address up to 32 bits of memory in GATHER opcodes, which means
	 * that command buffers need to either be in the first 2 GiB of system
	 * memory (which could quickly lead to memory exhaustion), or command
	 * buffers need to be treated differently from other buffers (which is
	 * not possible with the current ABI).
	 *
	 * A third option is to use the IOMMU in these cases to make sure all
	 * buffers will be mapped into a 32-bit IOVA space that host1x can
	 * address. This allows all of the system memory to be used and works
	 * within the limitations of the host1x on these SoCs.
	 *
	 * In summary, default to enable IOMMU on Tegra124 and later. For any
	 * of the earlier SoCs, only use the IOMMU for additional safety when
	 * the host1x firewall is disabled.
	 */
	if (host1x->info->dma_mask <= DMA_BIT_MASK(32)) {
		if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
			return false;
	}

	return true;
}

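/*
 * Attach host1x to an IOMMU domain when the policy above asks for one. On
 * 32-bit ARM, any mapping set up by the arch DMA/IOMMU glue is torn down
 * first so that an explicitly managed domain can be used instead. Returns
 * the domain in use (possibly the one provided via the DMA API), NULL if
 * no IOMMU is used, or an ERR_PTR() on failure.
 */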
static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev);
	int err;

#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
	if (host->dev->archdata.mapping) {
		struct dma_iommu_mapping *mapping =
				to_dma_iommu_mapping(host->dev);
		arm_iommu_detach_device(host->dev);
		arm_iommu_release_mapping(mapping);

		domain = iommu_get_domain_for_dev(host->dev);
	}
#endif

	/*
	 * We may not always want to enable IOMMU support (for example if the
	 * host1x firewall is already enabled and we don't support addressing
	 * more than 32 bits of physical memory), so check for that first.
	 *
	 * Similarly, if host1x is already attached to an IOMMU (via the DMA
	 * API), don't try to attach again.
	 */
	if (!host1x_wants_iommu(host) || domain)
		return domain;

	host->group = iommu_group_get(host->dev);
	if (host->group) {
		struct iommu_domain_geometry *geometry;
		dma_addr_t start, end;
		unsigned long order;

		err = iova_cache_get();
		if (err < 0)
			goto put_group;

		host->domain = iommu_domain_alloc(&platform_bus_type);
		if (!host->domain) {
			err = -ENOMEM;
			goto put_cache;
		}

		err = iommu_attach_group(host->domain, host->group);
		if (err) {
			if (err == -ENODEV)
				err = 0;

			goto free_domain;
		}

		geometry = &host->domain->geometry;
		start = geometry->aperture_start & host->info->dma_mask;
		end = geometry->aperture_end & host->info->dma_mask;

		order = __ffs(host->domain->pgsize_bitmap);
		init_iova_domain(&host->iova, 1UL << order, start >> order);
		host->iova_end = end;

		domain = host->domain;
	}

	return domain;

free_domain:
	iommu_domain_free(host->domain);
	host->domain = NULL;
put_cache:
	iova_cache_put();
put_group:
	iommu_group_put(host->group);
	host->group = NULL;

	return ERR_PTR(err);
}

static int host1x_iommu_init(struct host1x *host)
{
	u64 mask = host->info->dma_mask;
	struct iommu_domain *domain;
	int err;

	domain = host1x_iommu_attach(host);
	if (IS_ERR(domain)) {
		err = PTR_ERR(domain);
		dev_err(host->dev, "failed to attach to IOMMU: %d\n", err);
		return err;
	}

	/*
	 * If we're not behind an IOMMU make sure we don't get push buffers
	 * that are allocated outside of the range addressable by the GATHER
	 * opcode.
	 *
	 * Newer generations of Tegra (Tegra186 and later) support a wide
	 * variant of the GATHER opcode that allows addressing more bits.
	 */
	if (!domain && !host->info->has_wide_gather)
		mask = DMA_BIT_MASK(32);

	err = dma_coerce_mask_and_coherent(host->dev, mask);
	if (err < 0) {
		dev_err(host->dev, "failed to set DMA mask: %d\n", err);
		return err;
	}

	return 0;
}

static void host1x_iommu_exit(struct host1x *host)
{
	if (host->domain) {
		put_iova_domain(&host->iova);
		iommu_detach_group(host->domain, host->group);

		iommu_domain_free(host->domain);
		host->domain = NULL;

		iova_cache_put();

		iommu_group_put(host->group);
		host->group = NULL;
	}
}

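/*
 * Probe: map the register apertures (a separate "vm" and "hypervisor" pair
 * on virtualization-capable SoCs), look up the clock and reset controls,
 * set up the IOMMU and DMA mask, channels, syncpoints and interrupts, then
 * register with the host1x bus and populate child devices from the device
 * tree.
 */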
static int host1x_probe(struct platform_device *pdev)
{
	struct host1x *host;
	struct resource *regs, *hv_regs = NULL;
	int syncpt_irq;
	int err;

	host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL);
	if (!host)
		return -ENOMEM;

	host->info = of_device_get_match_data(&pdev->dev);

	if (host->info->has_hypervisor) {
		regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "vm");
		if (!regs) {
			dev_err(&pdev->dev, "failed to get vm registers\n");
			return -ENXIO;
		}

		hv_regs = platform_get_resource_byname(pdev, IORESOURCE_MEM,
						       "hypervisor");
		if (!hv_regs) {
			dev_err(&pdev->dev,
				"failed to get hypervisor registers\n");
			return -ENXIO;
		}
	} else {
		regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
		if (!regs) {
			dev_err(&pdev->dev, "failed to get registers\n");
			return -ENXIO;
		}
	}

	syncpt_irq = platform_get_irq(pdev, 0);
	if (syncpt_irq < 0)
		return syncpt_irq;

	mutex_init(&host->devices_lock);
	INIT_LIST_HEAD(&host->devices);
	INIT_LIST_HEAD(&host->list);
	host->dev = &pdev->dev;

	/* set common host1x device data */
	platform_set_drvdata(pdev, host);

	host->regs = devm_ioremap_resource(&pdev->dev, regs);
	if (IS_ERR(host->regs))
		return PTR_ERR(host->regs);

	if (host->info->has_hypervisor) {
		host->hv_regs = devm_ioremap_resource(&pdev->dev, hv_regs);
		if (IS_ERR(host->hv_regs))
			return PTR_ERR(host->hv_regs);
	}

	host->dev->dma_parms = &host->dma_parms;
	dma_set_max_seg_size(host->dev, UINT_MAX);

	if (host->info->init) {
		err = host->info->init(host);
		if (err)
			return err;
	}

	host->clk = devm_clk_get(&pdev->dev, NULL);
	if (IS_ERR(host->clk)) {
		err = PTR_ERR(host->clk);

		if (err != -EPROBE_DEFER)
			dev_err(&pdev->dev, "failed to get clock: %d\n", err);

		return err;
	}

	host->rst = devm_reset_control_get(&pdev->dev, "host1x");
	if (IS_ERR(host->rst)) {
		err = PTR_ERR(host->rst);
		dev_err(&pdev->dev, "failed to get reset: %d\n", err);
		return err;
	}

	err = host1x_iommu_init(host);
	if (err < 0) {
		dev_err(&pdev->dev, "failed to setup IOMMU: %d\n", err);
		return err;
	}

	err = host1x_channel_list_init(&host->channel_list,
				       host->info->nb_channels);
	if (err) {
		dev_err(&pdev->dev, "failed to initialize channel list\n");
		goto iommu_exit;
	}

	err = clk_prepare_enable(host->clk);
	if (err < 0) {
		dev_err(&pdev->dev, "failed to enable clock\n");
		goto free_channels;
	}

	err = reset_control_deassert(host->rst);
	if (err < 0) {
		dev_err(&pdev->dev, "failed to deassert reset: %d\n", err);
		goto unprepare_disable;
	}

	err = host1x_syncpt_init(host);
	if (err) {
		dev_err(&pdev->dev, "failed to initialize syncpts\n");
		goto reset_assert;
	}

	err = host1x_intr_init(host, syncpt_irq);
	if (err) {
		dev_err(&pdev->dev, "failed to initialize interrupts\n");
		goto deinit_syncpt;
	}

	host1x_debug_init(host);

	if (host->info->has_hypervisor)
		host1x_setup_sid_table(host);

	err = host1x_register(host);
	if (err < 0)
		goto deinit_debugfs;

	err = devm_of_platform_populate(&pdev->dev);
	if (err < 0)
		goto unregister;

	return 0;

unregister:
	host1x_unregister(host);
deinit_debugfs:
	host1x_debug_deinit(host);
	host1x_intr_deinit(host);
deinit_syncpt:
	host1x_syncpt_deinit(host);
reset_assert:
	reset_control_assert(host->rst);
unprepare_disable:
	clk_disable_unprepare(host->clk);
free_channels:
	host1x_channel_list_free(&host->channel_list);
iommu_exit:
	host1x_iommu_exit(host);

	return err;
}

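/*
 * Remove: unwind probe in reverse order, i.e. unregister from the host1x
 * bus, tear down debugfs, interrupts and syncpoints, assert the reset,
 * gate the clock and release channel and IOMMU resources.
 */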
static int host1x_remove(struct platform_device *pdev)
{
	struct host1x *host = platform_get_drvdata(pdev);

	host1x_unregister(host);
	host1x_debug_deinit(host);
	host1x_intr_deinit(host);
	host1x_syncpt_deinit(host);
	reset_control_assert(host->rst);
	clk_disable_unprepare(host->clk);
	host1x_channel_list_free(&host->channel_list);
	host1x_iommu_exit(host);

	return 0;
}

static struct platform_driver tegra_host1x_driver = {
	.driver = {
		.name = "tegra-host1x",
		.of_match_table = host1x_of_match,
	},
	.probe = host1x_probe,
	.remove = host1x_remove,
};

static struct platform_driver * const drivers[] = {
	&tegra_host1x_driver,
	&tegra_mipi_driver,
};

static int __init tegra_host1x_init(void)
{
	int err;

	err = bus_register(&host1x_bus_type);
	if (err < 0)
		return err;

	err = platform_register_drivers(drivers, ARRAY_SIZE(drivers));
	if (err < 0)
		bus_unregister(&host1x_bus_type);

	return err;
}
module_init(tegra_host1x_init);

static void __exit tegra_host1x_exit(void)
{
	platform_unregister_drivers(drivers, ARRAY_SIZE(drivers));
	bus_unregister(&host1x_bus_type);
}
module_exit(tegra_host1x_exit);

/**
 * host1x_get_dma_mask() - query the supported DMA mask for host1x
 * @host1x: host1x instance
 *
 * Note that this returns the supported DMA mask for host1x, which can be
 * different from the applicable DMA mask under certain circumstances.
 */
u64 host1x_get_dma_mask(struct host1x *host1x)
{
	return host1x->info->dma_mask;
}
EXPORT_SYMBOL(host1x_get_dma_mask);

MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>");
MODULE_AUTHOR("Terje Bergstrom <tbergstrom@nvidia.com>");
MODULE_DESCRIPTION("Host1x driver for Tegra products");
MODULE_LICENSE("GPL");