18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * Coherency fabric (Aurora) support for Armada 370, 375, 38x and XP
38c2ecf20Sopenharmony_ci * platforms.
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Copyright (C) 2012 Marvell
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci * Yehuda Yitschak <yehuday@marvell.com>
88c2ecf20Sopenharmony_ci * Gregory Clement <gregory.clement@free-electrons.com>
98c2ecf20Sopenharmony_ci * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
108c2ecf20Sopenharmony_ci *
118c2ecf20Sopenharmony_ci * This file is licensed under the terms of the GNU General Public
128c2ecf20Sopenharmony_ci * License version 2.  This program is licensed "as is" without any
138c2ecf20Sopenharmony_ci * warranty of any kind, whether express or implied.
148c2ecf20Sopenharmony_ci *
158c2ecf20Sopenharmony_ci * The Armada 370, 375, 38x and XP SOCs have a coherency fabric which is
168c2ecf20Sopenharmony_ci * responsible for ensuring hardware coherency between all CPUs and between
178c2ecf20Sopenharmony_ci * CPUs and I/O masters. This file initializes the coherency fabric and
188c2ecf20Sopenharmony_ci * supplies basic routines for configuring and controlling hardware coherency
198c2ecf20Sopenharmony_ci */
208c2ecf20Sopenharmony_ci
218c2ecf20Sopenharmony_ci#define pr_fmt(fmt) "mvebu-coherency: " fmt
228c2ecf20Sopenharmony_ci
238c2ecf20Sopenharmony_ci#include <linux/kernel.h>
248c2ecf20Sopenharmony_ci#include <linux/init.h>
258c2ecf20Sopenharmony_ci#include <linux/of_address.h>
268c2ecf20Sopenharmony_ci#include <linux/io.h>
278c2ecf20Sopenharmony_ci#include <linux/smp.h>
288c2ecf20Sopenharmony_ci#include <linux/dma-map-ops.h>
298c2ecf20Sopenharmony_ci#include <linux/platform_device.h>
308c2ecf20Sopenharmony_ci#include <linux/slab.h>
318c2ecf20Sopenharmony_ci#include <linux/mbus.h>
328c2ecf20Sopenharmony_ci#include <linux/pci.h>
338c2ecf20Sopenharmony_ci#include <asm/smp_plat.h>
348c2ecf20Sopenharmony_ci#include <asm/cacheflush.h>
358c2ecf20Sopenharmony_ci#include <asm/mach/map.h>
368c2ecf20Sopenharmony_ci#include <asm/dma-mapping.h>
378c2ecf20Sopenharmony_ci#include "coherency.h"
388c2ecf20Sopenharmony_ci#include "mvebu-soc-id.h"
398c2ecf20Sopenharmony_ci
/*
 * Physical address of the coherency fabric registers, taken from
 * resource 0 of the fabric node in armada_370_coherency_init(). It is
 * written back with sync_cache_w() there so that secondary CPUs can read
 * it before they are coherent with the boot CPU cache.
 */
unsigned long coherency_phys_base;
/* Mapping of resource 0 of the fabric node (Armada 370/XP init path). */
void __iomem *coherency_base;
/* Mapping of resource 1 (370/XP) or resource 0 (375/38x) of the fabric node. */
static void __iomem *coherency_cpu_base;
/* Mapping of the "marvell,armada-xp-cpu-config" node; NULL until mapped. */
static void __iomem *cpu_config_base;

/* Coherency fabric registers */
#define IO_SYNC_BARRIER_CTL_OFFSET		   0x0
478c2ecf20Sopenharmony_ci
/*
 * Kinds of coherency fabric this file distinguishes. The values are
 * stored in the .data member of the of_coherency_table entries and are
 * what coherency_type() returns.
 */
enum {
	/* Returned when !is_smp() or when no matching DT node exists. */
	COHERENCY_FABRIC_TYPE_NONE,
	COHERENCY_FABRIC_TYPE_ARMADA_370_XP,
	COHERENCY_FABRIC_TYPE_ARMADA_375,
	COHERENCY_FABRIC_TYPE_ARMADA_380,
};
548c2ecf20Sopenharmony_ci
/*
 * Device tree match table for the coherency fabric node. Each entry
 * carries the matching COHERENCY_FABRIC_TYPE_* value in .data, encoded
 * as a pointer; coherency_type() casts it back to an int.
 */
static const struct of_device_id of_coherency_table[] = {
	{.compatible = "marvell,coherency-fabric",
	 .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_370_XP },
	{.compatible = "marvell,armada-375-coherency-fabric",
	 .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_375 },
	{.compatible = "marvell,armada-380-coherency-fabric",
	 .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_380 },
	{ /* end of list */ },
};
648c2ecf20Sopenharmony_ci
/*
 * Functions defined in coherency_ll.S. Both are called from
 * set_cpu_coherent(), which returns the result of ll_enable_coherency().
 */
int ll_enable_coherency(void);
void ll_add_cpu_to_smp_group(void);

/* Bit cleared in the register at cpu_config_base by armada_xp_clear_shared_l2(). */
#define CPU_CONFIG_SHARED_L2 BIT(16)
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_ci/*
728c2ecf20Sopenharmony_ci * Disable the "Shared L2 Present" bit in CPU Configuration register
738c2ecf20Sopenharmony_ci * on Armada XP.
748c2ecf20Sopenharmony_ci *
758c2ecf20Sopenharmony_ci * The "Shared L2 Present" bit affects the "level of coherence" value
768c2ecf20Sopenharmony_ci * in the clidr CP15 register.  Cache operation functions such as
778c2ecf20Sopenharmony_ci * "flush all" and "invalidate all" operate on all the cache levels
788c2ecf20Sopenharmony_ci * that included in the defined level of coherence. When HW I/O
798c2ecf20Sopenharmony_ci * coherency is used, this bit causes unnecessary flushes of the L2
808c2ecf20Sopenharmony_ci * cache.
818c2ecf20Sopenharmony_ci */
828c2ecf20Sopenharmony_cistatic void armada_xp_clear_shared_l2(void)
838c2ecf20Sopenharmony_ci{
848c2ecf20Sopenharmony_ci	u32 reg;
858c2ecf20Sopenharmony_ci
868c2ecf20Sopenharmony_ci	if (!cpu_config_base)
878c2ecf20Sopenharmony_ci		return;
888c2ecf20Sopenharmony_ci
898c2ecf20Sopenharmony_ci	reg = readl(cpu_config_base);
908c2ecf20Sopenharmony_ci	reg &= ~CPU_CONFIG_SHARED_L2;
918c2ecf20Sopenharmony_ci	writel(reg, cpu_config_base);
928c2ecf20Sopenharmony_ci}
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_cistatic int mvebu_hwcc_notifier(struct notifier_block *nb,
958c2ecf20Sopenharmony_ci			       unsigned long event, void *__dev)
968c2ecf20Sopenharmony_ci{
978c2ecf20Sopenharmony_ci	struct device *dev = __dev;
988c2ecf20Sopenharmony_ci
998c2ecf20Sopenharmony_ci	if (event != BUS_NOTIFY_ADD_DEVICE)
1008c2ecf20Sopenharmony_ci		return NOTIFY_DONE;
1018c2ecf20Sopenharmony_ci	set_dma_ops(dev, &arm_coherent_dma_ops);
1028c2ecf20Sopenharmony_ci
1038c2ecf20Sopenharmony_ci	return NOTIFY_OK;
1048c2ecf20Sopenharmony_ci}
1058c2ecf20Sopenharmony_ci
/* Registered on platform_bus_type by coherency_late_init(). */
static struct notifier_block mvebu_hwcc_nb = {
	.notifier_call = mvebu_hwcc_notifier,
};

/*
 * Registered on pci_bus_type by coherency_pci_init(). That function is
 * only built when CONFIG_PCI is enabled, hence __maybe_unused.
 */
static struct notifier_block mvebu_hwcc_pci_nb __maybe_unused = {
	.notifier_call = mvebu_hwcc_notifier,
};
1138c2ecf20Sopenharmony_ci
/*
 * CPU hotplug "starting" callback installed by
 * armada_370_coherency_init(): clears the "Shared L2 Present" bit.
 *
 * @cpu: number of the CPU being started (unused)
 *
 * Always returns 0.
 */
static int armada_xp_clear_l2_starting(unsigned int cpu)
{
	armada_xp_clear_shared_l2();

	return 0;
}
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_cistatic void __init armada_370_coherency_init(struct device_node *np)
1218c2ecf20Sopenharmony_ci{
1228c2ecf20Sopenharmony_ci	struct resource res;
1238c2ecf20Sopenharmony_ci	struct device_node *cpu_config_np;
1248c2ecf20Sopenharmony_ci
1258c2ecf20Sopenharmony_ci	of_address_to_resource(np, 0, &res);
1268c2ecf20Sopenharmony_ci	coherency_phys_base = res.start;
1278c2ecf20Sopenharmony_ci	/*
1288c2ecf20Sopenharmony_ci	 * Ensure secondary CPUs will see the updated value,
1298c2ecf20Sopenharmony_ci	 * which they read before they join the coherency
1308c2ecf20Sopenharmony_ci	 * fabric, and therefore before they are coherent with
1318c2ecf20Sopenharmony_ci	 * the boot CPU cache.
1328c2ecf20Sopenharmony_ci	 */
1338c2ecf20Sopenharmony_ci	sync_cache_w(&coherency_phys_base);
1348c2ecf20Sopenharmony_ci	coherency_base = of_iomap(np, 0);
1358c2ecf20Sopenharmony_ci	coherency_cpu_base = of_iomap(np, 1);
1368c2ecf20Sopenharmony_ci
1378c2ecf20Sopenharmony_ci	cpu_config_np = of_find_compatible_node(NULL, NULL,
1388c2ecf20Sopenharmony_ci						"marvell,armada-xp-cpu-config");
1398c2ecf20Sopenharmony_ci	if (!cpu_config_np)
1408c2ecf20Sopenharmony_ci		goto exit;
1418c2ecf20Sopenharmony_ci
1428c2ecf20Sopenharmony_ci	cpu_config_base = of_iomap(cpu_config_np, 0);
1438c2ecf20Sopenharmony_ci	if (!cpu_config_base) {
1448c2ecf20Sopenharmony_ci		of_node_put(cpu_config_np);
1458c2ecf20Sopenharmony_ci		goto exit;
1468c2ecf20Sopenharmony_ci	}
1478c2ecf20Sopenharmony_ci
1488c2ecf20Sopenharmony_ci	of_node_put(cpu_config_np);
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_ci	cpuhp_setup_state_nocalls(CPUHP_AP_ARM_MVEBU_COHERENCY,
1518c2ecf20Sopenharmony_ci				  "arm/mvebu/coherency:starting",
1528c2ecf20Sopenharmony_ci				  armada_xp_clear_l2_starting, NULL);
1538c2ecf20Sopenharmony_ciexit:
1548c2ecf20Sopenharmony_ci	set_cpu_coherent();
1558c2ecf20Sopenharmony_ci}
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_ci/*
1588c2ecf20Sopenharmony_ci * This ioremap hook is used on Armada 375/38x to ensure that all MMIO
1598c2ecf20Sopenharmony_ci * areas are mapped as MT_UNCACHED instead of MT_DEVICE. This is
1608c2ecf20Sopenharmony_ci * needed for the HW I/O coherency mechanism to work properly without
1618c2ecf20Sopenharmony_ci * deadlock.
1628c2ecf20Sopenharmony_ci */
1638c2ecf20Sopenharmony_cistatic void __iomem *
1648c2ecf20Sopenharmony_ciarmada_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
1658c2ecf20Sopenharmony_ci			 unsigned int mtype, void *caller)
1668c2ecf20Sopenharmony_ci{
1678c2ecf20Sopenharmony_ci	mtype = MT_UNCACHED;
1688c2ecf20Sopenharmony_ci	return __arm_ioremap_caller(phys_addr, size, mtype, caller);
1698c2ecf20Sopenharmony_ci}
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_cistatic void __init armada_375_380_coherency_init(struct device_node *np)
1728c2ecf20Sopenharmony_ci{
1738c2ecf20Sopenharmony_ci	struct device_node *cache_dn;
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci	coherency_cpu_base = of_iomap(np, 0);
1768c2ecf20Sopenharmony_ci	arch_ioremap_caller = armada_wa_ioremap_caller;
1778c2ecf20Sopenharmony_ci	pci_ioremap_set_mem_type(MT_UNCACHED);
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_ci	/*
1808c2ecf20Sopenharmony_ci	 * We should switch the PL310 to I/O coherency mode only if
1818c2ecf20Sopenharmony_ci	 * I/O coherency is actually enabled.
1828c2ecf20Sopenharmony_ci	 */
1838c2ecf20Sopenharmony_ci	if (!coherency_available())
1848c2ecf20Sopenharmony_ci		return;
1858c2ecf20Sopenharmony_ci
1868c2ecf20Sopenharmony_ci	/*
1878c2ecf20Sopenharmony_ci	 * Add the PL310 property "arm,io-coherent". This makes sure the
1888c2ecf20Sopenharmony_ci	 * outer sync operation is not used, which allows to
1898c2ecf20Sopenharmony_ci	 * workaround the system erratum that causes deadlocks when
1908c2ecf20Sopenharmony_ci	 * doing PCIe in an SMP situation on Armada 375 and Armada
1918c2ecf20Sopenharmony_ci	 * 38x.
1928c2ecf20Sopenharmony_ci	 */
1938c2ecf20Sopenharmony_ci	for_each_compatible_node(cache_dn, NULL, "arm,pl310-cache") {
1948c2ecf20Sopenharmony_ci		struct property *p;
1958c2ecf20Sopenharmony_ci
1968c2ecf20Sopenharmony_ci		p = kzalloc(sizeof(*p), GFP_KERNEL);
1978c2ecf20Sopenharmony_ci		p->name = kstrdup("arm,io-coherent", GFP_KERNEL);
1988c2ecf20Sopenharmony_ci		of_add_property(cache_dn, p);
1998c2ecf20Sopenharmony_ci	}
2008c2ecf20Sopenharmony_ci}
2018c2ecf20Sopenharmony_ci
2028c2ecf20Sopenharmony_cistatic int coherency_type(void)
2038c2ecf20Sopenharmony_ci{
2048c2ecf20Sopenharmony_ci	struct device_node *np;
2058c2ecf20Sopenharmony_ci	const struct of_device_id *match;
2068c2ecf20Sopenharmony_ci	int type;
2078c2ecf20Sopenharmony_ci
2088c2ecf20Sopenharmony_ci	/*
2098c2ecf20Sopenharmony_ci	 * The coherency fabric is needed:
2108c2ecf20Sopenharmony_ci	 * - For coherency between processors on Armada XP, so only
2118c2ecf20Sopenharmony_ci	 *   when SMP is enabled.
2128c2ecf20Sopenharmony_ci	 * - For coherency between the processor and I/O devices, but
2138c2ecf20Sopenharmony_ci	 *   this coherency requires many pre-requisites (write
2148c2ecf20Sopenharmony_ci	 *   allocate cache policy, shareable pages, SMP bit set) that
2158c2ecf20Sopenharmony_ci	 *   are only meant in SMP situations.
2168c2ecf20Sopenharmony_ci	 *
2178c2ecf20Sopenharmony_ci	 * Note that this means that on Armada 370, there is currently
2188c2ecf20Sopenharmony_ci	 * no way to use hardware I/O coherency, because even when
2198c2ecf20Sopenharmony_ci	 * CONFIG_SMP is enabled, is_smp() returns false due to the
2208c2ecf20Sopenharmony_ci	 * Armada 370 being a single-core processor. To lift this
2218c2ecf20Sopenharmony_ci	 * limitation, we would have to find a way to make the cache
2228c2ecf20Sopenharmony_ci	 * policy set to write-allocate (on all Armada SoCs), and to
2238c2ecf20Sopenharmony_ci	 * set the shareable attribute in page tables (on all Armada
2248c2ecf20Sopenharmony_ci	 * SoCs except the Armada 370). Unfortunately, such decisions
2258c2ecf20Sopenharmony_ci	 * are taken very early in the kernel boot process, at a point
2268c2ecf20Sopenharmony_ci	 * where we don't know yet on which SoC we are running.
2278c2ecf20Sopenharmony_ci
2288c2ecf20Sopenharmony_ci	 */
2298c2ecf20Sopenharmony_ci	if (!is_smp())
2308c2ecf20Sopenharmony_ci		return COHERENCY_FABRIC_TYPE_NONE;
2318c2ecf20Sopenharmony_ci
2328c2ecf20Sopenharmony_ci	np = of_find_matching_node_and_match(NULL, of_coherency_table, &match);
2338c2ecf20Sopenharmony_ci	if (!np)
2348c2ecf20Sopenharmony_ci		return COHERENCY_FABRIC_TYPE_NONE;
2358c2ecf20Sopenharmony_ci
2368c2ecf20Sopenharmony_ci	type = (int) match->data;
2378c2ecf20Sopenharmony_ci
2388c2ecf20Sopenharmony_ci	of_node_put(np);
2398c2ecf20Sopenharmony_ci
2408c2ecf20Sopenharmony_ci	return type;
2418c2ecf20Sopenharmony_ci}
2428c2ecf20Sopenharmony_ci
2438c2ecf20Sopenharmony_ciint set_cpu_coherent(void)
2448c2ecf20Sopenharmony_ci{
2458c2ecf20Sopenharmony_ci	int type = coherency_type();
2468c2ecf20Sopenharmony_ci
2478c2ecf20Sopenharmony_ci	if (type == COHERENCY_FABRIC_TYPE_ARMADA_370_XP) {
2488c2ecf20Sopenharmony_ci		if (!coherency_base) {
2498c2ecf20Sopenharmony_ci			pr_warn("Can't make current CPU cache coherent.\n");
2508c2ecf20Sopenharmony_ci			pr_warn("Coherency fabric is not initialized\n");
2518c2ecf20Sopenharmony_ci			return 1;
2528c2ecf20Sopenharmony_ci		}
2538c2ecf20Sopenharmony_ci
2548c2ecf20Sopenharmony_ci		armada_xp_clear_shared_l2();
2558c2ecf20Sopenharmony_ci		ll_add_cpu_to_smp_group();
2568c2ecf20Sopenharmony_ci		return ll_enable_coherency();
2578c2ecf20Sopenharmony_ci	}
2588c2ecf20Sopenharmony_ci
2598c2ecf20Sopenharmony_ci	return 0;
2608c2ecf20Sopenharmony_ci}
2618c2ecf20Sopenharmony_ci
2628c2ecf20Sopenharmony_ciint coherency_available(void)
2638c2ecf20Sopenharmony_ci{
2648c2ecf20Sopenharmony_ci	return coherency_type() != COHERENCY_FABRIC_TYPE_NONE;
2658c2ecf20Sopenharmony_ci}
2668c2ecf20Sopenharmony_ci
2678c2ecf20Sopenharmony_ciint __init coherency_init(void)
2688c2ecf20Sopenharmony_ci{
2698c2ecf20Sopenharmony_ci	int type = coherency_type();
2708c2ecf20Sopenharmony_ci	struct device_node *np;
2718c2ecf20Sopenharmony_ci
2728c2ecf20Sopenharmony_ci	np = of_find_matching_node(NULL, of_coherency_table);
2738c2ecf20Sopenharmony_ci
2748c2ecf20Sopenharmony_ci	if (type == COHERENCY_FABRIC_TYPE_ARMADA_370_XP)
2758c2ecf20Sopenharmony_ci		armada_370_coherency_init(np);
2768c2ecf20Sopenharmony_ci	else if (type == COHERENCY_FABRIC_TYPE_ARMADA_375 ||
2778c2ecf20Sopenharmony_ci		 type == COHERENCY_FABRIC_TYPE_ARMADA_380)
2788c2ecf20Sopenharmony_ci		armada_375_380_coherency_init(np);
2798c2ecf20Sopenharmony_ci
2808c2ecf20Sopenharmony_ci	of_node_put(np);
2818c2ecf20Sopenharmony_ci
2828c2ecf20Sopenharmony_ci	return 0;
2838c2ecf20Sopenharmony_ci}
2848c2ecf20Sopenharmony_ci
2858c2ecf20Sopenharmony_cistatic int __init coherency_late_init(void)
2868c2ecf20Sopenharmony_ci{
2878c2ecf20Sopenharmony_ci	if (coherency_available())
2888c2ecf20Sopenharmony_ci		bus_register_notifier(&platform_bus_type,
2898c2ecf20Sopenharmony_ci				      &mvebu_hwcc_nb);
2908c2ecf20Sopenharmony_ci	return 0;
2918c2ecf20Sopenharmony_ci}
2928c2ecf20Sopenharmony_ci
2938c2ecf20Sopenharmony_cipostcore_initcall(coherency_late_init);
2948c2ecf20Sopenharmony_ci
2958c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_PCI)
2968c2ecf20Sopenharmony_cistatic int __init coherency_pci_init(void)
2978c2ecf20Sopenharmony_ci{
2988c2ecf20Sopenharmony_ci	if (coherency_available())
2998c2ecf20Sopenharmony_ci		bus_register_notifier(&pci_bus_type,
3008c2ecf20Sopenharmony_ci				       &mvebu_hwcc_pci_nb);
3018c2ecf20Sopenharmony_ci	return 0;
3028c2ecf20Sopenharmony_ci}
3038c2ecf20Sopenharmony_ci
3048c2ecf20Sopenharmony_ciarch_initcall(coherency_pci_init);
3058c2ecf20Sopenharmony_ci#endif
306