162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Coherency fabric (Aurora) support for Armada 370, 375, 38x and XP
462306a36Sopenharmony_ci * platforms.
562306a36Sopenharmony_ci *
662306a36Sopenharmony_ci * Copyright (C) 2012 Marvell
762306a36Sopenharmony_ci *
862306a36Sopenharmony_ci * Yehuda Yitschak <yehuday@marvell.com>
962306a36Sopenharmony_ci * Gregory Clement <gregory.clement@free-electrons.com>
1062306a36Sopenharmony_ci * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
1162306a36Sopenharmony_ci *
1262306a36Sopenharmony_ci * The Armada 370, 375, 38x and XP SOCs have a coherency fabric which is
1362306a36Sopenharmony_ci * responsible for ensuring hardware coherency between all CPUs and between
1462306a36Sopenharmony_ci * CPUs and I/O masters. This file initializes the coherency fabric and
1562306a36Sopenharmony_ci * supplies basic routines for configuring and controlling hardware coherency
1662306a36Sopenharmony_ci */
1762306a36Sopenharmony_ci
1862306a36Sopenharmony_ci#define pr_fmt(fmt) "mvebu-coherency: " fmt
1962306a36Sopenharmony_ci
2062306a36Sopenharmony_ci#include <linux/kernel.h>
2162306a36Sopenharmony_ci#include <linux/init.h>
2262306a36Sopenharmony_ci#include <linux/of_address.h>
2362306a36Sopenharmony_ci#include <linux/io.h>
2462306a36Sopenharmony_ci#include <linux/smp.h>
2562306a36Sopenharmony_ci#include <linux/dma-map-ops.h>
2662306a36Sopenharmony_ci#include <linux/platform_device.h>
2762306a36Sopenharmony_ci#include <linux/slab.h>
2862306a36Sopenharmony_ci#include <linux/mbus.h>
2962306a36Sopenharmony_ci#include <linux/pci.h>
3062306a36Sopenharmony_ci#include <asm/smp_plat.h>
3162306a36Sopenharmony_ci#include <asm/cacheflush.h>
3262306a36Sopenharmony_ci#include <asm/mach/map.h>
3362306a36Sopenharmony_ci#include <asm/dma-mapping.h>
3462306a36Sopenharmony_ci#include "coherency.h"
3562306a36Sopenharmony_ci#include "mvebu-soc-id.h"
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_ciunsigned long coherency_phys_base;
3862306a36Sopenharmony_civoid __iomem *coherency_base;
3962306a36Sopenharmony_cistatic void __iomem *coherency_cpu_base;
4062306a36Sopenharmony_cistatic void __iomem *cpu_config_base;
4162306a36Sopenharmony_ci
/* Coherency fabric register offsets */
#define IO_SYNC_BARRIER_CTL_OFFSET		   0x0

/*
 * Flavours of coherency fabric this file knows about. The values are
 * stored in the .data field of the of_coherency_table[] entries and
 * returned by coherency_type().
 */
enum {
	COHERENCY_FABRIC_TYPE_NONE = 0,
	COHERENCY_FABRIC_TYPE_ARMADA_370_XP,
	COHERENCY_FABRIC_TYPE_ARMADA_375,
	COHERENCY_FABRIC_TYPE_ARMADA_380,
};
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_cistatic const struct of_device_id of_coherency_table[] = {
5362306a36Sopenharmony_ci	{.compatible = "marvell,coherency-fabric",
5462306a36Sopenharmony_ci	 .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_370_XP },
5562306a36Sopenharmony_ci	{.compatible = "marvell,armada-375-coherency-fabric",
5662306a36Sopenharmony_ci	 .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_375 },
5762306a36Sopenharmony_ci	{.compatible = "marvell,armada-380-coherency-fabric",
5862306a36Sopenharmony_ci	 .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_380 },
5962306a36Sopenharmony_ci	{ /* end of list */ },
6062306a36Sopenharmony_ci};
6162306a36Sopenharmony_ci
/*
 * Low-level helpers implemented in coherency_ll.S. Both are invoked from
 * set_cpu_coherent(), which returns ll_enable_coherency()'s result as is.
 */
void ll_add_cpu_to_smp_group(void);
int ll_enable_coherency(void);
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_ci#define CPU_CONFIG_SHARED_L2 BIT(16)
6762306a36Sopenharmony_ci
6862306a36Sopenharmony_ci/*
6962306a36Sopenharmony_ci * Disable the "Shared L2 Present" bit in CPU Configuration register
7062306a36Sopenharmony_ci * on Armada XP.
7162306a36Sopenharmony_ci *
7262306a36Sopenharmony_ci * The "Shared L2 Present" bit affects the "level of coherence" value
7362306a36Sopenharmony_ci * in the clidr CP15 register.  Cache operation functions such as
7462306a36Sopenharmony_ci * "flush all" and "invalidate all" operate on all the cache levels
7562306a36Sopenharmony_ci * that included in the defined level of coherence. When HW I/O
7662306a36Sopenharmony_ci * coherency is used, this bit causes unnecessary flushes of the L2
7762306a36Sopenharmony_ci * cache.
7862306a36Sopenharmony_ci */
7962306a36Sopenharmony_cistatic void armada_xp_clear_shared_l2(void)
8062306a36Sopenharmony_ci{
8162306a36Sopenharmony_ci	u32 reg;
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ci	if (!cpu_config_base)
8462306a36Sopenharmony_ci		return;
8562306a36Sopenharmony_ci
8662306a36Sopenharmony_ci	reg = readl(cpu_config_base);
8762306a36Sopenharmony_ci	reg &= ~CPU_CONFIG_SHARED_L2;
8862306a36Sopenharmony_ci	writel(reg, cpu_config_base);
8962306a36Sopenharmony_ci}
9062306a36Sopenharmony_ci
9162306a36Sopenharmony_cistatic int mvebu_hwcc_notifier(struct notifier_block *nb,
9262306a36Sopenharmony_ci			       unsigned long event, void *__dev)
9362306a36Sopenharmony_ci{
9462306a36Sopenharmony_ci	struct device *dev = __dev;
9562306a36Sopenharmony_ci
9662306a36Sopenharmony_ci	if (event != BUS_NOTIFY_ADD_DEVICE)
9762306a36Sopenharmony_ci		return NOTIFY_DONE;
9862306a36Sopenharmony_ci	dev->dma_coherent = true;
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci	return NOTIFY_OK;
10162306a36Sopenharmony_ci}
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_cistatic struct notifier_block mvebu_hwcc_nb = {
10462306a36Sopenharmony_ci	.notifier_call = mvebu_hwcc_notifier,
10562306a36Sopenharmony_ci};
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_cistatic struct notifier_block mvebu_hwcc_pci_nb __maybe_unused = {
10862306a36Sopenharmony_ci	.notifier_call = mvebu_hwcc_notifier,
10962306a36Sopenharmony_ci};
11062306a36Sopenharmony_ci
/*
 * CPU hotplug "starting" callback registered by
 * armada_370_coherency_init(): clear the "Shared L2 Present" bit.
 *
 * @cpu: number of the CPU being brought up (unused)
 *
 * Always returns 0.
 */
static int armada_xp_clear_l2_starting(unsigned int cpu)
{
	(void)cpu;
	armada_xp_clear_shared_l2();

	return 0;
}
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_cistatic void __init armada_370_coherency_init(struct device_node *np)
11862306a36Sopenharmony_ci{
11962306a36Sopenharmony_ci	struct resource res;
12062306a36Sopenharmony_ci	struct device_node *cpu_config_np;
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ci	of_address_to_resource(np, 0, &res);
12362306a36Sopenharmony_ci	coherency_phys_base = res.start;
12462306a36Sopenharmony_ci	/*
12562306a36Sopenharmony_ci	 * Ensure secondary CPUs will see the updated value,
12662306a36Sopenharmony_ci	 * which they read before they join the coherency
12762306a36Sopenharmony_ci	 * fabric, and therefore before they are coherent with
12862306a36Sopenharmony_ci	 * the boot CPU cache.
12962306a36Sopenharmony_ci	 */
13062306a36Sopenharmony_ci	sync_cache_w(&coherency_phys_base);
13162306a36Sopenharmony_ci	coherency_base = of_iomap(np, 0);
13262306a36Sopenharmony_ci	coherency_cpu_base = of_iomap(np, 1);
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci	cpu_config_np = of_find_compatible_node(NULL, NULL,
13562306a36Sopenharmony_ci						"marvell,armada-xp-cpu-config");
13662306a36Sopenharmony_ci	if (!cpu_config_np)
13762306a36Sopenharmony_ci		goto exit;
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_ci	cpu_config_base = of_iomap(cpu_config_np, 0);
14062306a36Sopenharmony_ci	if (!cpu_config_base) {
14162306a36Sopenharmony_ci		of_node_put(cpu_config_np);
14262306a36Sopenharmony_ci		goto exit;
14362306a36Sopenharmony_ci	}
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci	of_node_put(cpu_config_np);
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_ci	cpuhp_setup_state_nocalls(CPUHP_AP_ARM_MVEBU_COHERENCY,
14862306a36Sopenharmony_ci				  "arm/mvebu/coherency:starting",
14962306a36Sopenharmony_ci				  armada_xp_clear_l2_starting, NULL);
15062306a36Sopenharmony_ciexit:
15162306a36Sopenharmony_ci	set_cpu_coherent();
15262306a36Sopenharmony_ci}
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_ci/*
15562306a36Sopenharmony_ci * This ioremap hook is used on Armada 375/38x to ensure that all MMIO
15662306a36Sopenharmony_ci * areas are mapped as MT_UNCACHED instead of MT_DEVICE. This is
15762306a36Sopenharmony_ci * needed for the HW I/O coherency mechanism to work properly without
15862306a36Sopenharmony_ci * deadlock.
15962306a36Sopenharmony_ci */
16062306a36Sopenharmony_cistatic void __iomem *
16162306a36Sopenharmony_ciarmada_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
16262306a36Sopenharmony_ci			 unsigned int mtype, void *caller)
16362306a36Sopenharmony_ci{
16462306a36Sopenharmony_ci	mtype = MT_UNCACHED;
16562306a36Sopenharmony_ci	return __arm_ioremap_caller(phys_addr, size, mtype, caller);
16662306a36Sopenharmony_ci}
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_cistatic void __init armada_375_380_coherency_init(struct device_node *np)
16962306a36Sopenharmony_ci{
17062306a36Sopenharmony_ci	struct device_node *cache_dn;
17162306a36Sopenharmony_ci
17262306a36Sopenharmony_ci	coherency_cpu_base = of_iomap(np, 0);
17362306a36Sopenharmony_ci	arch_ioremap_caller = armada_wa_ioremap_caller;
17462306a36Sopenharmony_ci	pci_ioremap_set_mem_type(MT_UNCACHED);
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci	/*
17762306a36Sopenharmony_ci	 * We should switch the PL310 to I/O coherency mode only if
17862306a36Sopenharmony_ci	 * I/O coherency is actually enabled.
17962306a36Sopenharmony_ci	 */
18062306a36Sopenharmony_ci	if (!coherency_available())
18162306a36Sopenharmony_ci		return;
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_ci	/*
18462306a36Sopenharmony_ci	 * Add the PL310 property "arm,io-coherent". This makes sure the
18562306a36Sopenharmony_ci	 * outer sync operation is not used, which allows to
18662306a36Sopenharmony_ci	 * workaround the system erratum that causes deadlocks when
18762306a36Sopenharmony_ci	 * doing PCIe in an SMP situation on Armada 375 and Armada
18862306a36Sopenharmony_ci	 * 38x.
18962306a36Sopenharmony_ci	 */
19062306a36Sopenharmony_ci	for_each_compatible_node(cache_dn, NULL, "arm,pl310-cache") {
19162306a36Sopenharmony_ci		struct property *p;
19262306a36Sopenharmony_ci
19362306a36Sopenharmony_ci		p = kzalloc(sizeof(*p), GFP_KERNEL);
19462306a36Sopenharmony_ci		p->name = kstrdup("arm,io-coherent", GFP_KERNEL);
19562306a36Sopenharmony_ci		of_add_property(cache_dn, p);
19662306a36Sopenharmony_ci	}
19762306a36Sopenharmony_ci}
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_cistatic int coherency_type(void)
20062306a36Sopenharmony_ci{
20162306a36Sopenharmony_ci	struct device_node *np;
20262306a36Sopenharmony_ci	const struct of_device_id *match;
20362306a36Sopenharmony_ci	int type;
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci	/*
20662306a36Sopenharmony_ci	 * The coherency fabric is needed:
20762306a36Sopenharmony_ci	 * - For coherency between processors on Armada XP, so only
20862306a36Sopenharmony_ci	 *   when SMP is enabled.
20962306a36Sopenharmony_ci	 * - For coherency between the processor and I/O devices, but
21062306a36Sopenharmony_ci	 *   this coherency requires many pre-requisites (write
21162306a36Sopenharmony_ci	 *   allocate cache policy, shareable pages, SMP bit set) that
21262306a36Sopenharmony_ci	 *   are only meant in SMP situations.
21362306a36Sopenharmony_ci	 *
21462306a36Sopenharmony_ci	 * Note that this means that on Armada 370, there is currently
21562306a36Sopenharmony_ci	 * no way to use hardware I/O coherency, because even when
21662306a36Sopenharmony_ci	 * CONFIG_SMP is enabled, is_smp() returns false due to the
21762306a36Sopenharmony_ci	 * Armada 370 being a single-core processor. To lift this
21862306a36Sopenharmony_ci	 * limitation, we would have to find a way to make the cache
21962306a36Sopenharmony_ci	 * policy set to write-allocate (on all Armada SoCs), and to
22062306a36Sopenharmony_ci	 * set the shareable attribute in page tables (on all Armada
22162306a36Sopenharmony_ci	 * SoCs except the Armada 370). Unfortunately, such decisions
22262306a36Sopenharmony_ci	 * are taken very early in the kernel boot process, at a point
22362306a36Sopenharmony_ci	 * where we don't know yet on which SoC we are running.
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_ci	 */
22662306a36Sopenharmony_ci	if (!is_smp())
22762306a36Sopenharmony_ci		return COHERENCY_FABRIC_TYPE_NONE;
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_ci	np = of_find_matching_node_and_match(NULL, of_coherency_table, &match);
23062306a36Sopenharmony_ci	if (!np)
23162306a36Sopenharmony_ci		return COHERENCY_FABRIC_TYPE_NONE;
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_ci	type = (int) match->data;
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci	of_node_put(np);
23662306a36Sopenharmony_ci
23762306a36Sopenharmony_ci	return type;
23862306a36Sopenharmony_ci}
23962306a36Sopenharmony_ci
24062306a36Sopenharmony_ciint set_cpu_coherent(void)
24162306a36Sopenharmony_ci{
24262306a36Sopenharmony_ci	int type = coherency_type();
24362306a36Sopenharmony_ci
24462306a36Sopenharmony_ci	if (type == COHERENCY_FABRIC_TYPE_ARMADA_370_XP) {
24562306a36Sopenharmony_ci		if (!coherency_base) {
24662306a36Sopenharmony_ci			pr_warn("Can't make current CPU cache coherent.\n");
24762306a36Sopenharmony_ci			pr_warn("Coherency fabric is not initialized\n");
24862306a36Sopenharmony_ci			return 1;
24962306a36Sopenharmony_ci		}
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_ci		armada_xp_clear_shared_l2();
25262306a36Sopenharmony_ci		ll_add_cpu_to_smp_group();
25362306a36Sopenharmony_ci		return ll_enable_coherency();
25462306a36Sopenharmony_ci	}
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_ci	return 0;
25762306a36Sopenharmony_ci}
25862306a36Sopenharmony_ci
25962306a36Sopenharmony_ciint coherency_available(void)
26062306a36Sopenharmony_ci{
26162306a36Sopenharmony_ci	return coherency_type() != COHERENCY_FABRIC_TYPE_NONE;
26262306a36Sopenharmony_ci}
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_ciint __init coherency_init(void)
26562306a36Sopenharmony_ci{
26662306a36Sopenharmony_ci	int type = coherency_type();
26762306a36Sopenharmony_ci	struct device_node *np;
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_ci	np = of_find_matching_node(NULL, of_coherency_table);
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci	if (type == COHERENCY_FABRIC_TYPE_ARMADA_370_XP)
27262306a36Sopenharmony_ci		armada_370_coherency_init(np);
27362306a36Sopenharmony_ci	else if (type == COHERENCY_FABRIC_TYPE_ARMADA_375 ||
27462306a36Sopenharmony_ci		 type == COHERENCY_FABRIC_TYPE_ARMADA_380)
27562306a36Sopenharmony_ci		armada_375_380_coherency_init(np);
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci	of_node_put(np);
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_ci	return 0;
28062306a36Sopenharmony_ci}
28162306a36Sopenharmony_ci
28262306a36Sopenharmony_cistatic int __init coherency_late_init(void)
28362306a36Sopenharmony_ci{
28462306a36Sopenharmony_ci	if (coherency_available())
28562306a36Sopenharmony_ci		bus_register_notifier(&platform_bus_type,
28662306a36Sopenharmony_ci				      &mvebu_hwcc_nb);
28762306a36Sopenharmony_ci	return 0;
28862306a36Sopenharmony_ci}
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_cipostcore_initcall(coherency_late_init);
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_PCI)
29362306a36Sopenharmony_cistatic int __init coherency_pci_init(void)
29462306a36Sopenharmony_ci{
29562306a36Sopenharmony_ci	if (coherency_available())
29662306a36Sopenharmony_ci		bus_register_notifier(&pci_bus_type,
29762306a36Sopenharmony_ci				       &mvebu_hwcc_pci_nb);
29862306a36Sopenharmony_ci	return 0;
29962306a36Sopenharmony_ci}
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_ciarch_initcall(coherency_pci_init);
30262306a36Sopenharmony_ci#endif
303