xref: /kernel/linux/linux-5.10/drivers/acpi/nfit/mce.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * NFIT - Machine Check Handler
4 *
5 * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
6 */
7#include <linux/notifier.h>
8#include <linux/acpi.h>
9#include <linux/nd.h>
10#include <asm/mce.h>
11#include "nfit.h"
12
13static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
14			void *data)
15{
16	struct mce *mce = (struct mce *)data;
17	struct acpi_nfit_desc *acpi_desc;
18	struct nfit_spa *nfit_spa;
19
20	/* We only care about uncorrectable memory errors */
21	if (!mce_is_memory_error(mce) || mce_is_correctable(mce))
22		return NOTIFY_DONE;
23
24	/* Verify the address reported in the MCE is valid. */
25	if (!mce_usable_address(mce))
26		return NOTIFY_DONE;
27
28	/*
29	 * mce->addr contains the physical addr accessed that caused the
30	 * machine check. We need to walk through the list of NFITs, and see
31	 * if any of them matches that address, and only then start a scrub.
32	 */
33	mutex_lock(&acpi_desc_lock);
34	list_for_each_entry(acpi_desc, &acpi_descs, list) {
35		struct device *dev = acpi_desc->dev;
36		int found_match = 0;
37
38		mutex_lock(&acpi_desc->init_mutex);
39		list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
40			struct acpi_nfit_system_address *spa = nfit_spa->spa;
41
42			if (nfit_spa_type(spa) != NFIT_SPA_PM)
43				continue;
44			/* find the spa that covers the mce addr */
45			if (spa->address > mce->addr)
46				continue;
47			if ((spa->address + spa->length - 1) < mce->addr)
48				continue;
49			found_match = 1;
50			dev_dbg(dev, "addr in SPA %d (0x%llx, 0x%llx)\n",
51				spa->range_index, spa->address, spa->length);
52			/*
53			 * We can break at the first match because we're going
54			 * to rescan all the SPA ranges. There shouldn't be any
55			 * aliasing anyway.
56			 */
57			break;
58		}
59		mutex_unlock(&acpi_desc->init_mutex);
60
61		if (!found_match)
62			continue;
63
64		/* If this fails due to an -ENOMEM, there is little we can do */
65		nvdimm_bus_add_badrange(acpi_desc->nvdimm_bus,
66				ALIGN(mce->addr, L1_CACHE_BYTES),
67				L1_CACHE_BYTES);
68		nvdimm_region_notify(nfit_spa->nd_region,
69				NVDIMM_REVALIDATE_POISON);
70
71		if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) {
72			/*
73			 * We can ignore an -EBUSY here because if an ARS is
74			 * already in progress, just let that be the last
75			 * authoritative one
76			 */
77			acpi_nfit_ars_rescan(acpi_desc, 0);
78		}
79		mce->kflags |= MCE_HANDLED_NFIT;
80		break;
81	}
82
83	mutex_unlock(&acpi_desc_lock);
84	return NOTIFY_DONE;
85}
86
87static struct notifier_block nfit_mce_dec = {
88	.notifier_call	= nfit_handle_mce,
89	.priority	= MCE_PRIO_NFIT,
90};
91
92void nfit_mce_register(void)
93{
94	mce_register_decode_chain(&nfit_mce_dec);
95}
96
97void nfit_mce_unregister(void)
98{
99	mce_unregister_decode_chain(&nfit_mce_dec);
100}
101