xref: /kernel/linux/linux-6.6/drivers/edac/edac_mc.c (revision 62306a36)
/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/bitops.h>
#include <linux/uaccess.h>
#include <asm/page.h>
#include "edac_mc.h"
#include "edac_module.h"
#include <ras/ras_event.h>

#ifdef CONFIG_EDAC_ATOMIC_SCRUB
#include <asm/edac.h>
#else
#define edac_atomic_scrub(va, size) do { } while (0)
#endif

int edac_op_state = EDAC_OPSTATE_INVAL;
EXPORT_SYMBOL_GPL(edac_op_state);

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

/*
 * Used to lock EDAC MC to just one module, avoiding two drivers (e.g.
 * apei/ghes and i7core_edac) being used at the same time.
 */
static const char *edac_mc_owner;

static struct mem_ctl_info *error_desc_to_mci(struct edac_raw_error_desc *e)
{
	return container_of(e, struct mem_ctl_info, error_desc);
}

unsigned int edac_dimm_info_location(struct dimm_info *dimm, char *buf,
				     unsigned int len)
{
	struct mem_ctl_info *mci = dimm->mci;
	int i, n, count = 0;
	char *p = buf;

	for (i = 0; i < mci->n_layers; i++) {
		n = scnprintf(p, len, "%s %d ",
			      edac_layer_name[mci->layers[i].type],
			      dimm->location[i]);
		p += n;
		len -= n;
		count += n;
	}

	return count;
}

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
	edac_dbg(4, "  channel->chan_idx = %d\n", chan->chan_idx);
	edac_dbg(4, "    channel = %p\n", chan);
	edac_dbg(4, "    channel->csrow = %p\n", chan->csrow);
	edac_dbg(4, "    channel->dimm = %p\n", chan->dimm);
}

static void edac_mc_dump_dimm(struct dimm_info *dimm)
{
	char location[80];

	if (!dimm->nr_pages)
		return;

	edac_dimm_info_location(dimm, location, sizeof(location));

	edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
		 dimm->mci->csbased ? "rank" : "dimm",
		 dimm->idx, location, dimm->csrow, dimm->cschannel);
	edac_dbg(4, "  dimm = %p\n", dimm);
	edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
	edac_dbg(4, "  dimm->grain = %d\n", dimm->grain);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
	edac_dbg(4, "  csrow = %p\n", csrow);
	edac_dbg(4, "  csrow->first_page = 0x%lx\n", csrow->first_page);
	edac_dbg(4, "  csrow->last_page = 0x%lx\n", csrow->last_page);
	edac_dbg(4, "  csrow->page_mask = 0x%lx\n", csrow->page_mask);
	edac_dbg(4, "  csrow->nr_channels = %d\n", csrow->nr_channels);
	edac_dbg(4, "  csrow->channels = %p\n", csrow->channels);
	edac_dbg(4, "  csrow->mci = %p\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	edac_dbg(3, "\tmci = %p\n", mci);
	edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
	edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
	edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
		 mci->nr_csrows, mci->csrows);
	edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
		 mci->tot_dimms, mci->dimms);
	edac_dbg(3, "\tdev = %p\n", mci->pdev);
	edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
		 mci->mod_name, mci->ctl_name);
	edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif				/* CONFIG_EDAC_DEBUG */

const char * const edac_mem_types[] = {
	[MEM_EMPTY]	= "Empty",
	[MEM_RESERVED]	= "Reserved",
	[MEM_UNKNOWN]	= "Unknown",
	[MEM_FPM]	= "FPM",
	[MEM_EDO]	= "EDO",
	[MEM_BEDO]	= "BEDO",
	[MEM_SDR]	= "Unbuffered-SDR",
	[MEM_RDR]	= "Registered-SDR",
	[MEM_DDR]	= "Unbuffered-DDR",
	[MEM_RDDR]	= "Registered-DDR",
	[MEM_RMBS]	= "RMBS",
	[MEM_DDR2]	= "Unbuffered-DDR2",
	[MEM_FB_DDR2]	= "FullyBuffered-DDR2",
	[MEM_RDDR2]	= "Registered-DDR2",
	[MEM_XDR]	= "XDR",
	[MEM_DDR3]	= "Unbuffered-DDR3",
	[MEM_RDDR3]	= "Registered-DDR3",
	[MEM_LRDDR3]	= "Load-Reduced-DDR3-RAM",
	[MEM_LPDDR3]	= "Low-Power-DDR3-RAM",
	[MEM_DDR4]	= "Unbuffered-DDR4",
	[MEM_RDDR4]	= "Registered-DDR4",
	[MEM_LPDDR4]	= "Low-Power-DDR4-RAM",
	[MEM_LRDDR4]	= "Load-Reduced-DDR4-RAM",
	[MEM_DDR5]	= "Unbuffered-DDR5",
	[MEM_RDDR5]	= "Registered-DDR5",
	[MEM_LRDDR5]	= "Load-Reduced-DDR5-RAM",
	[MEM_NVDIMM]	= "Non-volatile-RAM",
	[MEM_WIO2]	= "Wide-IO-2",
	[MEM_HBM2]	= "High-bandwidth-memory-Gen2",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

static void _edac_mc_free(struct mem_ctl_info *mci)
{
	put_device(&mci->dev);
}

static void mci_release(struct device *dev)
{
	struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev);
	struct csrow_info *csr;
	int i, chn, row;

	if (mci->dimms) {
		for (i = 0; i < mci->tot_dimms; i++)
			kfree(mci->dimms[i]);
		kfree(mci->dimms);
	}

	if (mci->csrows) {
		for (row = 0; row < mci->nr_csrows; row++) {
			csr = mci->csrows[row];
			if (!csr)
				continue;

			if (csr->channels) {
				for (chn = 0; chn < mci->num_cschannel; chn++)
					kfree(csr->channels[chn]);
				kfree(csr->channels);
			}
			kfree(csr);
		}
		kfree(mci->csrows);
	}
	kfree(mci->pvt_info);
	kfree(mci->layers);
	kfree(mci);
}

static int edac_mc_alloc_csrows(struct mem_ctl_info *mci)
{
	unsigned int tot_channels = mci->num_cschannel;
	unsigned int tot_csrows = mci->nr_csrows;
	unsigned int row, chn;

	/*
	 * Allocate and fill the csrow/channels structs
	 */
	mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
	if (!mci->csrows)
		return -ENOMEM;

	for (row = 0; row < tot_csrows; row++) {
		struct csrow_info *csr;

		csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
		if (!csr)
			return -ENOMEM;

		mci->csrows[row] = csr;
		csr->csrow_idx = row;
		csr->mci = mci;
		csr->nr_channels = tot_channels;
		csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
					GFP_KERNEL);
		if (!csr->channels)
			return -ENOMEM;

		for (chn = 0; chn < tot_channels; chn++) {
			struct rank_info *chan;

			chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
			if (!chan)
				return -ENOMEM;

			csr->channels[chn] = chan;
			chan->chan_idx = chn;
			chan->csrow = csr;
		}
	}

	return 0;
}

static int edac_mc_alloc_dimms(struct mem_ctl_info *mci)
{
	unsigned int pos[EDAC_MAX_LAYERS];
	unsigned int row, chn, idx;
	int layer;
	void *p;

	/*
	 * Allocate and fill the dimm structs
	 */
	mci->dimms  = kcalloc(mci->tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
	if (!mci->dimms)
		return -ENOMEM;

	memset(&pos, 0, sizeof(pos));
	row = 0;
	chn = 0;
	for (idx = 0; idx < mci->tot_dimms; idx++) {
		struct dimm_info *dimm;
		struct rank_info *chan;
		int n, len;

		chan = mci->csrows[row]->channels[chn];

		dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
		if (!dimm)
			return -ENOMEM;
		mci->dimms[idx] = dimm;
		dimm->mci = mci;
		dimm->idx = idx;

		/*
		 * Copy DIMM location and initialize it.
		 */
		len = sizeof(dimm->label);
		p = dimm->label;
		n = scnprintf(p, len, "mc#%u", mci->mc_idx);
		p += n;
		len -= n;
		for (layer = 0; layer < mci->n_layers; layer++) {
			n = scnprintf(p, len, "%s#%u",
				      edac_layer_name[mci->layers[layer].type],
				      pos[layer]);
			p += n;
			len -= n;
			dimm->location[layer] = pos[layer];
		}

		/* Link it to the csrows old API data */
		chan->dimm = dimm;
		dimm->csrow = row;
		dimm->cschannel = chn;

		/* Increment csrow location */
		if (mci->layers[0].is_virt_csrow) {
			chn++;
			if (chn == mci->num_cschannel) {
				chn = 0;
				row++;
			}
		} else {
			row++;
			if (row == mci->nr_csrows) {
				row = 0;
				chn++;
			}
		}

		/* Increment dimm location */
		for (layer = mci->n_layers - 1; layer >= 0; layer--) {
			pos[layer]++;
			if (pos[layer] < mci->layers[layer].size)
				break;
			pos[layer] = 0;
		}
	}

	return 0;
}

struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
				   unsigned int n_layers,
				   struct edac_mc_layer *layers,
				   unsigned int sz_pvt)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer *layer;
	unsigned int idx, tot_dimms = 1;
	unsigned int tot_csrows = 1, tot_channels = 1;
	bool per_rank = false;

	if (WARN_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0))
		return NULL;

	/*
	 * Calculate the total amount of dimms and csrows/cschannels while
	 * in the old API emulation mode
	 */
	for (idx = 0; idx < n_layers; idx++) {
		tot_dimms *= layers[idx].size;

		if (layers[idx].is_virt_csrow)
			tot_csrows *= layers[idx].size;
		else
			tot_channels *= layers[idx].size;

		if (layers[idx].type == EDAC_MC_LAYER_CHIP_SELECT)
			per_rank = true;
	}

	mci = kzalloc(sizeof(struct mem_ctl_info), GFP_KERNEL);
	if (!mci)
		return NULL;

	mci->layers = kcalloc(n_layers, sizeof(struct edac_mc_layer), GFP_KERNEL);
	if (!mci->layers)
		goto error;

	mci->pvt_info = kzalloc(sz_pvt, GFP_KERNEL);
	if (!mci->pvt_info)
		goto error;

	mci->dev.release = mci_release;
	device_initialize(&mci->dev);

	/* setup index and various internal pointers */
	mci->mc_idx = mc_num;
	mci->tot_dimms = tot_dimms;
	mci->n_layers = n_layers;
	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
	mci->nr_csrows = tot_csrows;
	mci->num_cschannel = tot_channels;
	mci->csbased = per_rank;

	if (edac_mc_alloc_csrows(mci))
		goto error;

	if (edac_mc_alloc_dimms(mci))
		goto error;

	mci->op_state = OP_ALLOC;

	return mci;

error:
	_edac_mc_free(mci);

	return NULL;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);
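
/*
 * Illustrative sketch (not part of this file): how a chipset driver would
 * typically describe its memory hierarchy and call edac_mc_alloc().  The
 * hierarchy here (four chip-select rows, two channels per row), the
 * "my_mc_priv" private struct and the surrounding probe context are
 * hypothetical; only the edac_mc_alloc() signature above is real.
 *
 *	struct my_mc_priv { void __iomem *regs; };
 *	struct edac_mc_layer layers[2];
 *	struct mem_ctl_info *mci;
 *
 *	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
 *	layers[0].size = 4;
 *	layers[0].is_virt_csrow = true;
 *	layers[1].type = EDAC_MC_LAYER_CHANNEL;
 *	layers[1].size = 2;
 *	layers[1].is_virt_csrow = false;
 *
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct my_mc_priv));
 *	if (!mci)
 *		return -ENOMEM;
 */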

void edac_mc_free(struct mem_ctl_info *mci)
{
	edac_dbg(1, "\n");

	_edac_mc_free(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);

bool edac_has_mcs(void)
{
	bool ret;

	mutex_lock(&mem_ctls_mutex);

	ret = list_empty(&mc_devices);

	mutex_unlock(&mem_ctls_mutex);

	return !ret;
}
EXPORT_SYMBOL_GPL(edac_has_mcs);

/* Caller must hold mem_ctls_mutex */
static struct mem_ctl_info *__find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	edac_dbg(3, "\n");

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->pdev == dev)
			return mci;
	}

	return NULL;
}

/**
 * find_mci_by_dev - scan the list of controllers for the one that manages @dev
 * @dev: pointer to a struct device related with the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *ret;

	mutex_lock(&mem_ctls_mutex);
	ret = __find_mci_by_dev(dev);
	mutex_unlock(&mem_ctls_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	if (mci->op_state != OP_RUNNING_POLL) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	if (edac_op_state == EDAC_OPSTATE_POLL)
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Queue ourselves again. */
	edac_queue_work(&mci->work, msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_reset_delay_period(unsigned long value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
 */
void edac_mc_reset_delay_period(unsigned long value)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->op_state == OP_RUNNING_POLL)
			edac_mod_work(&mci->work, value);
	}
	mutex_unlock(&mem_ctls_mutex);
}



/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	p = __find_mci_by_dev(mci->pdev);
	if (unlikely(p != NULL))
		goto fail0;

	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		"%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}

static int del_mc_from_global_list(struct mem_ctl_info *mci)
{
	list_del_rcu(&mci->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);

	return list_empty(&mc_devices);
}

struct mem_ctl_info *edac_mc_find(int idx)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);
		if (mci->mc_idx == idx)
			goto unlock;
	}

	mci = NULL;
unlock:
	mutex_unlock(&mem_ctls_mutex);
	return mci;
}
EXPORT_SYMBOL(edac_mc_find);

const char *edac_get_owner(void)
{
	return edac_mc_owner;
}
EXPORT_SYMBOL_GPL(edac_get_owner);

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci,
			       const struct attribute_group **groups)
{
	int ret = -EINVAL;
	edac_dbg(0, "\n");

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		struct dimm_info *dimm;
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			struct csrow_info *csrow = mci->csrows[i];
			u32 nr_pages = 0;
			int j;

			for (j = 0; j < csrow->nr_channels; j++)
				nr_pages += csrow->channels[j]->dimm->nr_pages;
			if (!nr_pages)
				continue;
			edac_mc_dump_csrow(csrow);
			for (j = 0; j < csrow->nr_channels; j++)
				if (csrow->channels[j]->dimm->nr_pages)
					edac_mc_dump_channel(csrow->channels[j]);
		}

		mci_for_each_dimm(mci, dimm)
			edac_mc_dump_dimm(dimm);
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	if (edac_mc_owner && edac_mc_owner != mci->mod_name) {
		ret = -EPERM;
		goto fail0;
	}

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	mci->bus = edac_get_sysfs_subsys();

	if (edac_create_sysfs_mci_device(mci, groups)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	if (mci->edac_check) {
		mci->op_state = OP_RUNNING_POLL;

		INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
		edac_queue_work(&mci->work, msecs_to_jiffies(edac_mc_get_poll_msec()));

	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO,
		"Giving out device to module %s controller %s: DEV %s (%s)\n",
		mci->mod_name, mci->ctl_name, mci->dev_name,
		edac_op_state_to_string(mci->op_state));

	edac_mc_owner = mci->mod_name;

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc_with_groups);
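
/*
 * Illustrative sketch (not part of this file): after allocation, a driver
 * fills in the descriptive fields and registers the controller.  Most
 * drivers use the edac_mc_add_mc() wrapper from edac_mc.h, which passes a
 * NULL attribute-group list to edac_mc_add_mc_with_groups().  The pdev,
 * module name and capability values below are hypothetical; leaving
 * mci->edac_check NULL means the core will not start the poll worker.
 *
 *	mci->pdev = &pdev->dev;
 *	mci->mtype_cap = MEM_FLAG_DDR4;
 *	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
 *	mci->edac_cap = EDAC_FLAG_SECDED;
 *	mci->mod_name = "my_edac_driver";
 *	mci->ctl_name = "my_memory_controller";
 *	mci->dev_name = dev_name(&pdev->dev);
 *	mci->edac_check = NULL;
 *
 *	if (edac_mc_add_mc(mci)) {
 *		edac_mc_free(mci);
 *		return -ENODEV;
 *	}
 */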

struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	edac_dbg(0, "\n");

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = __find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	/* mark MCI offline: */
	mci->op_state = OP_OFFLINE;

	if (del_mc_from_global_list(mci))
		edac_mc_owner = NULL;

	mutex_unlock(&mem_ctls_mutex);

	if (mci->edac_check)
		edac_stop_work(&mci->work);

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
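
/*
 * Illustrative sketch (not part of this file): teardown mirrors the two
 * registration steps above.  In a hypothetical driver's .remove() callback:
 *
 *	mci = edac_mc_del_mc(&pdev->dev);
 *	if (mci)
 *		edac_mc_free(mci);
 */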

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	edac_dbg(3, "\n");

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg);

	/* Perform architecture specific atomic scrub operation */
	edac_atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
	struct csrow_info **csrows = mci->csrows;
	int row, i, j, n;

	edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = csrows[i];
		n = 0;
		for (j = 0; j < csrow->nr_channels; j++) {
			struct dimm_info *dimm = csrow->channels[j]->dimm;
			n += dimm->nr_pages;
		}
		if (n == 0)
			continue;

		edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
			 mci->mc_idx,
			 csrow->first_page, page, csrow->last_page,
			 csrow->page_mask);

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
		edac_mc_printk(mci, KERN_ERR,
			"could not look up page error address %lx\n",
			(unsigned long)page);

	return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

const char *edac_layer_name[] = {
	[EDAC_MC_LAYER_BRANCH] = "branch",
	[EDAC_MC_LAYER_CHANNEL] = "channel",
	[EDAC_MC_LAYER_SLOT] = "slot",
	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
	[EDAC_MC_LAYER_ALL_MEM] = "memory",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

static void edac_inc_ce_error(struct edac_raw_error_desc *e)
{
	int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
	struct mem_ctl_info *mci = error_desc_to_mci(e);
	struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]);

	mci->ce_mc += e->error_count;

	if (dimm)
		dimm->ce_count += e->error_count;
	else
		mci->ce_noinfo_count += e->error_count;
}

static void edac_inc_ue_error(struct edac_raw_error_desc *e)
{
	int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
	struct mem_ctl_info *mci = error_desc_to_mci(e);
	struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]);

	mci->ue_mc += e->error_count;

	if (dimm)
		dimm->ue_count += e->error_count;
	else
		mci->ue_noinfo_count += e->error_count;
}

static void edac_ce_error(struct edac_raw_error_desc *e)
{
	struct mem_ctl_info *mci = error_desc_to_mci(e);
	unsigned long remapped_page;

	if (edac_mc_get_log_ce()) {
		edac_mc_printk(mci, KERN_WARNING,
			"%d CE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx%s%s)\n",
			e->error_count, e->msg,
			*e->msg ? " " : "",
			e->label, e->location, e->page_frame_number, e->offset_in_page,
			e->grain, e->syndrome,
			*e->other_detail ? " - " : "",
			e->other_detail);
	}

	edac_inc_ce_error(e);

	if (mci->scrub_mode == SCRUB_SW_SRC) {
		/*
		 * Some memory controllers (called MCs below) can remap
		 * memory so that it is still available at a different
		 * address when PCI devices map into memory.
		 * MCs that can't do this lose the memory where PCI
		 * devices are mapped. This mapping is MC-dependent
		 * and so we call back into the MC driver for it to
		 * map the MC page to a physical (CPU) page which can
		 * then be mapped to a virtual page - which can then
		 * be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, e->page_frame_number) :
			e->page_frame_number;

		edac_mc_scrub_block(remapped_page, e->offset_in_page, e->grain);
	}
}

static void edac_ue_error(struct edac_raw_error_desc *e)
{
	struct mem_ctl_info *mci = error_desc_to_mci(e);

	if (edac_mc_get_log_ue()) {
		edac_mc_printk(mci, KERN_WARNING,
			"%d UE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld%s%s)\n",
			e->error_count, e->msg,
			*e->msg ? " " : "",
			e->label, e->location, e->page_frame_number, e->offset_in_page,
			e->grain,
			*e->other_detail ? " - " : "",
			e->other_detail);
	}

	edac_inc_ue_error(e);

	if (edac_mc_get_panic_on_ue()) {
		panic("UE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld%s%s)\n",
			e->msg,
			*e->msg ? " " : "",
			e->label, e->location, e->page_frame_number, e->offset_in_page,
			e->grain,
			*e->other_detail ? " - " : "",
			e->other_detail);
	}
}

static void edac_inc_csrow(struct edac_raw_error_desc *e, int row, int chan)
{
	struct mem_ctl_info *mci = error_desc_to_mci(e);
	enum hw_event_mc_err_type type = e->type;
	u16 count = e->error_count;

	if (row < 0)
		return;

	edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);

	if (type == HW_EVENT_ERR_CORRECTED) {
		mci->csrows[row]->ce_count += count;
		if (chan >= 0)
			mci->csrows[row]->channels[chan]->ce_count += count;
	} else {
		mci->csrows[row]->ue_count += count;
	}
}

void edac_raw_mc_handle_error(struct edac_raw_error_desc *e)
{
	struct mem_ctl_info *mci = error_desc_to_mci(e);
	u8 grain_bits;

	/* Sanity-check driver-supplied grain value. */
	if (WARN_ON_ONCE(!e->grain))
		e->grain = 1;

	grain_bits = fls_long(e->grain - 1);

	/* Report the error via the trace interface */
	if (IS_ENABLED(CONFIG_RAS))
		trace_mc_event(e->type, e->msg, e->label, e->error_count,
			       mci->mc_idx, e->top_layer, e->mid_layer,
			       e->low_layer,
			       (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page,
			       grain_bits, e->syndrome, e->other_detail);

	if (e->type == HW_EVENT_ERR_CORRECTED)
		edac_ce_error(e);
	else
		edac_ue_error(e);
}
EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);
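
/*
 * Illustrative sketch (not part of this file): a caller that has already
 * decoded the error location itself (e.g. a firmware-first path) can fill
 * the pre-allocated descriptor embedded in the mci and hand it in directly,
 * bypassing the DIMM-matching logic in edac_mc_handle_error() below.  The
 * field values, the label and the use of -1 layer indices (no per-DIMM
 * attribution) are hypothetical.
 *
 *	struct edac_raw_error_desc *e = &mci->error_desc;
 *
 *	memset(e, 0, sizeof(*e));
 *	e->type = HW_EVENT_ERR_CORRECTED;
 *	e->error_count = 1;
 *	e->grain = 64;
 *	e->top_layer = -1;
 *	e->mid_layer = -1;
 *	e->low_layer = -1;
 *	e->msg = "ECC error";
 *	e->other_detail = "";
 *	strscpy(e->label, "DIMM_A0", sizeof(e->label));
 *	edac_raw_mc_handle_error(e);
 */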

void edac_mc_handle_error(const enum hw_event_mc_err_type type,
			  struct mem_ctl_info *mci,
			  const u16 error_count,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  const unsigned long syndrome,
			  const int top_layer,
			  const int mid_layer,
			  const int low_layer,
			  const char *msg,
			  const char *other_detail)
{
	struct dimm_info *dimm;
	char *p, *end;
	int row = -1, chan = -1;
	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
	int i, n_labels = 0;
	struct edac_raw_error_desc *e = &mci->error_desc;
	bool any_memory = true;
	const char *prefix;

	edac_dbg(3, "MC%d\n", mci->mc_idx);

	/* Fills the error report buffer */
	memset(e, 0, sizeof (*e));
	e->error_count = error_count;
	e->type = type;
	e->top_layer = top_layer;
	e->mid_layer = mid_layer;
	e->low_layer = low_layer;
	e->page_frame_number = page_frame_number;
	e->offset_in_page = offset_in_page;
	e->syndrome = syndrome;
	/* need valid strings here for both: */
	e->msg = msg ?: "";
	e->other_detail = other_detail ?: "";

	/*
	 * Check if the event report is consistent and if the memory location is
	 * known. If it is, the DIMM(s) label info will be filled and the DIMM's
	 * error counters will be incremented.
	 */
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] >= (int)mci->layers[i].size) {

			edac_mc_printk(mci, KERN_ERR,
				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
				       edac_layer_name[mci->layers[i].type],
				       pos[i], mci->layers[i].size);
			/*
			 * Instead of just returning, use what is known
			 * about the error. The increment routines and
			 * the DIMM filter logic will do the right thing
			 * by pointing at the likely damaged DIMMs.
			 */
			pos[i] = -1;
		}
		if (pos[i] >= 0)
			any_memory = false;
	}

	/*
	 * Get the dimm label/grain that applies to the match criteria.
	 * As the error algorithm may not be able to point to just one memory
	 * stick, the logic here will get all possible labels that could
	 * potentially be affected by the error.
	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
	 * to have only the MC channel and the MC dimm (also called "branch")
	 * but the channel is not known, as the memory is arranged in pairs,
	 * where each memory belongs to a separate channel within the same
	 * branch.
	 */
	p = e->label;
	*p = '\0';
	end = p + sizeof(e->label);
	prefix = "";

	mci_for_each_dimm(mci, dimm) {
		if (top_layer >= 0 && top_layer != dimm->location[0])
			continue;
		if (mid_layer >= 0 && mid_layer != dimm->location[1])
			continue;
		if (low_layer >= 0 && low_layer != dimm->location[2])
			continue;

		/* get the max grain, over the error match range */
		if (dimm->grain > e->grain)
			e->grain = dimm->grain;

		/*
		 * If the error is memory-controller wide, there's no need to
		 * seek for the affected DIMMs because the whole channel/memory
		 * controller/... may be affected. Also, don't show errors for
		 * empty DIMM slots.
		 */
		if (!dimm->nr_pages)
			continue;

		n_labels++;
		if (n_labels > EDAC_MAX_LABELS) {
			p = e->label;
			*p = '\0';
		} else {
			p += scnprintf(p, end - p, "%s%s", prefix, dimm->label);
			prefix = OTHER_LABEL;
		}

		/*
		 * get csrow/channel of the DIMM, in order to allow
		 * incrementing the compat API counters
		 */
		edac_dbg(4, "%s csrows map: (%d,%d)\n",
			mci->csbased ? "rank" : "dimm",
			dimm->csrow, dimm->cschannel);
		if (row == -1)
			row = dimm->csrow;
		else if (row >= 0 && row != dimm->csrow)
			row = -2;

		if (chan == -1)
			chan = dimm->cschannel;
		else if (chan >= 0 && chan != dimm->cschannel)
			chan = -2;
	}

	if (any_memory)
		strscpy(e->label, "any memory", sizeof(e->label));
	else if (!*e->label)
		strscpy(e->label, "unknown memory", sizeof(e->label));

	edac_inc_csrow(e, row, chan);

	/* Fill the RAM location data */
	p = e->location;
	end = p + sizeof(e->location);
	prefix = "";

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			continue;

		p += scnprintf(p, end - p, "%s%s:%d", prefix,
			       edac_layer_name[mci->layers[i].type], pos[i]);
		prefix = " ";
	}

	edac_raw_mc_handle_error(e);
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);

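/*
 * Illustrative sketch (not part of this file): how a polling or interrupt
 * handler in a hypothetical driver would report a single corrected error
 * whose position it has already decoded from hardware registers.  The
 * err_pfn, err_offset, err_syndrome, csrow and channel variables are
 * hypothetical; layer indices follow the layer order passed to
 * edac_mc_alloc(), and -1 marks a layer the hardware cannot resolve.
 *
 *	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
 *			     err_pfn, err_offset, err_syndrome,
 *			     csrow, channel, -1,
 *			     "single-bit ECC error", "");
 */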