1/*
2 * Intel e7xxx Memory Controller kernel module
3 * (C) 2003 Linux Networx (http://lnxi.com)
4 * This file may be distributed under the terms of the
5 * GNU General Public License.
6 *
7 * See "enum e7xxx_chips" below for supported chipsets
8 *
9 * Written by Thayne Harbaugh
10 * Based on work by Dan Hollis <goemon at anime dot net> and others.
11 *	http://www.anime.net/~goemon/linux-ecc/
12 *
13 * Datasheet:
14 *	http://www.intel.com/content/www/us/en/chipsets/e7501-chipset-memory-controller-hub-datasheet.html
15 *
16 * Contributors:
17 *	Eric Biederman (Linux Networx)
18 *	Tom Zimmerman (Linux Networx)
19 *	Jim Garlick (Lawrence Livermore National Labs)
20 *	Dave Peterson (Lawrence Livermore National Labs)
21 *	That One Guy (Some other place)
22 *	Wang Zhenyu (intel.com)
23 *
24 * $Id: edac_e7xxx.c,v 1.5.2.9 2005/10/05 00:43:44 dsp_llnl Exp $
25 *
26 */
27
28#include <linux/module.h>
29#include <linux/init.h>
30#include <linux/pci.h>
31#include <linux/pci_ids.h>
32#include <linux/edac.h>
33#include "edac_module.h"
34
/* Module name: used as mci->mod_name, PCI driver name and PCI control name. */
#define EDAC_MOD_STR "e7xxx_edac"

/* Forward a message to edac_printk() tagged with the "e7xxx" prefix. */
#define e7xxx_printk(level, fmt, ...) \
	edac_printk(level, "e7xxx", fmt, ##__VA_ARGS__)

/* Same as e7xxx_printk(), but routed through the per-controller printk. */
#define e7xxx_mc_printk(mci, level, fmt, ...) \
	edac_mc_chipset_printk(mci, level, "e7xxx", fmt, ##__VA_ARGS__)
42
/*
 * Fallback PCI device IDs, defined only when the included headers do not
 * already provide them.
 *
 * The *_0 IDs are matched by the PCI ID table of this driver; the *_1_ERR
 * IDs are the devices looked up in probe for error reporting.  Note that
 * E7500/E7501 share one *_1_ERR value, as do E7505/E7205.
 */

/* E7205 */
#if !defined(PCI_DEVICE_ID_INTEL_7205_0)
#define PCI_DEVICE_ID_INTEL_7205_0	0x255d
#endif
#if !defined(PCI_DEVICE_ID_INTEL_7205_1_ERR)
#define PCI_DEVICE_ID_INTEL_7205_1_ERR	0x2551
#endif

/* E7500 */
#if !defined(PCI_DEVICE_ID_INTEL_7500_0)
#define PCI_DEVICE_ID_INTEL_7500_0	0x2540
#endif
#if !defined(PCI_DEVICE_ID_INTEL_7500_1_ERR)
#define PCI_DEVICE_ID_INTEL_7500_1_ERR	0x2541
#endif

/* E7501 */
#if !defined(PCI_DEVICE_ID_INTEL_7501_0)
#define PCI_DEVICE_ID_INTEL_7501_0	0x254c
#endif
#if !defined(PCI_DEVICE_ID_INTEL_7501_1_ERR)
#define PCI_DEVICE_ID_INTEL_7501_1_ERR	0x2541
#endif

/* E7505 */
#if !defined(PCI_DEVICE_ID_INTEL_7505_0)
#define PCI_DEVICE_ID_INTEL_7505_0	0x2550
#endif
#if !defined(PCI_DEVICE_ID_INTEL_7505_1_ERR)
#define PCI_DEVICE_ID_INTEL_7505_1_ERR	0x2551
#endif
74
#define E7XXX_NR_CSROWS		8	/* number of csrows */
#define E7XXX_NR_DIMMS		8	/* 2 channels, 4 dimms/channel */

/*
 * E7XXX register addresses - device 0 function 0
 */

/* DRAM row boundary registers (8b each); row N is read at E7XXX_DRB + N */
#define E7XXX_DRB		0x60

/*
 * DRAM row attribute registers; this driver reads them as a single
 * 32-bit value.  Device width per row (0 = x8, 1 = x4):
 *
 *	bit 31	row 7
 *	bit 27	row 6
 *	bit 23	row 5
 *	bit 19	row 4
 *	bit 15	row 3
 *	bit 11	row 2
 *	bit  7	row 1
 *	bit  3	row 0
 */
#define E7XXX_DRA		0x70

/*
 * DRAM controller mode register (32b):
 *
 *	bit  22		number of channels (0 = 1, 1 = 2)
 *	bits 19:18	DRB granularity 32/64MB
 */
#define E7XXX_DRC		0x7c

#define E7XXX_TOLM		0xc4	/* DRAM top of low memory reg (16b) */
#define E7XXX_REMAPBASE		0xc6	/* DRAM remap base address reg (16b) */
#define E7XXX_REMAPLIMIT	0xc8	/* DRAM remap limit address reg (16b) */

/*
 * E7XXX register addresses - device 0 function 1
 *
 * In FERR/NERR this driver treats bit 0 as "correctable error logged"
 * and bit 1 as "uncorrectable error logged".
 */
#define E7XXX_DRAM_FERR		0x80	/* DRAM first error register (8b) */
#define E7XXX_DRAM_NERR		0x82	/* DRAM next error register (8b) */

/*
 * DRAM first correctable memory error address register (32b):
 *
 *	bits 31:28	reserved
 *	bits 27:6	CE address (4k block 33:12)
 *	bits  5:0	reserved
 */
#define E7XXX_DRAM_CELOG_ADD	0xa0

/*
 * DRAM first uncorrectable memory error address register (32b):
 *
 *	bits 31:28	reserved
 *	bits 27:6	UE address (4k block 33:12)
 *	bits  5:0	reserved
 */
#define E7XXX_DRAM_UELOG_ADD	0xb0

/* DRAM first correctable memory error syndrome register (16b) */
#define E7XXX_DRAM_CELOG_SYNDROME 0xd0
119
/*
 * Supported chipsets.  The values index e7xxx_devs[] and are carried as
 * driver_data in the PCI ID table.
 */
enum e7xxx_chips {
	E7500 = 0,
	E7501 = 1,
	E7505 = 2,
	E7205 = 3,
};
126
127struct e7xxx_pvt {
128	struct pci_dev *bridge_ck;
129	u32 tolm;
130	u32 remapbase;
131	u32 remaplimit;
132	const struct e7xxx_dev_info *dev_info;
133};
134
135struct e7xxx_dev_info {
136	u16 err_dev;
137	const char *ctl_name;
138};
139
140struct e7xxx_error_info {
141	u8 dram_ferr;
142	u8 dram_nerr;
143	u32 dram_celog_add;
144	u16 dram_celog_syndrome;
145	u32 dram_uelog_add;
146};
147
148static struct edac_pci_ctl_info *e7xxx_pci;
149
150static const struct e7xxx_dev_info e7xxx_devs[] = {
151	[E7500] = {
152		.err_dev = PCI_DEVICE_ID_INTEL_7500_1_ERR,
153		.ctl_name = "E7500"},
154	[E7501] = {
155		.err_dev = PCI_DEVICE_ID_INTEL_7501_1_ERR,
156		.ctl_name = "E7501"},
157	[E7505] = {
158		.err_dev = PCI_DEVICE_ID_INTEL_7505_1_ERR,
159		.ctl_name = "E7505"},
160	[E7205] = {
161		.err_dev = PCI_DEVICE_ID_INTEL_7205_1_ERR,
162		.ctl_name = "E7205"},
163};
164
165/* FIXME - is this valid for both SECDED and S4ECD4ED? */
166static inline int e7xxx_find_channel(u16 syndrome)
167{
168	edac_dbg(3, "\n");
169
170	if ((syndrome & 0xff00) == 0)
171		return 0;
172
173	if ((syndrome & 0x00ff) == 0)
174		return 1;
175
176	if ((syndrome & 0xf000) == 0 || (syndrome & 0x0f00) == 0)
177		return 0;
178
179	return 1;
180}
181
182static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci,
183				unsigned long page)
184{
185	u32 remap;
186	struct e7xxx_pvt *pvt = (struct e7xxx_pvt *)mci->pvt_info;
187
188	edac_dbg(3, "\n");
189
190	if ((page < pvt->tolm) ||
191		((page >= 0x100000) && (page < pvt->remapbase)))
192		return page;
193
194	remap = (page - pvt->tolm) + pvt->remapbase;
195
196	if (remap < pvt->remaplimit)
197		return remap;
198
199	e7xxx_printk(KERN_ERR, "Invalid page %lx - out of range\n", page);
200	return pvt->tolm - 1;
201}
202
203static void process_ce(struct mem_ctl_info *mci, struct e7xxx_error_info *info)
204{
205	u32 error_1b, page;
206	u16 syndrome;
207	int row;
208	int channel;
209
210	edac_dbg(3, "\n");
211	/* read the error address */
212	error_1b = info->dram_celog_add;
213	/* FIXME - should use PAGE_SHIFT */
214	page = error_1b >> 6;	/* convert the address to 4k page */
215	/* read the syndrome */
216	syndrome = info->dram_celog_syndrome;
217	/* FIXME - check for -1 */
218	row = edac_mc_find_csrow_by_page(mci, page);
219	/* convert syndrome to channel */
220	channel = e7xxx_find_channel(syndrome);
221	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, page, 0, syndrome,
222			     row, channel, -1, "e7xxx CE", "");
223}
224
225static void process_ce_no_info(struct mem_ctl_info *mci)
226{
227	edac_dbg(3, "\n");
228	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0, -1, -1, -1,
229			     "e7xxx CE log register overflow", "");
230}
231
232static void process_ue(struct mem_ctl_info *mci, struct e7xxx_error_info *info)
233{
234	u32 error_2b, block_page;
235	int row;
236
237	edac_dbg(3, "\n");
238	/* read the error address */
239	error_2b = info->dram_uelog_add;
240	/* FIXME - should use PAGE_SHIFT */
241	block_page = error_2b >> 6;	/* convert to 4k address */
242	row = edac_mc_find_csrow_by_page(mci, block_page);
243
244	edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, block_page, 0, 0,
245			     row, -1, -1, "e7xxx UE", "");
246}
247
248static void process_ue_no_info(struct mem_ctl_info *mci)
249{
250	edac_dbg(3, "\n");
251
252	edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, -1, -1, -1,
253			     "e7xxx UE log register overflow", "");
254}
255
256static void e7xxx_get_error_info(struct mem_ctl_info *mci,
257				 struct e7xxx_error_info *info)
258{
259	struct e7xxx_pvt *pvt;
260
261	pvt = (struct e7xxx_pvt *)mci->pvt_info;
262	pci_read_config_byte(pvt->bridge_ck, E7XXX_DRAM_FERR, &info->dram_ferr);
263	pci_read_config_byte(pvt->bridge_ck, E7XXX_DRAM_NERR, &info->dram_nerr);
264
265	if ((info->dram_ferr & 1) || (info->dram_nerr & 1)) {
266		pci_read_config_dword(pvt->bridge_ck, E7XXX_DRAM_CELOG_ADD,
267				&info->dram_celog_add);
268		pci_read_config_word(pvt->bridge_ck,
269				E7XXX_DRAM_CELOG_SYNDROME,
270				&info->dram_celog_syndrome);
271	}
272
273	if ((info->dram_ferr & 2) || (info->dram_nerr & 2))
274		pci_read_config_dword(pvt->bridge_ck, E7XXX_DRAM_UELOG_ADD,
275				&info->dram_uelog_add);
276
277	if (info->dram_ferr & 3)
278		pci_write_bits8(pvt->bridge_ck, E7XXX_DRAM_FERR, 0x03, 0x03);
279
280	if (info->dram_nerr & 3)
281		pci_write_bits8(pvt->bridge_ck, E7XXX_DRAM_NERR, 0x03, 0x03);
282}
283
284static int e7xxx_process_error_info(struct mem_ctl_info *mci,
285				struct e7xxx_error_info *info,
286				int handle_errors)
287{
288	int error_found;
289
290	error_found = 0;
291
292	/* decode and report errors */
293	if (info->dram_ferr & 1) {	/* check first error correctable */
294		error_found = 1;
295
296		if (handle_errors)
297			process_ce(mci, info);
298	}
299
300	if (info->dram_ferr & 2) {	/* check first error uncorrectable */
301		error_found = 1;
302
303		if (handle_errors)
304			process_ue(mci, info);
305	}
306
307	if (info->dram_nerr & 1) {	/* check next error correctable */
308		error_found = 1;
309
310		if (handle_errors) {
311			if (info->dram_ferr & 1)
312				process_ce_no_info(mci);
313			else
314				process_ce(mci, info);
315		}
316	}
317
318	if (info->dram_nerr & 2) {	/* check next error uncorrectable */
319		error_found = 1;
320
321		if (handle_errors) {
322			if (info->dram_ferr & 2)
323				process_ue_no_info(mci);
324			else
325				process_ue(mci, info);
326		}
327	}
328
329	return error_found;
330}
331
332static void e7xxx_check(struct mem_ctl_info *mci)
333{
334	struct e7xxx_error_info info;
335
336	e7xxx_get_error_info(mci, &info);
337	e7xxx_process_error_info(mci, &info, 1);
338}
339
340/* Return 1 if dual channel mode is active.  Else return 0. */
341static inline int dual_channel_active(u32 drc, int dev_idx)
342{
343	return (dev_idx == E7501) ? ((drc >> 22) & 0x1) : 1;
344}
345
346/* Return DRB granularity (0=32mb, 1=64mb). */
347static inline int drb_granularity(u32 drc, int dev_idx)
348{
349	/* only e7501 can be single channel */
350	return (dev_idx == E7501) ? ((drc >> 18) & 0x3) : 1;
351}
352
353static void e7xxx_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
354			int dev_idx, u32 drc)
355{
356	unsigned long last_cumul_size;
357	int index, j;
358	u8 value;
359	u32 dra, cumul_size, nr_pages;
360	int drc_chan, drc_drbg, drc_ddim, mem_dev;
361	struct csrow_info *csrow;
362	struct dimm_info *dimm;
363	enum edac_type edac_mode;
364
365	pci_read_config_dword(pdev, E7XXX_DRA, &dra);
366	drc_chan = dual_channel_active(drc, dev_idx);
367	drc_drbg = drb_granularity(drc, dev_idx);
368	drc_ddim = (drc >> 20) & 0x3;
369	last_cumul_size = 0;
370
371	/* The dram row boundary (DRB) reg values are boundary address
372	 * for each DRAM row with a granularity of 32 or 64MB (single/dual
373	 * channel operation).  DRB regs are cumulative; therefore DRB7 will
374	 * contain the total memory contained in all eight rows.
375	 */
376	for (index = 0; index < mci->nr_csrows; index++) {
377		/* mem_dev 0=x8, 1=x4 */
378		mem_dev = (dra >> (index * 4 + 3)) & 0x1;
379		csrow = mci->csrows[index];
380
381		pci_read_config_byte(pdev, E7XXX_DRB + index, &value);
382		/* convert a 64 or 32 MiB DRB to a page size. */
383		cumul_size = value << (25 + drc_drbg - PAGE_SHIFT);
384		edac_dbg(3, "(%d) cumul_size 0x%x\n", index, cumul_size);
385		if (cumul_size == last_cumul_size)
386			continue;	/* not populated */
387
388		csrow->first_page = last_cumul_size;
389		csrow->last_page = cumul_size - 1;
390		nr_pages = cumul_size - last_cumul_size;
391		last_cumul_size = cumul_size;
392
393		/*
394		* if single channel or x8 devices then SECDED
395		* if dual channel and x4 then S4ECD4ED
396		*/
397		if (drc_ddim) {
398			if (drc_chan && mem_dev) {
399				edac_mode = EDAC_S4ECD4ED;
400				mci->edac_cap |= EDAC_FLAG_S4ECD4ED;
401			} else {
402				edac_mode = EDAC_SECDED;
403				mci->edac_cap |= EDAC_FLAG_SECDED;
404			}
405		} else
406			edac_mode = EDAC_NONE;
407
408		for (j = 0; j < drc_chan + 1; j++) {
409			dimm = csrow->channels[j]->dimm;
410
411			dimm->nr_pages = nr_pages / (drc_chan + 1);
412			dimm->grain = 1 << 12;	/* 4KiB - resolution of CELOG */
413			dimm->mtype = MEM_RDDR;	/* only one type supported */
414			dimm->dtype = mem_dev ? DEV_X4 : DEV_X8;
415			dimm->edac_mode = edac_mode;
416		}
417	}
418}
419
420static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx)
421{
422	u16 pci_data;
423	struct mem_ctl_info *mci = NULL;
424	struct edac_mc_layer layers[2];
425	struct e7xxx_pvt *pvt = NULL;
426	u32 drc;
427	int drc_chan;
428	struct e7xxx_error_info discard;
429
430	edac_dbg(0, "mci\n");
431
432	pci_read_config_dword(pdev, E7XXX_DRC, &drc);
433
434	drc_chan = dual_channel_active(drc, dev_idx);
435	/*
436	 * According with the datasheet, this device has a maximum of
437	 * 4 DIMMS per channel, either single-rank or dual-rank. So, the
438	 * total amount of dimms is 8 (E7XXX_NR_DIMMS).
439	 * That means that the DIMM is mapped as CSROWs, and the channel
440	 * will map the rank. So, an error to either channel should be
441	 * attributed to the same dimm.
442	 */
443	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
444	layers[0].size = E7XXX_NR_CSROWS;
445	layers[0].is_virt_csrow = true;
446	layers[1].type = EDAC_MC_LAYER_CHANNEL;
447	layers[1].size = drc_chan + 1;
448	layers[1].is_virt_csrow = false;
449	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
450	if (mci == NULL)
451		return -ENOMEM;
452
453	edac_dbg(3, "init mci\n");
454	mci->mtype_cap = MEM_FLAG_RDDR;
455	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED |
456		EDAC_FLAG_S4ECD4ED;
457	/* FIXME - what if different memory types are in different csrows? */
458	mci->mod_name = EDAC_MOD_STR;
459	mci->pdev = &pdev->dev;
460	edac_dbg(3, "init pvt\n");
461	pvt = (struct e7xxx_pvt *)mci->pvt_info;
462	pvt->dev_info = &e7xxx_devs[dev_idx];
463	pvt->bridge_ck = pci_get_device(PCI_VENDOR_ID_INTEL,
464					pvt->dev_info->err_dev, pvt->bridge_ck);
465
466	if (!pvt->bridge_ck) {
467		e7xxx_printk(KERN_ERR, "error reporting device not found:"
468			"vendor %x device 0x%x (broken BIOS?)\n",
469			PCI_VENDOR_ID_INTEL, e7xxx_devs[dev_idx].err_dev);
470		goto fail0;
471	}
472
473	edac_dbg(3, "more mci init\n");
474	mci->ctl_name = pvt->dev_info->ctl_name;
475	mci->dev_name = pci_name(pdev);
476	mci->edac_check = e7xxx_check;
477	mci->ctl_page_to_phys = ctl_page_to_phys;
478	e7xxx_init_csrows(mci, pdev, dev_idx, drc);
479	mci->edac_cap |= EDAC_FLAG_NONE;
480	edac_dbg(3, "tolm, remapbase, remaplimit\n");
481	/* load the top of low memory, remap base, and remap limit vars */
482	pci_read_config_word(pdev, E7XXX_TOLM, &pci_data);
483	pvt->tolm = ((u32) pci_data) << 4;
484	pci_read_config_word(pdev, E7XXX_REMAPBASE, &pci_data);
485	pvt->remapbase = ((u32) pci_data) << 14;
486	pci_read_config_word(pdev, E7XXX_REMAPLIMIT, &pci_data);
487	pvt->remaplimit = ((u32) pci_data) << 14;
488	e7xxx_printk(KERN_INFO,
489		"tolm = %x, remapbase = %x, remaplimit = %x\n", pvt->tolm,
490		pvt->remapbase, pvt->remaplimit);
491
492	/* clear any pending errors, or initial state bits */
493	e7xxx_get_error_info(mci, &discard);
494
495	/* Here we assume that we will never see multiple instances of this
496	 * type of memory controller.  The ID is therefore hardcoded to 0.
497	 */
498	if (edac_mc_add_mc(mci)) {
499		edac_dbg(3, "failed edac_mc_add_mc()\n");
500		goto fail1;
501	}
502
503	/* allocating generic PCI control info */
504	e7xxx_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
505	if (!e7xxx_pci) {
506		printk(KERN_WARNING
507			"%s(): Unable to create PCI control\n",
508			__func__);
509		printk(KERN_WARNING
510			"%s(): PCI error report via EDAC not setup\n",
511			__func__);
512	}
513
514	/* get this far and it's successful */
515	edac_dbg(3, "success\n");
516	return 0;
517
518fail1:
519	pci_dev_put(pvt->bridge_ck);
520
521fail0:
522	edac_mc_free(mci);
523
524	return -ENODEV;
525}
526
527/* returns count (>= 0), or negative on error */
528static int e7xxx_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
529{
530	edac_dbg(0, "\n");
531
532	/* wake up and enable device */
533	return pci_enable_device(pdev) ?
534		-EIO : e7xxx_probe1(pdev, ent->driver_data);
535}
536
537static void e7xxx_remove_one(struct pci_dev *pdev)
538{
539	struct mem_ctl_info *mci;
540	struct e7xxx_pvt *pvt;
541
542	edac_dbg(0, "\n");
543
544	if (e7xxx_pci)
545		edac_pci_release_generic_ctl(e7xxx_pci);
546
547	if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL)
548		return;
549
550	pvt = (struct e7xxx_pvt *)mci->pvt_info;
551	pci_dev_put(pvt->bridge_ck);
552	edac_mc_free(mci);
553}
554
555static const struct pci_device_id e7xxx_pci_tbl[] = {
556	{
557	 PCI_VEND_DEV(INTEL, 7205_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
558	 E7205},
559	{
560	 PCI_VEND_DEV(INTEL, 7500_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
561	 E7500},
562	{
563	 PCI_VEND_DEV(INTEL, 7501_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
564	 E7501},
565	{
566	 PCI_VEND_DEV(INTEL, 7505_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
567	 E7505},
568	{
569	 0,
570	 }			/* 0 terminated list. */
571};
572
573MODULE_DEVICE_TABLE(pci, e7xxx_pci_tbl);
574
575static struct pci_driver e7xxx_driver = {
576	.name = EDAC_MOD_STR,
577	.probe = e7xxx_init_one,
578	.remove = e7xxx_remove_one,
579	.id_table = e7xxx_pci_tbl,
580};
581
582static int __init e7xxx_init(void)
583{
584       /* Ensure that the OPSTATE is set correctly for POLL or NMI */
585       opstate_init();
586
587	return pci_register_driver(&e7xxx_driver);
588}
589
590static void __exit e7xxx_exit(void)
591{
592	pci_unregister_driver(&e7xxx_driver);
593}
594
595module_init(e7xxx_init);
596module_exit(e7xxx_exit);
597
598MODULE_LICENSE("GPL");
599MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al");
600MODULE_DESCRIPTION("MC support for Intel e7xxx memory controllers");
601module_param(edac_op_state, int, 0444);
602MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
603