// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * This file contains an ECC algorithm that detects and corrects 1 bit
 * errors in a 256 byte block of data.
 *
 * Copyright © 2008 Koninklijke Philips Electronics NV.
 * Author: Frans Meulenbroeks
 *
 * Completely replaces the previous ECC implementation which was written by:
 *   Steven J. Hill (sjhill@realitydiluted.com)
 *   Thomas Gleixner (tglx@linutronix.de)
 *
 * Information on how this algorithm works and how it was developed
 * can be found in Documentation/driver-api/mtd/nand_ecc.rst
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mtd/mtd.h>
#include <linux/mtd/rawnand.h>
#include <linux/mtd/nand_ecc.h>
#include <asm/byteorder.h>

/*
 * invparity is a 256 byte table that contains the odd parity
 * for each byte. So if the number of bits in a byte is even,
 * the array element is 1, and when the number of bits is odd
 * the array element is 0.
 */
static const char invparity[256] = {
        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
};

/*
 * bitsperbyte contains the number of bits per byte
 * this is only used for testing and repairing parity
 * (a precalculated value slightly improves performance)
 */
static const char bitsperbyte[256] = {
        0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
        1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
        1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
        1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
        3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
        4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
};

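/*
 * For reference only: the two tables above are plain parity/popcount data
 * and could just as well be generated at init time with the kernel's
 * hweight8() helper. A minimal sketch (hypothetical, not wired into this
 * driver) would be:
 *
 *        for (i = 0; i < 256; i++) {
 *                bitsperbyte[i] = hweight8(i);
 *                invparity[i] = !(hweight8(i) & 1);
 *        }
 *
 * The precomputed const tables are kept so they live in .rodata and avoid
 * any init-time cost.
 */
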
/*
 * addressbits is a lookup table to filter out the bits from the xor-ed
 * ECC data that identify the faulty location.
 * this is only used for repairing parity
 * see the comments in nand_correct_data for more details
 */
static const char addressbits[256] = {
        0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
        0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
        0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
        0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
        0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
        0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
        0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
        0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
        0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
        0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
        0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
        0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
        0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
        0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
        0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
        0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
        0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
        0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
        0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
        0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
        0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
        0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
        0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
        0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
        0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
        0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
        0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
        0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
        0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
        0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
        0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
        0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f
};

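/*
 * For reference only: addressbits[i] simply compacts the odd-numbered bits
 * of i into a 4-bit value. In a valid single-bit-error syndrome each even
 * bit is the complement of the odd bit above it, so only the odd bits
 * carry address information. A hypothetical generator equivalent to the
 * table above:
 *
 *        addressbits[i] = ((i >> 1) & 1) |
 *                         (((i >> 3) & 1) << 1) |
 *                         (((i >> 5) & 1) << 2) |
 *                         (((i >> 7) & 1) << 3);
 */
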
/**
 * __nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256/512-byte
 *                        block
 * @buf:      input buffer with raw data
 * @eccsize:  data bytes per ECC step (256 or 512)
 * @code:     output buffer with ECC
 * @sm_order: Smart Media byte ordering
 */
void __nand_calculate_ecc(const unsigned char *buf, unsigned int eccsize,
                          unsigned char *code, bool sm_order)
{
        int i;
        const uint32_t *bp = (uint32_t *)buf;
        /* 256 or 512 bytes/ecc */
        const uint32_t eccsize_mult = eccsize >> 8;
        uint32_t cur;           /* current value in buffer */
        /* rp0..rp15..rp17 are the various accumulated parities (per byte) */
        uint32_t rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
        uint32_t rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15, rp16;
        uint32_t rp17;
        uint32_t par;           /* the cumulative parity for all data */
        uint32_t tmppar;        /* the cumulative parity for this iteration;
                                   for rp12, rp14 and rp16 at the end of the
                                   loop */

        par = 0;
        rp4 = 0;
        rp6 = 0;
        rp8 = 0;
        rp10 = 0;
        rp12 = 0;
        rp14 = 0;
        rp16 = 0;

        /*
         * The loop is unrolled a number of times; this avoids if statements
         * to decide on which rp value to update.
         * Also we process the data by longwords.
         * Note: passing unaligned data might give a performance penalty.
         * It is assumed that the buffers are aligned.
         * tmppar is the cumulative sum of this iteration; it is needed for
         * calculating rp12, rp14, rp16 and par, and is also used as a
         * performance improvement for rp6, rp8 and rp10.
         */
        for (i = 0; i < eccsize_mult << 2; i++) {
                cur = *bp++;
                tmppar = cur;
                rp4 ^= cur;
                cur = *bp++;
                tmppar ^= cur;
                rp6 ^= tmppar;
                cur = *bp++;
                tmppar ^= cur;
                rp4 ^= cur;
                cur = *bp++;
                tmppar ^= cur;
                rp8 ^= tmppar;

                cur = *bp++;
                tmppar ^= cur;
                rp4 ^= cur;
                rp6 ^= cur;
                cur = *bp++;
                tmppar ^= cur;
                rp6 ^= cur;
                cur = *bp++;
                tmppar ^= cur;
                rp4 ^= cur;
                cur = *bp++;
                tmppar ^= cur;
                rp10 ^= tmppar;

                cur = *bp++;
                tmppar ^= cur;
                rp4 ^= cur;
                rp6 ^= cur;
                rp8 ^= cur;
                cur = *bp++;
                tmppar ^= cur;
                rp6 ^= cur;
                rp8 ^= cur;
                cur = *bp++;
                tmppar ^= cur;
                rp4 ^= cur;
                rp8 ^= cur;
                cur = *bp++;
                tmppar ^= cur;
                rp8 ^= cur;

                cur = *bp++;
                tmppar ^= cur;
                rp4 ^= cur;
                rp6 ^= cur;
                cur = *bp++;
                tmppar ^= cur;
                rp6 ^= cur;
                cur = *bp++;
                tmppar ^= cur;
                rp4 ^= cur;
                cur = *bp++;
                tmppar ^= cur;

                par ^= tmppar;
                if ((i & 0x1) == 0)
                        rp12 ^= tmppar;
                if ((i & 0x2) == 0)
                        rp14 ^= tmppar;
                if (eccsize_mult == 2 && (i & 0x4) == 0)
                        rp16 ^= tmppar;
        }

        /*
         * handle the fact that we use longword operations
         * we'll bring rp4..rp14..rp16 back to single byte entities by
         * shifting and xoring: first fold the upper and lower 16 bits,
         * then the upper and lower 8 bits.
         */
        rp4 ^= (rp4 >> 16);
        rp4 ^= (rp4 >> 8);
        rp4 &= 0xff;
        rp6 ^= (rp6 >> 16);
        rp6 ^= (rp6 >> 8);
        rp6 &= 0xff;
        rp8 ^= (rp8 >> 16);
        rp8 ^= (rp8 >> 8);
        rp8 &= 0xff;
        rp10 ^= (rp10 >> 16);
        rp10 ^= (rp10 >> 8);
        rp10 &= 0xff;
        rp12 ^= (rp12 >> 16);
        rp12 ^= (rp12 >> 8);
        rp12 &= 0xff;
        rp14 ^= (rp14 >> 16);
        rp14 ^= (rp14 >> 8);
        rp14 &= 0xff;
        if (eccsize_mult == 2) {
                rp16 ^= (rp16 >> 16);
                rp16 ^= (rp16 >> 8);
                rp16 &= 0xff;
        }

        /*
         * we also need to calculate the row parity for rp0..rp3
         * This is present in par, because par is now
         * rp3 rp3 rp2 rp2 in little endian and
         * rp2 rp2 rp3 rp3 in big endian
         * as well as
         * rp1 rp0 rp1 rp0 in little endian and
         * rp0 rp1 rp0 rp1 in big endian
         * First calculate rp2 and rp3
         */
#ifdef __BIG_ENDIAN
        rp2 = (par >> 16);
        rp2 ^= (rp2 >> 8);
        rp2 &= 0xff;
        rp3 = par & 0xffff;
        rp3 ^= (rp3 >> 8);
        rp3 &= 0xff;
#else
        rp3 = (par >> 16);
        rp3 ^= (rp3 >> 8);
        rp3 &= 0xff;
        rp2 = par & 0xffff;
        rp2 ^= (rp2 >> 8);
        rp2 &= 0xff;
#endif

        /* reduce par to 16 bits then calculate rp1 and rp0 */
        par ^= (par >> 16);
#ifdef __BIG_ENDIAN
        rp0 = (par >> 8) & 0xff;
        rp1 = (par & 0xff);
#else
        rp1 = (par >> 8) & 0xff;
        rp0 = (par & 0xff);
#endif

        /* finally reduce par to 8 bits */
        par ^= (par >> 8);
        par &= 0xff;

        /*
         * and calculate rp5..rp15..rp17
         * note that par = rp4 ^ rp5 and due to the commutative property
         * of the ^ operator we can say:
         * rp5 = (par ^ rp4);
         * The & 0xff seems superfluous, but benchmarking showed that
         * leaving it out gives slightly worse results. No idea why, probably
         * it has to do with the way the pipeline in the Pentium is organized.
         */
        rp5 = (par ^ rp4) & 0xff;
        rp7 = (par ^ rp6) & 0xff;
        rp9 = (par ^ rp8) & 0xff;
        rp11 = (par ^ rp10) & 0xff;
        rp13 = (par ^ rp12) & 0xff;
        rp15 = (par ^ rp14) & 0xff;
        if (eccsize_mult == 2)
                rp17 = (par ^ rp16) & 0xff;

        /*
         * Finally calculate the ECC bits.
         * Again here it might seem that there are performance optimisations
         * possible, but benchmarks showed that on the system this was
         * developed on, the code below is the fastest.
         */
        if (sm_order) {
                code[0] = (invparity[rp7] << 7) | (invparity[rp6] << 6) |
                          (invparity[rp5] << 5) | (invparity[rp4] << 4) |
                          (invparity[rp3] << 3) | (invparity[rp2] << 2) |
                          (invparity[rp1] << 1) | (invparity[rp0]);
                code[1] = (invparity[rp15] << 7) | (invparity[rp14] << 6) |
                          (invparity[rp13] << 5) | (invparity[rp12] << 4) |
                          (invparity[rp11] << 3) | (invparity[rp10] << 2) |
                          (invparity[rp9] << 1) | (invparity[rp8]);
        } else {
                code[1] = (invparity[rp7] << 7) | (invparity[rp6] << 6) |
                          (invparity[rp5] << 5) | (invparity[rp4] << 4) |
                          (invparity[rp3] << 3) | (invparity[rp2] << 2) |
                          (invparity[rp1] << 1) | (invparity[rp0]);
                code[0] = (invparity[rp15] << 7) | (invparity[rp14] << 6) |
                          (invparity[rp13] << 5) | (invparity[rp12] << 4) |
                          (invparity[rp11] << 3) | (invparity[rp10] << 2) |
                          (invparity[rp9] << 1) | (invparity[rp8]);
        }

        if (eccsize_mult == 1)
                code[2] =
                    (invparity[par & 0xf0] << 7) |
                    (invparity[par & 0x0f] << 6) |
                    (invparity[par & 0xcc] << 5) |
                    (invparity[par & 0x33] << 4) |
                    (invparity[par & 0xaa] << 3) |
                    (invparity[par & 0x55] << 2) |
                    3;
        else
                code[2] =
                    (invparity[par & 0xf0] << 7) |
                    (invparity[par & 0x0f] << 6) |
                    (invparity[par & 0xcc] << 5) |
                    (invparity[par & 0x33] << 4) |
                    (invparity[par & 0xaa] << 3) |
                    (invparity[par & 0x55] << 2) |
                    (invparity[rp17] << 1) |
                    (invparity[rp16] << 0);
}
EXPORT_SYMBOL(__nand_calculate_ecc);

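/*
 * Usage note (illustrative sketch only; "data" and "ecc" are hypothetical
 * caller-side buffers, not part of this file):
 *
 *        unsigned char ecc[3];
 *
 *        __nand_calculate_ecc(data, 256, ecc, false);
 *
 * Because the parity bits are stored inverted, an erased block (all 0xff
 * data) should yield the all-0xff ECC 0xff 0xff 0xff, so erased pages read
 * back as error free.
 */
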
/**
 * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256/512-byte
 *                      block
 * @chip: NAND chip object
 * @buf:  input buffer with raw data
 * @code: output buffer with ECC
 */
int nand_calculate_ecc(struct nand_chip *chip, const unsigned char *buf,
                       unsigned char *code)
{
        bool sm_order = chip->ecc.options & NAND_ECC_SOFT_HAMMING_SM_ORDER;

        __nand_calculate_ecc(buf, chip->ecc.size, code, sm_order);

        return 0;
}
EXPORT_SYMBOL(nand_calculate_ecc);

/**
 * __nand_correct_data - [NAND Interface] Detect and correct bit error(s)
 * @buf:      raw data read from the chip
 * @read_ecc: ECC from the chip
 * @calc_ecc: the ECC calculated from raw data
 * @eccsize:  data bytes per ECC step (256 or 512)
 * @sm_order: Smart Media byte order
 *
 * Detect and correct a 1 bit error for an eccsize byte block
 */
int __nand_correct_data(unsigned char *buf,
                        unsigned char *read_ecc, unsigned char *calc_ecc,
                        unsigned int eccsize, bool sm_order)
{
        unsigned char b0, b1, b2, bit_addr;
        unsigned int byte_addr;
        /* 256 or 512 bytes/ecc */
        const uint32_t eccsize_mult = eccsize >> 8;

        /*
         * b0 to b2 indicate which bit is faulty (if any)
         * we might need the xor result more than once,
         * so keep them in a local var
         */
        if (sm_order) {
                b0 = read_ecc[0] ^ calc_ecc[0];
                b1 = read_ecc[1] ^ calc_ecc[1];
        } else {
                b0 = read_ecc[1] ^ calc_ecc[1];
                b1 = read_ecc[0] ^ calc_ecc[0];
        }

        b2 = read_ecc[2] ^ calc_ecc[2];

        /* check if there are any bitfaults */

        /* repeated if statements are slightly more efficient than switch ... */
        /* ordered by likelihood */

        if ((b0 | b1 | b2) == 0)
                return 0;       /* no error */

        if ((((b0 ^ (b0 >> 1)) & 0x55) == 0x55) &&
            (((b1 ^ (b1 >> 1)) & 0x55) == 0x55) &&
            ((eccsize_mult == 1 && ((b2 ^ (b2 >> 1)) & 0x54) == 0x54) ||
             (eccsize_mult == 2 && ((b2 ^ (b2 >> 1)) & 0x55) == 0x55))) {
                /* single bit error */
                /*
                 * rp17/rp15/13/11/9/7/5/3/1 indicate which byte is the faulty
                 * byte, cp 5/3/1 indicate the faulty bit.
                 * A lookup table (called addressbits) is used to filter
                 * the bits from the byte they are in.
                 * A marginal optimisation is possible by having three
                 * different lookup tables.
                 * One as we have now (for b0), one for b2
                 * (that would avoid the >> 1), and one for b1 (with all values
                 * << 4). However it was felt that introducing two more tables
                 * hardly justifies the gain.
                 *
                 * The b2 shift is there to get rid of the lowest two bits.
                 * We could also do addressbits[b2] >> 1 but for the
                 * performance it does not make any difference
                 */
                if (eccsize_mult == 1)
                        byte_addr = (addressbits[b1] << 4) + addressbits[b0];
                else
                        byte_addr = (addressbits[b2 & 0x3] << 8) +
                                    (addressbits[b1] << 4) + addressbits[b0];
                bit_addr = addressbits[b2 >> 2];
                /* flip the bit */
                buf[byte_addr] ^= (1 << bit_addr);
                return 1;
        }

        /* count nr of bits; use table lookup, faster than calculating it */
        if ((bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2]) == 1)
                return 1;       /* error in ECC data; no action needed */

        pr_err("%s: uncorrectable ECC error\n", __func__);
        return -EBADMSG;
}
EXPORT_SYMBOL(__nand_correct_data);

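/*
 * Usage note (illustrative sketch only; "data", "read_ecc" and "calc_ecc"
 * are hypothetical caller-side buffers, not part of this file). On a read,
 * a driver recomputes the ECC over the data just read and hands both ECCs
 * to the corrector:
 *
 *        __nand_calculate_ecc(data, 256, calc_ecc, false);
 *        ret = __nand_correct_data(data, read_ecc, calc_ecc, 256, false);
 *
 * ret is 0 when the data was clean, 1 when a single bit error was corrected
 * in place (or the error was confined to the ECC bytes themselves), and
 * -EBADMSG when the error is uncorrectable.
 */
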
/**
 * nand_correct_data - [NAND Interface] Detect and correct bit error(s)
 * @chip:     NAND chip object
 * @buf:      raw data read from the chip
 * @read_ecc: ECC from the chip
 * @calc_ecc: the ECC calculated from raw data
 *
 * Detect and correct a 1 bit error for 256/512 byte block
 */
int nand_correct_data(struct nand_chip *chip, unsigned char *buf,
                      unsigned char *read_ecc, unsigned char *calc_ecc)
{
        bool sm_order = chip->ecc.options & NAND_ECC_SOFT_HAMMING_SM_ORDER;

        return __nand_correct_data(buf, read_ecc, calc_ecc, chip->ecc.size,
                                   sm_order);
}
EXPORT_SYMBOL(nand_correct_data);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Frans Meulenbroeks <fransmeulenbroeks@gmail.com>");
MODULE_DESCRIPTION("Generic NAND ECC support");