/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Unified implementation of memcpy, memmove and the __copy_user backend.
 *
 * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org)
 * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
 * Copyright (C) 2002 Broadcom, Inc.
 *   memcpy/copy_user author: Mark Vandevoorde
 *
 * Mnemonic names for arguments to memcpy/__copy_user
 */

#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/export.h>
#include <asm/regdef.h>

#define dst a0
#define src a1
#define len a2

/*
 * Spec
 *
 * memcpy copies len bytes from src to dst and sets v0 to dst.
 * It assumes that
 *   - src and dst don't overlap
 *   - src is readable
 *   - dst is writable
 * memcpy uses the standard calling convention
 *
 * __copy_user copies up to len bytes from src to dst and sets a2 (len) to
 * the number of uncopied bytes due to an exception caused by a read or write.
 * __copy_user assumes that src and dst don't overlap, and that the call is
 * implementing one of the following:
 *   copy_to_user
 *     - src is readable  (no exceptions when reading src)
 *   copy_from_user
 *     - dst is writable  (no exceptions when writing dst)
 * __copy_user uses a non-standard calling convention; see
 * arch/mips/include/asm/uaccess.h
 *
 * When an exception happens on a load, the handler must
 * ensure that all of the destination buffer is overwritten to prevent
 * leaking information to user mode programs.
 */

/*
 * Implementation
 */

/*
 * The exception handler for loads requires that:
 *  1- AT contain the address of the byte just past the end of the source
 *     of the copy,
 *  2- src_entry <= src < AT, and
 *  3- (dst - src) == (dst_entry - src_entry),
 * The _entry suffix denotes values when __copy_user was called.
 *
 * (1) is set up by uaccess.h and maintained by not writing AT in copy_user
 * (2) is met by incrementing src by the number of bytes copied
 * (3) is met by not doing loads between a pair of increments of dst and src
 *
 * The exception handlers for stores adjust len (if necessary) and return.
 * These handlers do not need to overwrite any data.
 *
 * For __rmemcpy and memmove an exception is always a kernel bug, therefore
 * they're not protected.
 */

#define EXC(inst_reg,addr,handler)		\
9:	inst_reg, addr;				\
	.section __ex_table,"a";		\
	PTR	9b, handler;			\
	.previous

/*
 * Only on the 64-bit kernel can we make use of 64-bit registers.
 */

#define LOAD		ld
#define LOADL		ldl
#define LOADR		ldr
#define STOREL		sdl
#define STORER		sdr
#define STORE		sd
#define ADD		daddu
#define SUB		dsubu
#define SRL		dsrl
#define SRA		dsra
#define SLL		dsll
#define SLLV		dsllv
#define SRLV		dsrlv
#define NBYTES		8
#define LOG_NBYTES	3

/*
 * As we are sharing the code base with the mips32 tree (which uses the o32
 * ABI register definitions), we need to redefine the register definitions
 * from the n64 ABI register naming to the o32 ABI register naming.
 */
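/*
 * Background note (added for clarity, not part of the original comments):
 * under the o32 ABI t0..t7 name registers $8..$15, while the n64 regdef.h
 * names $8..$11 a4..a7 and only provides t0..t3 (as $12..$15).  Pinning
 * the numeric registers below gives the shared code a full t0..t7 set
 * regardless of which ABI the kernel is built for.
 */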
#undef t0
#undef t1
#undef t2
#undef t3
#define t0	$8
#define t1	$9
#define t2	$10
#define t3	$11
#define t4	$12
#define t5	$13
#define t6	$14
#define t7	$15

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST	LOADL
#define STFIRST STORER
#define STREST	STOREL
#define SHIFT_DISCARD SLLV
#else
#define LDFIRST LOADL
#define LDREST	LOADR
#define STFIRST STOREL
#define STREST	STORER
#define SHIFT_DISCARD SRLV
#endif

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)
#define UNIT(unit)  FIRST(unit)

#define ADDRMASK (NBYTES-1)

	.text
	.set	noreorder
	.set	noat

/*
 * A combined memcpy/__copy_user
 * __copy_user sets len to 0 for success; else to an upper bound of
 * the number of uncopied bytes.
 * memcpy sets v0 to dst.
 */
	.align	5
LEAF(memcpy)					/* a0=dst a1=src a2=len */
EXPORT_SYMBOL(memcpy)
	move	v0, dst				/* return value */
__memcpy:
FEXPORT(__copy_user)
EXPORT_SYMBOL(__copy_user)
	/*
	 * Note: dst & src may be unaligned, len may be 0
	 * Temps
	 */
	#
	# Octeon doesn't care if the destination is unaligned.  The hardware
	# can fix it faster than we can special case the assembly.
	#
	pref	0, 0(src)
	sltu	t0, len, NBYTES		# Check if < 1 word
	bnez	t0, copy_bytes_checklen
	 and	t0, src, ADDRMASK	# Check if src unaligned
	bnez	t0, src_unaligned
	 sltu	t0, len, 4*NBYTES	# Check if < 4 words
	bnez	t0, less_than_4units
	 sltu	t0, len, 8*NBYTES	# Check if < 8 words
	bnez	t0, less_than_8units
	 sltu	t0, len, 16*NBYTES	# Check if < 16 words
	bnez	t0, cleanup_both_aligned
	 sltu	t0, len, 128+1		# Check if len < 129
	bnez	t0, 1f			# Skip prefetch if len is too short
	 sltu	t0, len, 256+1		# Check if len < 257
	bnez	t0, 1f			# Skip prefetch if len is too short
	 pref	0, 128(src)		# We must not prefetch invalid addresses
	#
	# This is where we loop if there are more than 128 bytes left
2:	pref	0, 256(src)		# We must not prefetch invalid addresses
	#
	# This is where we loop if we can't prefetch anymore
1:
EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
	SUB	len, len, 16*NBYTES
EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p16u)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p15u)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p14u)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p13u)
EXC(	LOAD	t0, UNIT(4)(src),	l_exc_copy)
EXC(	LOAD	t1, UNIT(5)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(6)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(7)(src),	l_exc_copy)
EXC(	STORE	t0, UNIT(4)(dst),	s_exc_p12u)
EXC(	STORE	t1, UNIT(5)(dst),	s_exc_p11u)
EXC(	STORE	t2, UNIT(6)(dst),	s_exc_p10u)
	ADD	src, src, 16*NBYTES
EXC(	STORE	t3, UNIT(7)(dst),	s_exc_p9u)
	ADD	dst, dst, 16*NBYTES
EXC(	LOAD	t0, UNIT(-8)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t1, UNIT(-7)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t2, UNIT(-6)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t3, UNIT(-5)(src),	l_exc_copy_rewind16)
EXC(	STORE	t0, UNIT(-8)(dst),	s_exc_p8u)
EXC(	STORE	t1, UNIT(-7)(dst),	s_exc_p7u)
EXC(	STORE	t2, UNIT(-6)(dst),	s_exc_p6u)
EXC(	STORE	t3, UNIT(-5)(dst),	s_exc_p5u)
EXC(	LOAD	t0, UNIT(-4)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t1, UNIT(-3)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t2, UNIT(-2)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t3, UNIT(-1)(src),	l_exc_copy_rewind16)
EXC(	STORE	t0, UNIT(-4)(dst),	s_exc_p4u)
EXC(	STORE	t1, UNIT(-3)(dst),	s_exc_p3u)
EXC(	STORE	t2, UNIT(-2)(dst),	s_exc_p2u)
EXC(	STORE	t3, UNIT(-1)(dst),	s_exc_p1u)
	sltu	t0, len, 256+1		# See if we can prefetch more
	beqz	t0, 2b
	 sltu	t0, len, 128		# See if we can loop again
	beqz	t0, 1b
	 nop
	#
	# Jump here if there are less than 16*NBYTES left.
	#
cleanup_both_aligned:
	beqz	len, done
	 sltu	t0, len, 8*NBYTES
	bnez	t0, less_than_8units
	 nop
EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
	SUB	len, len, 8*NBYTES
EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p8u)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p7u)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p6u)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p5u)
EXC(	LOAD	t0, UNIT(4)(src),	l_exc_copy)
EXC(	LOAD	t1, UNIT(5)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(6)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(7)(src),	l_exc_copy)
EXC(	STORE	t0, UNIT(4)(dst),	s_exc_p4u)
EXC(	STORE	t1, UNIT(5)(dst),	s_exc_p3u)
EXC(	STORE	t2, UNIT(6)(dst),	s_exc_p2u)
EXC(	STORE	t3, UNIT(7)(dst),	s_exc_p1u)
	ADD	src, src, 8*NBYTES
	beqz	len, done
	 ADD	dst, dst, 8*NBYTES
	#
	# Jump here if there are less than 8*NBYTES left.
	#
less_than_8units:
	sltu	t0, len, 4*NBYTES
	bnez	t0, less_than_4units
	 nop
EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
	SUB	len, len, 4*NBYTES
EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
	ADD	src, src, 4*NBYTES
	beqz	len, done
	 ADD	dst, dst, 4*NBYTES
	#
	# Jump here if there are less than 4*NBYTES left.  This means
	# we may need to copy up to 3 NBYTES words.
	#
less_than_4units:
	sltu	t0, len, 1*NBYTES
	bnez	t0, copy_bytes_checklen
	 nop
	#
	# 1) Copy NBYTES, then check length again
	#
EXC(	LOAD	t0, 0(src),		l_exc)
	SUB	len, len, NBYTES
	sltu	t1, len, 8
EXC(	STORE	t0, 0(dst),		s_exc_p1u)
	ADD	src, src, NBYTES
	bnez	t1, copy_bytes_checklen
	 ADD	dst, dst, NBYTES
	#
	# 2) Copy NBYTES, then check length again
	#
EXC(	LOAD	t0, 0(src),		l_exc)
	SUB	len, len, NBYTES
	sltu	t1, len, 8
EXC(	STORE	t0, 0(dst),		s_exc_p1u)
	ADD	src, src, NBYTES
	bnez	t1, copy_bytes_checklen
	 ADD	dst, dst, NBYTES
	#
	# 3) Copy NBYTES, then check length again
	#
EXC(	LOAD	t0, 0(src),		l_exc)
	SUB	len, len, NBYTES
	ADD	src, src, NBYTES
	ADD	dst, dst, NBYTES
	b	copy_bytes_checklen
EXC(	 STORE	t0, -8(dst),		s_exc_p1u)

src_unaligned:
#define rem t8
	SRL	t0, len, LOG_NBYTES+2	# +2 for 4 units/iter
	beqz	t0, cleanup_src_unaligned
	 and	rem, len, (4*NBYTES-1)	# rem = len % 4*NBYTES
1:
	/*
	 * Avoid consecutive LD*'s to the same register since some mips
	 * implementations can't issue them in the same cycle.
	 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
	 * are to the same unit (unless src is aligned, but it's not).
	 */
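	/*
	 * Background note (added for clarity, not part of the original
	 * comments): each LDFIRST/LDREST pair below is the usual MIPS
	 * unaligned-load idiom -- ldl/ldr (swapped by the endianness
	 * defines above) each fill the part of the register their aligned
	 * access can reach, so the pair assembles one full NBYTES unit
	 * from an unaligned src.  The data is written back with plain
	 * STOREs, relying on the Octeon behaviour noted earlier that
	 * unaligned destinations are handled by hardware.
	 */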
EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
EXC(	LDFIRST	t1, FIRST(1)(src),	l_exc_copy)
	SUB	len, len, 4*NBYTES
EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
EXC(	LDREST	t1, REST(1)(src),	l_exc_copy)
EXC(	LDFIRST	t2, FIRST(2)(src),	l_exc_copy)
EXC(	LDFIRST	t3, FIRST(3)(src),	l_exc_copy)
EXC(	LDREST	t2, REST(2)(src),	l_exc_copy)
EXC(	LDREST	t3, REST(3)(src),	l_exc_copy)
	ADD	src, src, 4*NBYTES
EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
	bne	len, rem, 1b
	 ADD	dst, dst, 4*NBYTES

cleanup_src_unaligned:
	beqz	len, done
	 and	rem, len, NBYTES-1	# rem = len % NBYTES
	beq	rem, len, copy_bytes
	 nop
1:
EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
	SUB	len, len, NBYTES
EXC(	STORE	t0, 0(dst),		s_exc_p1u)
	ADD	src, src, NBYTES
	bne	len, rem, 1b
	 ADD	dst, dst, NBYTES

copy_bytes_checklen:
	beqz	len, done
	 nop
copy_bytes:
	/* 0 < len < NBYTES */
#define COPY_BYTE(N)			\
EXC(	lb	t0, N(src), l_exc);	\
	SUB	len, len, 1;		\
	beqz	len, done;		\
EXC(	 sb	t0, N(dst), s_exc_p1)

	COPY_BYTE(0)
	COPY_BYTE(1)
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
EXC(	lb	t0, NBYTES-2(src), l_exc)
	SUB	len, len, 1
	jr	ra
EXC(	 sb	t0, NBYTES-2(dst), s_exc_p1)
done:
	jr	ra
	 nop
	END(memcpy)

l_exc_copy_rewind16:
	/* Rewind src and dst by 16*NBYTES for l_exc_copy */
	SUB	src, src, 16*NBYTES
	SUB	dst, dst, 16*NBYTES
l_exc_copy:
	/*
	 * Copy bytes from src until faulting load address (or until a
	 * lb faults)
	 *
	 * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
	 * may be more than a byte beyond the last address.
	 * Hence, the lb below may get an exception.
	 *
	 * Assumes src < THREAD_BUADDR($28)
	 */
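	/*
	 * Background note (added for clarity, not part of the original
	 * comments): $28 holds the current thread_info pointer, so the two
	 * loads below fetch the task pointer and then the faulting address
	 * recorded by the fault handler.  The byte loop then copies up to,
	 * but not including, that address before falling into l_exc to
	 * compute the residual count.
	 */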
	LOAD	t0, TI_TASK($28)
	LOAD	t0, THREAD_BUADDR(t0)
1:
EXC(	lb	t1, 0(src),	l_exc)
	ADD	src, src, 1
	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
	bne	src, t0, 1b
	 ADD	dst, dst, 1
l_exc:
	LOAD	t0, TI_TASK($28)
	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
	SUB	len, AT, t0		# len = number of uncopied bytes
	jr	ra
	 nop


#define SEXC(n)				\
s_exc_p ## n ## u:			\
	jr	ra;			\
	 ADD	len, len, n*NBYTES

SEXC(16)
SEXC(15)
SEXC(14)
SEXC(13)
SEXC(12)
SEXC(11)
SEXC(10)
SEXC(9)
SEXC(8)
SEXC(7)
SEXC(6)
SEXC(5)
SEXC(4)
SEXC(3)
SEXC(2)
SEXC(1)

s_exc_p1:
	jr	ra
	 ADD	len, len, 1
s_exc:
	jr	ra
	 nop

	.align	5
LEAF(memmove)
EXPORT_SYMBOL(memmove)
	ADD	t0, a0, a2
	ADD	t1, a1, a2
	sltu	t0, a1, t0			# dst + len <= src -> memcpy
	sltu	t1, a0, t1			# dst >= src + len -> memcpy
	and	t0, t1
	beqz	t0, __memcpy
	 move	v0, a0				/* return value */
	beqz	a2, r_out
	END(memmove)

	/* fall through to __rmemcpy */
LEAF(__rmemcpy)					/* a0=dst a1=src a2=len */
	sltu	t0, a1, a0
	beqz	t0, r_end_bytes_up		# src >= dst
	 nop
	ADD	a0, a2				# dst = dst + len
	ADD	a1, a2				# src = src + len

r_end_bytes:
	lb	t0, -1(a1)
	SUB	a2, a2, 0x1
	sb	t0, -1(a0)
	SUB	a1, a1, 0x1
	bnez	a2, r_end_bytes
	 SUB	a0, a0, 0x1

r_out:
	jr	ra
	 move	a2, zero

r_end_bytes_up:
	lb	t0, (a1)
	SUB	a2, a2, 0x1
	sb	t0, (a0)
	ADD	a1, a1, 0x1
	bnez	a2, r_end_bytes_up
	 ADD	a0, a0, 0x1

	jr	ra
	 move	a2, zero
	END(__rmemcpy)