// SPDX-License-Identifier: GPL-2.0-only
#include <crypto/hash.h>
#include <linux/export.h>
#include <linux/bvec.h>
#include <linux/fault-inject-usercopy.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <linux/compat.h>
#include <net/checksum.h>
#include <linux/scatterlist.h>
#include <linux/instrumented.h>

#define PIPE_PARANOIA /* for now */

/*
 * Walk up to n bytes of the iovec array of iterator i, starting skip bytes
 * into the first segment (i->iov).
 *
 * For every non-empty chunk, __v is set to describe it and STEP is evaluated.
 * The value of STEP is taken as the number of bytes it did NOT process; the
 * walk stops after the first chunk for which that value is non-zero.
 *
 * On exit n holds the number of bytes processed, __p points at the last
 * segment visited and skip is the offset reached inside that segment.
 */
#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
	size_t left;					\
	size_t wanted = n;				\
	__p = i->iov;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	} else {					\
		left = 0;				\
	}						\
	while (unlikely(!left && n)) {			\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted - n;					\
}

/*
 * Same walk over the kvec array (i->kvec).  The value of STEP is discarded,
 * so the walk never stops early, and n is reset to its original value on
 * exit.
 */
#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;				\
	__p = i->kvec;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		(void)(STEP);				\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	while (unlikely(n)) {				\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		(void)(STEP);				\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted;					\
}

/*
 * Walk n bytes of i->bvec with for_each_bvec(), starting skip bytes into the
 * first bio_vec.  Zero-length chunks are skipped and the value of STEP is
 * discarded.
 */
#define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
	struct bvec_iter __start;			\
	__start.bi_size = n;				\
	__start.bi_bvec_done = skip;			\
	__start.bi_idx = 0;				\
	for_each_bvec(__v, i->bvec, __bi, __start) {	\
		if (!__v.bv_len)			\
			continue;			\
		(void)(STEP);				\
	}						\
}

/*
 * Run the step matching the iterator type over the first n bytes of i:
 * I for user iovecs, B for bio_vecs, K for kvecs.  v names the chunk variable
 * the steps refer to.  The iterator itself is not modified.  Nothing is done
 * for ITER_DISCARD or when n is zero.  n must be an lvalue: the iovec walk
 * rewrites it to the number of bytes processed.
 */
#define iterate_all_kinds(i, n, v, I, B, K) {			\
	if (likely(n)) {					\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
		}						\
	}							\
}

/*
 * Like iterate_all_kinds(), but n is first clamped to i->count and the
 * iterator is advanced afterwards: the segment pointer, nr_segs, count and
 * iov_offset are written back.  For ITER_DISCARD only the offset and count
 * move.  The write-back happens after the walk, so a step that leaves the
 * enclosing function (e.g. with return) skips it.
 */
#define iterate_and_advance(i, n, v, I, B, K) {			\
	if (unlikely(i->count < n))				\
		n = i->count;					\
	if (i->count) {						\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			const struct bio_vec *bvec = i->bvec;	\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
			i->nr_segs -= i->bvec - bvec;		\
			skip = __bi.bi_bvec_done;		\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
			if (skip == kvec->iov_len) {		\
				kvec++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= kvec - i->kvec;		\
			i->kvec = kvec;				\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
			skip += n;				\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
			if (skip == iov->iov_len) {		\
				iov++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= iov - i->iov;		\
			i->iov = iov;				\
		}						\
		i->count -= n;					\
		i->iov_offset = skip;				\
	}							\
}

/*
 * Copy n bytes from kernel memory to user memory.
 *
 * Returns the number of bytes that were NOT copied: n itself when fault
 * injection triggers or access_ok() rejects the range, otherwise whatever
 * raw_copy_to_user() reports.
 */
static int copyout(void __user *to, const void *from, size_t n)
{
	if (should_fail_usercopy())
		return n;
	if (access_ok(to, n)) {
		instrument_copy_to_user(to, from, n);
		n = raw_copy_to_user(to, from, n);
	}
	return n;
}

/*
 * Copy n bytes from user memory to kernel memory; mirror image of copyout().
 * Returns the number of bytes that were NOT copied.
 */
static int copyin(void *to, const void __user *from,
size_t n)
{
	/* Fault injection: report that nothing was copied. */
	if (should_fail_usercopy())
		return n;
	if (access_ok(from, n)) {
		instrument_copy_from_user(to, from, n);
		n = raw_copy_from_user(to, from, n);
	}
	return n;
}

/*
 * Copy up to @bytes bytes from @page, starting at @offset, into the user
 * iovecs of @i and advance @i past what was copied.
 *
 * With CONFIG_HIGHMEM the copy is first attempted under kmap_atomic() after
 * pre-faulting the first destination chunk; if that comes up short the rest is
 * copied under a sleeping kmap().
 *
 * Returns the number of bytes copied, which may be less than requested.
 */
static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *from;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
		kaddr = kmap_atomic(page);
		from = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyout(buf, from, copy);
		copy -= left;
		skip += copy;
		from += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyout(buf, from, copy);
			copy -= left;
			skip = copy;
			from += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		/* Short copy: remember where we stopped and retry non-atomically. */
		offset = from - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	from = kaddr + offset;
	left = copyout(buf, from, copy);
	copy -= left;
	skip += copy;
	from += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyout(buf, from, copy);
		copy -= left;
		skip = copy;
		from += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	/* A fully consumed segment is stepped over so iov_offset stays in range. */
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

/*
 * Copy up to @bytes bytes from the user iovecs of @i into @page at @offset and
 * advance @i past what was copied.  Same two-stage strategy as
 * copy_page_to_iter_iovec(), with the copy direction reversed.
 *
 * Returns the number of bytes copied, which may be less than requested.
 */
static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *to;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
		kaddr = kmap_atomic(page);
		to = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyin(to, buf, copy);
		copy -= left;
		skip += copy;
		to += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyin(to, buf, copy);
			copy -= left;
			skip = copy;
			to += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		/* Short copy: remember where we stopped and retry non-atomically. */
		offset = to - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	to = kaddr + offset;
	left = copyin(to, buf, copy);
	copy -= left;
	skip += copy;
	to += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyin(to, buf, copy);
		copy -= left;
		skip = copy;
		to += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	/* A fully consumed segment is stepped over so iov_offset stays in range. */
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

#ifdef PIPE_PARANOIA
/*
 * Check that a pipe-backed iterator still agrees with its pipe about where the
 * data ends.  On a mismatch the iterator and ring state are dumped, a warning
 * is raised and false is returned.
 *
 * The ring indices are unsigned and iov_offset is a size_t, so they are printed
 * with %u / %zu; signed conversions would show large values as negative.
 */
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_head = pipe->head;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
	unsigned int i_head = i->head;
	unsigned int idx;

	if (i->iov_offset) {
		struct pipe_buffer *p;
		if (unlikely(p_occupancy == 0))
			goto Bad;	// pipe must be non-empty
		if (unlikely(i_head != p_head - 1))
			goto Bad;	// must be at the last buffer...

		p = &pipe->bufs[i_head & p_mask];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	// ... at the end of segment
	} else {
		if (i_head != p_head)
			goto Bad;	// must be right after the last buffer
	}
	return true;
Bad:
	printk(KERN_ERR "idx = %u, offset = %zu\n", i_head, i->iov_offset);
	printk(KERN_ERR "head = %u, tail = %u, buffers = %u\n",
			p_head, p_tail, pipe->ring_size);
	for (idx = 0; idx < pipe->ring_size; idx++)
		printk(KERN_ERR "[%p %p %u %u]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif

/*
 * Append @bytes bytes of @page, starting at @offset, to the pipe behind @i by
 * reference (no data is copied).  If the range directly continues the last
 * buffer, that buffer is simply extended.
 *
 * Returns the number of bytes queued, or 0 if the pipe is full or the iterator
 * fails the sanity check.
 */
static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head = i->head;
	size_t off;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	buf = &pipe->bufs[i_head & p_mask];
	if (off) {
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
i_head++;
		buf = &pipe->bufs[i_head & p_mask];
	}
	if (pipe_full(i_head, p_tail, pipe->max_usage))
		return 0;

	/* Queue a new buffer that references the caller's page. */
	buf->ops = &page_cache_pipe_buf_ops;
	buf->flags = 0;
	get_page(page);
	buf->page = page;
	buf->offset = offset;
	buf->len = bytes;

	pipe->head = i_head + 1;
	i->iov_offset = offset + bytes;
	i->head = i_head;
out:
	i->count -= bytes;
	return bytes;
}

/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes.  For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 * because it is an invalid address).
 *
 * Only user-iovec iterators are touched; for every other type the function
 * returns 0 without doing anything.  The iterator is not advanced.
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
	size_t skip = i->iov_offset;
	const struct iovec *iov;
	int err;
	struct iovec v;

	if (iter_is_iovec(i)) {
		iterate_iovec(i, bytes, v, iov, skip, ({
			err = fault_in_pages_readable(v.iov_base, v.iov_len);
			if (unlikely(err))
			return err;
		0;}))
	}
	return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);

/*
 * Initialise @i over @nr_segs segments at @iov holding @count bytes in total.
 * Bits of @direction other than READ/WRITE trigger a warning and are dropped.
 * When uaccess_kernel() is true the segments are treated as kernel vectors
 * (ITER_KVEC) rather than user iovecs.
 */
void iov_iter_init(struct iov_iter *i, unsigned int direction,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	direction &= READ | WRITE;

	/* It will get better.  Eventually... */
	if (uaccess_kernel()) {
		i->type = ITER_KVEC | direction;
		i->kvec = (struct kvec *)iov;
	} else {
		i->type = ITER_IOVEC | direction;
		i->iov = iov;
	}
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_init);

/* Zero @len bytes of @page starting at @offset, under an atomic mapping. */
static void memzero_page(struct page *page, size_t offset, size_t len)
{
	char *addr = kmap_atomic(page);
	memset(addr + offset, 0, len);
	kunmap_atomic(addr);
}

/* True if @buf uses default_pipe_buf_ops, the ops push_pipe() installs. */
static inline bool allocated(struct pipe_buffer *buf)
{
	return buf->ops == &default_pipe_buf_ops;
}

/*
 * Report where new data for pipe iterator @i would start.  This is the current
 * buffer and offset, unless that buffer is not one of ours (see allocated())
 * or is already filled up to PAGE_SIZE, in which case it is the next ring slot
 * at offset 0.
 */
static inline void data_start(const struct iov_iter *i,
			      unsigned int *iter_headp, size_t *offp)
{
	unsigned int p_mask = i->pipe->ring_size - 1;
	unsigned int iter_head = i->head;
	size_t off = i->iov_offset;

	if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
		    off == PAGE_SIZE)) {
		iter_head++;
		off = 0;
	}
	*iter_headp = iter_head;
	*offp = off;
}

/*
 * Make room in the pipe for up to @size bytes at the iterator's position.
 *
 * The partially filled buffer reported by data_start(), if any, is extended
 * first; then fresh pages are allocated and queued until the request is
 * covered, the pipe is full, or a page allocation fails.
 *
 * The ring index and offset of the first byte are stored through @iter_headp
 * and @offp.  Returns the number of bytes of space obtained (0 if none).  The
 * iterator's own position and count are left for the caller to update.
 */
static size_t push_pipe(struct iov_iter *i, size_t size,
			int *iter_headp, size_t *offp)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int iter_head;
	size_t off;
	ssize_t left;

	if (unlikely(size > i->count))
		size = i->count;
	if (unlikely(!size))
		return 0;

	left = size;
	data_start(i, &iter_head, &off);
	*iter_headp = iter_head;
	*offp = off;
	if (off) {
		left -= PAGE_SIZE - off;
		/* Everything fits into the tail of the current buffer. */
		if (left <= 0) {
			pipe->bufs[iter_head & p_mask].len += size;
			return size;
		}
		pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
		iter_head++;
	}
	while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
		struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
		struct page *page = alloc_page(GFP_USER);
		if (!page)
			break;

		buf->ops = &default_pipe_buf_ops;
		buf->flags = 0;
		buf->page = page;
		buf->offset = 0;
		buf->len = min_t(ssize_t, left, PAGE_SIZE);
		left -= buf->len;
		iter_head++;
		pipe->head = iter_head;

		if (left == 0)
			return size;
	}
	return size - left;
}

/*
 * Copy @bytes bytes from kernel memory at @addr into freshly reserved pipe
 * space and advance @i.  Returns the number of bytes copied, which is limited
 * by the space push_pipe() could obtain.
 */
static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk;
		n -= chunk;
		addr += chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	return bytes;
}

/*
 * Copy @len bytes from @from to @to while checksumming them, and fold the
 * result into @sum as the block located at byte offset @off.
 */
static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
			      __wsum sum, size_t off)
{
	__wsum next = csum_partial_copy_nocheck(from, to, len);
	return csum_block_add(sum, next, off);
}

/*
 * Pipe variant of checksum-and-copy: copy @bytes bytes from @addr into the
 * pipe behind @i, accumulating the checksum and running offset in @csstate.
 * Returns the number of bytes copied.
 */
static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
					 struct csum_state *csstate,
					 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	__wsum sum = csstate->csum;
	size_t off = csstate->off;
	unsigned int i_head;
	size_t n, r;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &r);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
		char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
		sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
		kunmap_atomic(p);
		i->head = i_head;
		i->iov_offset = r + chunk;
		n -= chunk;
		off += chunk;
		addr += chunk;
		r = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	csstate->csum = sum;
	csstate->off = off;
return bytes; 616} 617 618size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) 619{ 620 const char *from = addr; 621 if (unlikely(iov_iter_is_pipe(i))) 622 return copy_pipe_to_iter(addr, bytes, i); 623 if (iter_is_iovec(i)) 624 might_fault(); 625 iterate_and_advance(i, bytes, v, 626 copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len), 627 memcpy_to_page(v.bv_page, v.bv_offset, 628 (from += v.bv_len) - v.bv_len, v.bv_len), 629 memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len) 630 ) 631 632 return bytes; 633} 634EXPORT_SYMBOL(_copy_to_iter); 635 636#ifdef CONFIG_ARCH_HAS_COPY_MC 637static int copyout_mc(void __user *to, const void *from, size_t n) 638{ 639 if (access_ok(to, n)) { 640 instrument_copy_to_user(to, from, n); 641 n = copy_mc_to_user((__force void *) to, from, n); 642 } 643 return n; 644} 645 646static unsigned long copy_mc_to_page(struct page *page, size_t offset, 647 const char *from, size_t len) 648{ 649 unsigned long ret; 650 char *to; 651 652 to = kmap_atomic(page); 653 ret = copy_mc_to_kernel(to + offset, from, len); 654 kunmap_atomic(to); 655 656 return ret; 657} 658 659static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes, 660 struct iov_iter *i) 661{ 662 struct pipe_inode_info *pipe = i->pipe; 663 unsigned int p_mask = pipe->ring_size - 1; 664 unsigned int i_head; 665 size_t n, off, xfer = 0; 666 667 if (!sanity(i)) 668 return 0; 669 670 bytes = n = push_pipe(i, bytes, &i_head, &off); 671 if (unlikely(!n)) 672 return 0; 673 do { 674 size_t chunk = min_t(size_t, n, PAGE_SIZE - off); 675 unsigned long rem; 676 677 rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page, 678 off, addr, chunk); 679 i->head = i_head; 680 i->iov_offset = off + chunk - rem; 681 xfer += chunk - rem; 682 if (rem) 683 break; 684 n -= chunk; 685 addr += chunk; 686 off = 0; 687 i_head++; 688 } while (n); 689 i->count -= xfer; 690 return xfer; 691} 692 693/** 694 * _copy_mc_to_iter - copy to iter with source memory error 
exception handling 695 * @addr: source kernel address 696 * @bytes: total transfer length 697 * @iter: destination iterator 698 * 699 * The pmem driver deploys this for the dax operation 700 * (dax_copy_to_iter()) for dax reads (bypass page-cache and the 701 * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes 702 * successfully copied. 703 * 704 * The main differences between this and typical _copy_to_iter(). 705 * 706 * * Typical tail/residue handling after a fault retries the copy 707 * byte-by-byte until the fault happens again. Re-triggering machine 708 * checks is potentially fatal so the implementation uses source 709 * alignment and poison alignment assumptions to avoid re-triggering 710 * hardware exceptions. 711 * 712 * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies. 713 * Compare to copy_to_iter() where only ITER_IOVEC attempts might return 714 * a short copy. 715 */ 716size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) 717{ 718 const char *from = addr; 719 unsigned long rem, curr_addr, s_addr = (unsigned long) addr; 720 721 if (unlikely(iov_iter_is_pipe(i))) 722 return copy_mc_pipe_to_iter(addr, bytes, i); 723 if (iter_is_iovec(i)) 724 might_fault(); 725 iterate_and_advance(i, bytes, v, 726 copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len, 727 v.iov_len), 728 ({ 729 rem = copy_mc_to_page(v.bv_page, v.bv_offset, 730 (from += v.bv_len) - v.bv_len, v.bv_len); 731 if (rem) { 732 curr_addr = (unsigned long) from; 733 bytes = curr_addr - s_addr - rem; 734 return bytes; 735 } 736 }), 737 ({ 738 rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len) 739 - v.iov_len, v.iov_len); 740 if (rem) { 741 curr_addr = (unsigned long) from; 742 bytes = curr_addr - s_addr - rem; 743 return bytes; 744 } 745 }) 746 ) 747 748 return bytes; 749} 750EXPORT_SYMBOL_GPL(_copy_mc_to_iter); 751#endif /* CONFIG_ARCH_HAS_COPY_MC */ 752 753size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) 754{ 755 char 
*to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter);

/*
 * All-or-nothing variant of _copy_from_iter(): returns false without advancing
 * @i if @i holds fewer than @bytes bytes or a user copy comes up short, and
 * advances @i by @bytes only after the whole copy succeeded.
 */
bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;

	if (iter_is_iovec(i))
		might_fault();
	iterate_all_kinds(i, bytes, v, ({
		if (copyin((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full);

/*
 * Like _copy_from_iter(), but user iovecs are copied with
 * __copy_from_user_inatomic_nocache().  bvec and kvec sources use the ordinary
 * copy helpers.
 */
size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter_nocache);

#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
 * _copy_from_iter_flushcache - write destination through cpu cache
 * @addr: destination kernel address
 * @bytes: total transfer length
 * @i: source iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_from_iter() for ensuring that writes to persistent memory
 * are flushed through the CPU cache. It is differentiated from
 * _copy_from_iter_nocache() in that it guarantees all data is flushed for
 * all iterator types. The _copy_from_iter_nocache() only attempts to
 * bypass the cache for the ITER_IOVEC case, and on some archs may use
 * instructions that strand dirty-data in the cache.
 */
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
			v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
#endif

/*
 * All-or-nothing variant of _copy_from_iter_nocache(); same contract as
 * _copy_from_iter_full().
 */
bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full_nocache);

/*
 * Check that the range [@offset, @offset + @n) stays inside @page, or inside
 * the compound page @page belongs to.  Warns and returns false otherwise.
 * The "n <= v" comparisons reject wrap-around of the sum.
 */
static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
{
	struct page *head;
	size_t v = n + offset;

	/*
	 * The general case needs to access the page order in order
	 * to compute the page size.
	 * However, we mostly deal with order-0 pages and thus can
	 * avoid a possible cache line miss for requests that fit all
	 * page orders.
	 */
	if (n <= v && v <= PAGE_SIZE)
		return true;

	head = compound_head(page);
	v += (page - head) << PAGE_SHIFT;

	if (likely(n <= v && v <= (page_size(head))))
		return true;
	WARN_ON(1);
	return false;
}

/*
 * Copy up to @bytes bytes from @page at @offset into the iterator @i,
 * dispatching on the iterator type.  Returns the number of bytes copied, or 0
 * if the requested range is not sane for @page.
 */
size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else if (unlikely(iov_iter_is_discard(i))) {
		/* Nothing to store: just consume the space. */
		if (unlikely(i->count < bytes))
			bytes = i->count;
		i->count -= bytes;
		return bytes;
	} else if (likely(!iov_iter_is_pipe(i)))
		return copy_page_to_iter_iovec(page, offset, bytes, i);
	else
		return copy_page_to_iter_pipe(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);

/*
 * Copy up to @bytes bytes from the iterator @i into @page at @offset.  Pipe
 * and discard iterators are rejected with a warning.  Returns the number of
 * bytes copied.
 */
size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return 0;
	}
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else
		return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);

/*
 * Append up to @bytes zero bytes to the pipe behind @i and advance @i.
 * Returns the number of bytes zeroed.
 */
static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head,
&off); 955 if (unlikely(!n)) 956 return 0; 957 958 do { 959 size_t chunk = min_t(size_t, n, PAGE_SIZE - off); 960 memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk); 961 i->head = i_head; 962 i->iov_offset = off + chunk; 963 n -= chunk; 964 off = 0; 965 i_head++; 966 } while (n); 967 i->count -= bytes; 968 return bytes; 969} 970 971size_t iov_iter_zero(size_t bytes, struct iov_iter *i) 972{ 973 if (unlikely(iov_iter_is_pipe(i))) 974 return pipe_zero(bytes, i); 975 iterate_and_advance(i, bytes, v, 976 clear_user(v.iov_base, v.iov_len), 977 memzero_page(v.bv_page, v.bv_offset, v.bv_len), 978 memset(v.iov_base, 0, v.iov_len) 979 ) 980 981 return bytes; 982} 983EXPORT_SYMBOL(iov_iter_zero); 984 985size_t iov_iter_copy_from_user_atomic(struct page *page, 986 struct iov_iter *i, unsigned long offset, size_t bytes) 987{ 988 char *kaddr = kmap_atomic(page), *p = kaddr + offset; 989 if (unlikely(!page_copy_sane(page, offset, bytes))) { 990 kunmap_atomic(kaddr); 991 return 0; 992 } 993 if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { 994 kunmap_atomic(kaddr); 995 WARN_ON(1); 996 return 0; 997 } 998 iterate_all_kinds(i, bytes, v, 999 copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), 1000 memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page, 1001 v.bv_offset, v.bv_len), 1002 memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) 1003 ) 1004 kunmap_atomic(kaddr); 1005 return bytes; 1006} 1007EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); 1008 1009static inline void pipe_truncate(struct iov_iter *i) 1010{ 1011 struct pipe_inode_info *pipe = i->pipe; 1012 unsigned int p_tail = pipe->tail; 1013 unsigned int p_head = pipe->head; 1014 unsigned int p_mask = pipe->ring_size - 1; 1015 1016 if (!pipe_empty(p_head, p_tail)) { 1017 struct pipe_buffer *buf; 1018 unsigned int i_head = i->head; 1019 size_t off = i->iov_offset; 1020 1021 if (off) { 1022 buf = &pipe->bufs[i_head & p_mask]; 1023 buf->len = off - buf->offset; 1024 i_head++; 1025 } 1026 
while (p_head != i_head) { 1027 p_head--; 1028 pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]); 1029 } 1030 1031 pipe->head = p_head; 1032 } 1033} 1034 1035static void pipe_advance(struct iov_iter *i, size_t size) 1036{ 1037 struct pipe_inode_info *pipe = i->pipe; 1038 if (unlikely(i->count < size)) 1039 size = i->count; 1040 if (size) { 1041 struct pipe_buffer *buf; 1042 unsigned int p_mask = pipe->ring_size - 1; 1043 unsigned int i_head = i->head; 1044 size_t off = i->iov_offset, left = size; 1045 1046 if (off) /* make it relative to the beginning of buffer */ 1047 left += off - pipe->bufs[i_head & p_mask].offset; 1048 while (1) { 1049 buf = &pipe->bufs[i_head & p_mask]; 1050 if (left <= buf->len) 1051 break; 1052 left -= buf->len; 1053 i_head++; 1054 } 1055 i->head = i_head; 1056 i->iov_offset = buf->offset + left; 1057 } 1058 i->count -= size; 1059 /* ... and discard everything past that point */ 1060 pipe_truncate(i); 1061} 1062 1063void iov_iter_advance(struct iov_iter *i, size_t size) 1064{ 1065 if (unlikely(iov_iter_is_pipe(i))) { 1066 pipe_advance(i, size); 1067 return; 1068 } 1069 if (unlikely(iov_iter_is_discard(i))) { 1070 i->count -= size; 1071 return; 1072 } 1073 iterate_and_advance(i, size, v, 0, 0, 0) 1074} 1075EXPORT_SYMBOL(iov_iter_advance); 1076 1077void iov_iter_revert(struct iov_iter *i, size_t unroll) 1078{ 1079 if (!unroll) 1080 return; 1081 if (WARN_ON(unroll > MAX_RW_COUNT)) 1082 return; 1083 i->count += unroll; 1084 if (unlikely(iov_iter_is_pipe(i))) { 1085 struct pipe_inode_info *pipe = i->pipe; 1086 unsigned int p_mask = pipe->ring_size - 1; 1087 unsigned int i_head = i->head; 1088 size_t off = i->iov_offset; 1089 while (1) { 1090 struct pipe_buffer *b = &pipe->bufs[i_head & p_mask]; 1091 size_t n = off - b->offset; 1092 if (unroll < n) { 1093 off -= unroll; 1094 break; 1095 } 1096 unroll -= n; 1097 if (!unroll && i_head == i->start_head) { 1098 off = 0; 1099 break; 1100 } 1101 i_head--; 1102 b = &pipe->bufs[i_head & p_mask]; 1103 
off = b->offset + b->len; 1104 } 1105 i->iov_offset = off; 1106 i->head = i_head; 1107 pipe_truncate(i); 1108 return; 1109 } 1110 if (unlikely(iov_iter_is_discard(i))) 1111 return; 1112 if (unroll <= i->iov_offset) { 1113 i->iov_offset -= unroll; 1114 return; 1115 } 1116 unroll -= i->iov_offset; 1117 if (iov_iter_is_bvec(i)) { 1118 const struct bio_vec *bvec = i->bvec; 1119 while (1) { 1120 size_t n = (--bvec)->bv_len; 1121 i->nr_segs++; 1122 if (unroll <= n) { 1123 i->bvec = bvec; 1124 i->iov_offset = n - unroll; 1125 return; 1126 } 1127 unroll -= n; 1128 } 1129 } else { /* same logics for iovec and kvec */ 1130 const struct iovec *iov = i->iov; 1131 while (1) { 1132 size_t n = (--iov)->iov_len; 1133 i->nr_segs++; 1134 if (unroll <= n) { 1135 i->iov = iov; 1136 i->iov_offset = n - unroll; 1137 return; 1138 } 1139 unroll -= n; 1140 } 1141 } 1142} 1143EXPORT_SYMBOL(iov_iter_revert); 1144 1145/* 1146 * Return the count of just the current iov_iter segment. 1147 */ 1148size_t iov_iter_single_seg_count(const struct iov_iter *i) 1149{ 1150 if (unlikely(iov_iter_is_pipe(i))) 1151 return i->count; // it is a silly place, anyway 1152 if (i->nr_segs == 1) 1153 return i->count; 1154 if (unlikely(iov_iter_is_discard(i))) 1155 return i->count; 1156 else if (iov_iter_is_bvec(i)) 1157 return min(i->count, i->bvec->bv_len - i->iov_offset); 1158 else 1159 return min(i->count, i->iov->iov_len - i->iov_offset); 1160} 1161EXPORT_SYMBOL(iov_iter_single_seg_count); 1162 1163void iov_iter_kvec(struct iov_iter *i, unsigned int direction, 1164 const struct kvec *kvec, unsigned long nr_segs, 1165 size_t count) 1166{ 1167 WARN_ON(direction & ~(READ | WRITE)); 1168 i->type = ITER_KVEC | (direction & (READ | WRITE)); 1169 i->kvec = kvec; 1170 i->nr_segs = nr_segs; 1171 i->iov_offset = 0; 1172 i->count = count; 1173} 1174EXPORT_SYMBOL(iov_iter_kvec); 1175 1176void iov_iter_bvec(struct iov_iter *i, unsigned int direction, 1177 const struct bio_vec *bvec, unsigned long nr_segs, 1178 size_t 
count)
{
	WARN_ON(direction & ~(READ | WRITE));
	i->type = ITER_BVEC | (direction & (READ | WRITE));
	i->bvec = bvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_bvec);

/*
 * Initialise @i as a pipe-backed iterator that appends up to @count bytes to
 * @pipe, starting at the pipe's current head.  Only READ is accepted as
 * @direction; a pipe that is already full triggers a warning.
 */
void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
			struct pipe_inode_info *pipe,
			size_t count)
{
	BUG_ON(direction != READ);
	WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
	i->type = ITER_PIPE | READ;
	i->pipe = pipe;
	i->head = pipe->head;
	i->iov_offset = 0;
	i->count = count;
	i->start_head = i->head;
}
EXPORT_SYMBOL(iov_iter_pipe);

/**
 * iov_iter_discard - Initialise an I/O iterator that discards data
 * @i: The iterator to initialise.
 * @direction: The direction of the transfer.
 * @count: The size of the I/O buffer in bytes.
 *
 * Set up an I/O iterator that just discards everything that's written to it.
 * It's only available as a READ iterator.
 */
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
{
	BUG_ON(direction != READ);
	i->type = ITER_DISCARD | READ;
	i->count = count;
	i->iov_offset = 0;
}
EXPORT_SYMBOL(iov_iter_discard);

/*
 * Return the bitwise OR of the start address (page offset for bio_vecs) and
 * length of every chunk in @i, so that the low bits of the result show the
 * coarsest alignment all of them share.  For a pipe iterator the result is the
 * remaining count, ORed with the current offset when the iterator sits inside
 * a buffer it allocated.
 */
unsigned long iov_iter_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(iov_iter_is_pipe(i))) {
		unsigned int p_mask = i->pipe->ring_size - 1;

		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
			return size | i->iov_offset;
		return size;
	}
	iterate_all_kinds(i, size, v,
		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
		res |= v.bv_offset | v.bv_len,
		res |= (unsigned long)v.iov_base | v.iov_len
	)
	return res;
}
EXPORT_SYMBOL(iov_iter_alignment);

/*
 * Accumulate, over the chunks of @i, the start address (page offset for
 * bio_vecs) of every chunk visited once the accumulator is non-zero, together
 * with the running size whenever it differs from the chunk length.  Pipe and
 * discard iterators are rejected with a warning and ~0U is returned.
 * NOTE(review): presumably this reports the alignment of the gaps between
 * segments, as the name suggests - confirm against the callers.
 */
unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return ~0U;
	}

	iterate_all_kinds(i, size, v,
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0), 0),
		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
			(size != v.bv_len ? size : 0)),
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0))
		);
	return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);

/*
 * Reserve up to @maxsize bytes of pipe space with push_pipe() and store a
 * referenced pointer to every page backing that space in @pages.  *@start
 * receives the offset of the data within the first page.
 *
 * Returns the number of bytes reserved, or -EFAULT if no space was obtained.
 */
static inline ssize_t __pipe_get_pages(struct iov_iter *i,
				size_t maxsize,
				struct page **pages,
				int iter_head,
				size_t *start)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	ssize_t n = push_pipe(i, maxsize, &iter_head, start);
	if (!n)
		return -EFAULT;

	maxsize = n;
	/* Count from the start of the first page so partial pages are covered. */
	n += *start;
	while (n > 0) {
		get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
		iter_head++;
		n -= PAGE_SIZE;
	}

	return maxsize;
}

/*
 * Pipe back end of iov_iter_get_pages(): limit the request to the free pipe
 * space and to @maxpages pages, then hand over to __pipe_get_pages().
 * Returns 0 for an empty request and -EFAULT if the iterator fails the sanity
 * check.
 */
static ssize_t pipe_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	unsigned int iter_head, npages;
	size_t capacity;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &iter_head, start);
	/* Amount of free space: some of this one + all after this one */
	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
	capacity = min(npages, maxpages) * PAGE_SIZE - *start;

	return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
}

ssize_t iov_iter_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages(i, pages, maxsize, maxpages, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		if (len > maxpages * PAGE_SIZE)
			len = maxpages * PAGE_SIZE;
		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		res =
get_user_pages_fast(addr, n, 1332 iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, 1333 pages); 1334 if (unlikely(res <= 0)) 1335 return res; 1336 return (res == n ? len : res * PAGE_SIZE) - *start; 1337 0;}),({ 1338 /* can't be more than PAGE_SIZE */ 1339 *start = v.bv_offset; 1340 get_page(*pages = v.bv_page); 1341 return v.bv_len; 1342 }),({ 1343 return -EFAULT; 1344 }) 1345 ) 1346 return 0; 1347} 1348EXPORT_SYMBOL(iov_iter_get_pages); 1349 1350static struct page **get_pages_array(size_t n) 1351{ 1352 return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL); 1353} 1354 1355static ssize_t pipe_get_pages_alloc(struct iov_iter *i, 1356 struct page ***pages, size_t maxsize, 1357 size_t *start) 1358{ 1359 struct page **p; 1360 unsigned int iter_head, npages; 1361 ssize_t n; 1362 1363 if (!maxsize) 1364 return 0; 1365 1366 if (!sanity(i)) 1367 return -EFAULT; 1368 1369 data_start(i, &iter_head, start); 1370 /* Amount of free space: some of this one + all after this one */ 1371 npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); 1372 n = npages * PAGE_SIZE - *start; 1373 if (maxsize > n) 1374 maxsize = n; 1375 else 1376 npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); 1377 p = get_pages_array(npages); 1378 if (!p) 1379 return -ENOMEM; 1380 n = __pipe_get_pages(i, maxsize, p, iter_head, start); 1381 if (n > 0) 1382 *pages = p; 1383 else 1384 kvfree(p); 1385 return n; 1386} 1387 1388ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, 1389 struct page ***pages, size_t maxsize, 1390 size_t *start) 1391{ 1392 struct page **p; 1393 1394 if (maxsize > i->count) 1395 maxsize = i->count; 1396 1397 if (unlikely(iov_iter_is_pipe(i))) 1398 return pipe_get_pages_alloc(i, pages, maxsize, start); 1399 if (unlikely(iov_iter_is_discard(i))) 1400 return -EFAULT; 1401 1402 iterate_all_kinds(i, maxsize, v, ({ 1403 unsigned long addr = (unsigned long)v.iov_base; 1404 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); 1405 int n; 1406 int res; 1407 1408 addr &= 
~(PAGE_SIZE - 1); 1409 n = DIV_ROUND_UP(len, PAGE_SIZE); 1410 p = get_pages_array(n); 1411 if (!p) 1412 return -ENOMEM; 1413 res = get_user_pages_fast(addr, n, 1414 iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, p); 1415 if (unlikely(res <= 0)) { 1416 kvfree(p); 1417 *pages = NULL; 1418 return res; 1419 } 1420 *pages = p; 1421 return (res == n ? len : res * PAGE_SIZE) - *start; 1422 0;}),({ 1423 /* can't be more than PAGE_SIZE */ 1424 *start = v.bv_offset; 1425 *pages = p = get_pages_array(1); 1426 if (!p) 1427 return -ENOMEM; 1428 get_page(*p = v.bv_page); 1429 return v.bv_len; 1430 }),({ 1431 return -EFAULT; 1432 }) 1433 ) 1434 return 0; 1435} 1436EXPORT_SYMBOL(iov_iter_get_pages_alloc); 1437 1438size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, 1439 struct iov_iter *i) 1440{ 1441 char *to = addr; 1442 __wsum sum, next; 1443 size_t off = 0; 1444 sum = *csum; 1445 if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { 1446 WARN_ON(1); 1447 return 0; 1448 } 1449 iterate_and_advance(i, bytes, v, ({ 1450 next = csum_and_copy_from_user(v.iov_base, 1451 (to += v.iov_len) - v.iov_len, 1452 v.iov_len); 1453 if (next) { 1454 sum = csum_block_add(sum, next, off); 1455 off += v.iov_len; 1456 } 1457 next ? 
0 : v.iov_len; 1458 }), ({ 1459 char *p = kmap_atomic(v.bv_page); 1460 sum = csum_and_memcpy((to += v.bv_len) - v.bv_len, 1461 p + v.bv_offset, v.bv_len, 1462 sum, off); 1463 kunmap_atomic(p); 1464 off += v.bv_len; 1465 }),({ 1466 sum = csum_and_memcpy((to += v.iov_len) - v.iov_len, 1467 v.iov_base, v.iov_len, 1468 sum, off); 1469 off += v.iov_len; 1470 }) 1471 ) 1472 *csum = sum; 1473 return bytes; 1474} 1475EXPORT_SYMBOL(csum_and_copy_from_iter); 1476 1477bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, 1478 struct iov_iter *i) 1479{ 1480 char *to = addr; 1481 __wsum sum, next; 1482 size_t off = 0; 1483 sum = *csum; 1484 if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { 1485 WARN_ON(1); 1486 return false; 1487 } 1488 if (unlikely(i->count < bytes)) 1489 return false; 1490 iterate_all_kinds(i, bytes, v, ({ 1491 next = csum_and_copy_from_user(v.iov_base, 1492 (to += v.iov_len) - v.iov_len, 1493 v.iov_len); 1494 if (!next) 1495 return false; 1496 sum = csum_block_add(sum, next, off); 1497 off += v.iov_len; 1498 0; 1499 }), ({ 1500 char *p = kmap_atomic(v.bv_page); 1501 sum = csum_and_memcpy((to += v.bv_len) - v.bv_len, 1502 p + v.bv_offset, v.bv_len, 1503 sum, off); 1504 kunmap_atomic(p); 1505 off += v.bv_len; 1506 }),({ 1507 sum = csum_and_memcpy((to += v.iov_len) - v.iov_len, 1508 v.iov_base, v.iov_len, 1509 sum, off); 1510 off += v.iov_len; 1511 }) 1512 ) 1513 *csum = sum; 1514 iov_iter_advance(i, bytes); 1515 return true; 1516} 1517EXPORT_SYMBOL(csum_and_copy_from_iter_full); 1518 1519size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, 1520 struct iov_iter *i) 1521{ 1522 struct csum_state *csstate = _csstate; 1523 const char *from = addr; 1524 __wsum sum, next; 1525 size_t off; 1526 1527 if (unlikely(iov_iter_is_pipe(i))) 1528 return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i); 1529 1530 sum = csstate->csum; 1531 off = csstate->off; 1532 if (unlikely(iov_iter_is_discard(i))) { 1533 
WARN_ON(1); /* for now */ 1534 return 0; 1535 } 1536 iterate_and_advance(i, bytes, v, ({ 1537 next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, 1538 v.iov_base, 1539 v.iov_len); 1540 if (next) { 1541 sum = csum_block_add(sum, next, off); 1542 off += v.iov_len; 1543 } 1544 next ? 0 : v.iov_len; 1545 }), ({ 1546 char *p = kmap_atomic(v.bv_page); 1547 sum = csum_and_memcpy(p + v.bv_offset, 1548 (from += v.bv_len) - v.bv_len, 1549 v.bv_len, sum, off); 1550 kunmap_atomic(p); 1551 off += v.bv_len; 1552 }),({ 1553 sum = csum_and_memcpy(v.iov_base, 1554 (from += v.iov_len) - v.iov_len, 1555 v.iov_len, sum, off); 1556 off += v.iov_len; 1557 }) 1558 ) 1559 csstate->csum = sum; 1560 csstate->off = off; 1561 return bytes; 1562} 1563EXPORT_SYMBOL(csum_and_copy_to_iter); 1564 1565size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, 1566 struct iov_iter *i) 1567{ 1568#ifdef CONFIG_CRYPTO_HASH 1569 struct ahash_request *hash = hashp; 1570 struct scatterlist sg; 1571 size_t copied; 1572 1573 copied = copy_to_iter(addr, bytes, i); 1574 sg_init_one(&sg, addr, copied); 1575 ahash_request_set_crypt(hash, &sg, NULL, copied); 1576 crypto_ahash_update(hash); 1577 return copied; 1578#else 1579 return 0; 1580#endif 1581} 1582EXPORT_SYMBOL(hash_and_copy_to_iter); 1583 1584int iov_iter_npages(const struct iov_iter *i, int maxpages) 1585{ 1586 size_t size = i->count; 1587 int npages = 0; 1588 1589 if (!size) 1590 return 0; 1591 if (unlikely(iov_iter_is_discard(i))) 1592 return 0; 1593 1594 if (unlikely(iov_iter_is_pipe(i))) { 1595 struct pipe_inode_info *pipe = i->pipe; 1596 unsigned int iter_head; 1597 size_t off; 1598 1599 if (!sanity(i)) 1600 return 0; 1601 1602 data_start(i, &iter_head, &off); 1603 /* some of this one + all after this one */ 1604 npages = pipe_space_for_user(iter_head, pipe->tail, pipe); 1605 if (npages >= maxpages) 1606 return maxpages; 1607 } else iterate_all_kinds(i, size, v, ({ 1608 unsigned long p = (unsigned long)v.iov_base; 1609 
npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) 1610 - p / PAGE_SIZE; 1611 if (npages >= maxpages) 1612 return maxpages; 1613 0;}),({ 1614 npages++; 1615 if (npages >= maxpages) 1616 return maxpages; 1617 }),({ 1618 unsigned long p = (unsigned long)v.iov_base; 1619 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) 1620 - p / PAGE_SIZE; 1621 if (npages >= maxpages) 1622 return maxpages; 1623 }) 1624 ) 1625 return npages; 1626} 1627EXPORT_SYMBOL(iov_iter_npages); 1628 1629const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) 1630{ 1631 *new = *old; 1632 if (unlikely(iov_iter_is_pipe(new))) { 1633 WARN_ON(1); 1634 return NULL; 1635 } 1636 if (unlikely(iov_iter_is_discard(new))) 1637 return NULL; 1638 if (iov_iter_is_bvec(new)) 1639 return new->bvec = kmemdup(new->bvec, 1640 new->nr_segs * sizeof(struct bio_vec), 1641 flags); 1642 else 1643 /* iovec and kvec have identical layout */ 1644 return new->iov = kmemdup(new->iov, 1645 new->nr_segs * sizeof(struct iovec), 1646 flags); 1647} 1648EXPORT_SYMBOL(dup_iter); 1649 1650static int copy_compat_iovec_from_user(struct iovec *iov, 1651 const struct iovec __user *uvec, unsigned long nr_segs) 1652{ 1653 const struct compat_iovec __user *uiov = 1654 (const struct compat_iovec __user *)uvec; 1655 int ret = -EFAULT, i; 1656 1657 if (!user_access_begin(uiov, nr_segs * sizeof(*uiov))) 1658 return -EFAULT; 1659 1660 for (i = 0; i < nr_segs; i++) { 1661 compat_uptr_t buf; 1662 compat_ssize_t len; 1663 1664 unsafe_get_user(len, &uiov[i].iov_len, uaccess_end); 1665 unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end); 1666 1667 /* check for compat_size_t not fitting in compat_ssize_t .. 
*/ 1668 if (len < 0) { 1669 ret = -EINVAL; 1670 goto uaccess_end; 1671 } 1672 iov[i].iov_base = compat_ptr(buf); 1673 iov[i].iov_len = len; 1674 } 1675 1676 ret = 0; 1677uaccess_end: 1678 user_access_end(); 1679 return ret; 1680} 1681 1682static int copy_iovec_from_user(struct iovec *iov, 1683 const struct iovec __user *uvec, unsigned long nr_segs) 1684{ 1685 unsigned long seg; 1686 1687 if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec))) 1688 return -EFAULT; 1689 for (seg = 0; seg < nr_segs; seg++) { 1690 if ((ssize_t)iov[seg].iov_len < 0) 1691 return -EINVAL; 1692 } 1693 1694 return 0; 1695} 1696 1697struct iovec *iovec_from_user(const struct iovec __user *uvec, 1698 unsigned long nr_segs, unsigned long fast_segs, 1699 struct iovec *fast_iov, bool compat) 1700{ 1701 struct iovec *iov = fast_iov; 1702 int ret; 1703 1704 /* 1705 * SuS says "The readv() function *may* fail if the iovcnt argument was 1706 * less than or equal to 0, or greater than {IOV_MAX}. Linux has 1707 * traditionally returned zero for zero segments, so... 
1708 */ 1709 if (nr_segs == 0) 1710 return iov; 1711 if (nr_segs > UIO_MAXIOV) 1712 return ERR_PTR(-EINVAL); 1713 if (nr_segs > fast_segs) { 1714 iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL); 1715 if (!iov) 1716 return ERR_PTR(-ENOMEM); 1717 } 1718 1719 if (compat) 1720 ret = copy_compat_iovec_from_user(iov, uvec, nr_segs); 1721 else 1722 ret = copy_iovec_from_user(iov, uvec, nr_segs); 1723 if (ret) { 1724 if (iov != fast_iov) 1725 kfree(iov); 1726 return ERR_PTR(ret); 1727 } 1728 1729 return iov; 1730} 1731 1732ssize_t __import_iovec(int type, const struct iovec __user *uvec, 1733 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, 1734 struct iov_iter *i, bool compat) 1735{ 1736 ssize_t total_len = 0; 1737 unsigned long seg; 1738 struct iovec *iov; 1739 1740 iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat); 1741 if (IS_ERR(iov)) { 1742 *iovp = NULL; 1743 return PTR_ERR(iov); 1744 } 1745 1746 /* 1747 * According to the Single Unix Specification we should return EINVAL if 1748 * an element length is < 0 when cast to ssize_t or if the total length 1749 * would overflow the ssize_t return value of the system call. 1750 * 1751 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the 1752 * overflow case. 
1753 */ 1754 for (seg = 0; seg < nr_segs; seg++) { 1755 ssize_t len = (ssize_t)iov[seg].iov_len; 1756 1757 if (!access_ok(iov[seg].iov_base, len)) { 1758 if (iov != *iovp) 1759 kfree(iov); 1760 *iovp = NULL; 1761 return -EFAULT; 1762 } 1763 1764 if (len > MAX_RW_COUNT - total_len) { 1765 len = MAX_RW_COUNT - total_len; 1766 iov[seg].iov_len = len; 1767 } 1768 total_len += len; 1769 } 1770 1771 iov_iter_init(i, type, iov, nr_segs, total_len); 1772 if (iov == *iovp) 1773 *iovp = NULL; 1774 else 1775 *iovp = iov; 1776 return total_len; 1777} 1778 1779/** 1780 * import_iovec() - Copy an array of &struct iovec from userspace 1781 * into the kernel, check that it is valid, and initialize a new 1782 * &struct iov_iter iterator to access it. 1783 * 1784 * @type: One of %READ or %WRITE. 1785 * @uvec: Pointer to the userspace array. 1786 * @nr_segs: Number of elements in userspace array. 1787 * @fast_segs: Number of elements in @iov. 1788 * @iovp: (input and output parameter) Pointer to pointer to (usually small 1789 * on-stack) kernel array. 1790 * @i: Pointer to iterator that will be initialized on success. 1791 * 1792 * If the array pointed to by *@iov is large enough to hold all @nr_segs, 1793 * then this function places %NULL in *@iov on return. Otherwise, a new 1794 * array will be allocated and the result placed in *@iov. This means that 1795 * the caller may call kfree() on *@iov regardless of whether the small 1796 * on-stack array was used or not (and regardless of whether this function 1797 * returns an error or not). 
1798 * 1799 * Return: Negative error code on error, bytes imported on success 1800 */ 1801ssize_t import_iovec(int type, const struct iovec __user *uvec, 1802 unsigned nr_segs, unsigned fast_segs, 1803 struct iovec **iovp, struct iov_iter *i) 1804{ 1805 return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i, 1806 in_compat_syscall()); 1807} 1808EXPORT_SYMBOL(import_iovec); 1809 1810int import_single_range(int rw, void __user *buf, size_t len, 1811 struct iovec *iov, struct iov_iter *i) 1812{ 1813 if (len > MAX_RW_COUNT) 1814 len = MAX_RW_COUNT; 1815 if (unlikely(!access_ok(buf, len))) 1816 return -EFAULT; 1817 1818 iov->iov_base = buf; 1819 iov->iov_len = len; 1820 iov_iter_init(i, rw, iov, 1, len); 1821 return 0; 1822} 1823EXPORT_SYMBOL(import_single_range); 1824 1825/** 1826 * iov_iter_restore() - Restore a &struct iov_iter to the same state as when 1827 * iov_iter_save_state() was called. 1828 * 1829 * @i: &struct iov_iter to restore 1830 * @state: state to restore from 1831 * 1832 * Used after iov_iter_save_state() to bring restore @i, if operations may 1833 * have advanced it. 1834 * 1835 * Note: only works on ITER_IOVEC, ITER_BVEC, and ITER_KVEC 1836 */ 1837void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state) 1838{ 1839 if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i)) && 1840 !iov_iter_is_kvec(i)) 1841 return; 1842 i->iov_offset = state->iov_offset; 1843 i->count = state->count; 1844 /* 1845 * For the *vec iters, nr_segs + iov is constant - if we increment 1846 * the vec, then we also decrement the nr_segs count. Hence we don't 1847 * need to track both of these, just one is enough and we can deduct 1848 * the other from that. ITER_KVEC and ITER_IOVEC are the same struct 1849 * size, so we can just increment the iov pointer as they are unionzed. 1850 * ITER_BVEC _may_ be the same size on some archs, but on others it is 1851 * not. Be safe and handle it separately. 
1852 */ 1853 BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec)); 1854 if (iov_iter_is_bvec(i)) 1855 i->bvec -= state->nr_segs - i->nr_segs; 1856 else 1857 i->iov -= state->nr_segs - i->nr_segs; 1858 i->nr_segs = state->nr_segs; 1859} 1860