// SPDX-License-Identifier: GPL-2.0
/*
 *	fs/proc/kcore.c kernel ELF core dumper
 *
 *	Modelled on fs/exec.c:aout_core_dump()
 *	Jeremy Fitzhardinge <jeremy@sw.oz.au>
 *	ELF version written by David Howells <David.Howells@nexor.co.uk>
 *	Modified and incorporated into 2.3.x by Tigran Aivazian <tigran@veritas.com>
 *	Support to dump vmalloc'd areas (ELF only), Tigran Aivazian <tigran@veritas.com>
 *	Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
 */

#include <linux/crash_core.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/kcore.h>
#include <linux/user.h>
#include <linux/capability.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/printk.h>
#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include <asm/io.h>
#include <linux/list.h>
#include <linux/ioport.h>
#include <linux/memory.h>
#include <linux/sched/task.h>
#include <linux/security.h>
#include <asm/sections.h>
#include "internal.h"

3762306a36Sopenharmony_ci#define CORE_STR "CORE" 3862306a36Sopenharmony_ci 3962306a36Sopenharmony_ci#ifndef ELF_CORE_EFLAGS 4062306a36Sopenharmony_ci#define ELF_CORE_EFLAGS 0 4162306a36Sopenharmony_ci#endif 4262306a36Sopenharmony_ci 4362306a36Sopenharmony_cistatic struct proc_dir_entry *proc_root_kcore; 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci#ifndef kc_vaddr_to_offset 4762306a36Sopenharmony_ci#define kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET) 4862306a36Sopenharmony_ci#endif 4962306a36Sopenharmony_ci#ifndef kc_offset_to_vaddr 5062306a36Sopenharmony_ci#define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET) 5162306a36Sopenharmony_ci#endif 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_cistatic LIST_HEAD(kclist_head); 5462306a36Sopenharmony_cistatic DECLARE_RWSEM(kclist_lock); 5562306a36Sopenharmony_cistatic int kcore_need_update = 1; 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_ci/* 5862306a36Sopenharmony_ci * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error 5962306a36Sopenharmony_ci * Same as oldmem_pfn_is_ram in vmcore 6062306a36Sopenharmony_ci */ 6162306a36Sopenharmony_cistatic int (*mem_pfn_is_ram)(unsigned long pfn); 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_ciint __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn)) 6462306a36Sopenharmony_ci{ 6562306a36Sopenharmony_ci if (mem_pfn_is_ram) 6662306a36Sopenharmony_ci return -EBUSY; 6762306a36Sopenharmony_ci mem_pfn_is_ram = fn; 6862306a36Sopenharmony_ci return 0; 6962306a36Sopenharmony_ci} 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_cistatic int pfn_is_ram(unsigned long pfn) 7262306a36Sopenharmony_ci{ 7362306a36Sopenharmony_ci if (mem_pfn_is_ram) 7462306a36Sopenharmony_ci return mem_pfn_is_ram(pfn); 7562306a36Sopenharmony_ci else 7662306a36Sopenharmony_ci return 1; 7762306a36Sopenharmony_ci} 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_ci/* This doesn't grab kclist_lock, so it should only be used at init time. 
*/ 8062306a36Sopenharmony_civoid __init kclist_add(struct kcore_list *new, void *addr, size_t size, 8162306a36Sopenharmony_ci int type) 8262306a36Sopenharmony_ci{ 8362306a36Sopenharmony_ci new->addr = (unsigned long)addr; 8462306a36Sopenharmony_ci new->size = size; 8562306a36Sopenharmony_ci new->type = type; 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_ci list_add_tail(&new->list, &kclist_head); 8862306a36Sopenharmony_ci} 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_cistatic size_t get_kcore_size(int *nphdr, size_t *phdrs_len, size_t *notes_len, 9162306a36Sopenharmony_ci size_t *data_offset) 9262306a36Sopenharmony_ci{ 9362306a36Sopenharmony_ci size_t try, size; 9462306a36Sopenharmony_ci struct kcore_list *m; 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_ci *nphdr = 1; /* PT_NOTE */ 9762306a36Sopenharmony_ci size = 0; 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci list_for_each_entry(m, &kclist_head, list) { 10062306a36Sopenharmony_ci try = kc_vaddr_to_offset((size_t)m->addr + m->size); 10162306a36Sopenharmony_ci if (try > size) 10262306a36Sopenharmony_ci size = try; 10362306a36Sopenharmony_ci *nphdr = *nphdr + 1; 10462306a36Sopenharmony_ci } 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_ci *phdrs_len = *nphdr * sizeof(struct elf_phdr); 10762306a36Sopenharmony_ci *notes_len = (4 * sizeof(struct elf_note) + 10862306a36Sopenharmony_ci 3 * ALIGN(sizeof(CORE_STR), 4) + 10962306a36Sopenharmony_ci VMCOREINFO_NOTE_NAME_BYTES + 11062306a36Sopenharmony_ci ALIGN(sizeof(struct elf_prstatus), 4) + 11162306a36Sopenharmony_ci ALIGN(sizeof(struct elf_prpsinfo), 4) + 11262306a36Sopenharmony_ci ALIGN(arch_task_struct_size, 4) + 11362306a36Sopenharmony_ci ALIGN(vmcoreinfo_size, 4)); 11462306a36Sopenharmony_ci *data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + *phdrs_len + 11562306a36Sopenharmony_ci *notes_len); 11662306a36Sopenharmony_ci return *data_offset + size; 11762306a36Sopenharmony_ci} 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_ci#ifdef 
CONFIG_HIGHMEM 12062306a36Sopenharmony_ci/* 12162306a36Sopenharmony_ci * If no highmem, we can assume [0...max_low_pfn) continuous range of memory 12262306a36Sopenharmony_ci * because memory hole is not as big as !HIGHMEM case. 12362306a36Sopenharmony_ci * (HIGHMEM is special because part of memory is _invisible_ from the kernel.) 12462306a36Sopenharmony_ci */ 12562306a36Sopenharmony_cistatic int kcore_ram_list(struct list_head *head) 12662306a36Sopenharmony_ci{ 12762306a36Sopenharmony_ci struct kcore_list *ent; 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci ent = kmalloc(sizeof(*ent), GFP_KERNEL); 13062306a36Sopenharmony_ci if (!ent) 13162306a36Sopenharmony_ci return -ENOMEM; 13262306a36Sopenharmony_ci ent->addr = (unsigned long)__va(0); 13362306a36Sopenharmony_ci ent->size = max_low_pfn << PAGE_SHIFT; 13462306a36Sopenharmony_ci ent->type = KCORE_RAM; 13562306a36Sopenharmony_ci list_add(&ent->list, head); 13662306a36Sopenharmony_ci return 0; 13762306a36Sopenharmony_ci} 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_ci#else /* !CONFIG_HIGHMEM */ 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_ci#ifdef CONFIG_SPARSEMEM_VMEMMAP 14262306a36Sopenharmony_ci/* calculate vmemmap's address from given system ram pfn and register it */ 14362306a36Sopenharmony_cistatic int 14462306a36Sopenharmony_ciget_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) 14562306a36Sopenharmony_ci{ 14662306a36Sopenharmony_ci unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT; 14762306a36Sopenharmony_ci unsigned long nr_pages = ent->size >> PAGE_SHIFT; 14862306a36Sopenharmony_ci unsigned long start, end; 14962306a36Sopenharmony_ci struct kcore_list *vmm, *tmp; 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci 15262306a36Sopenharmony_ci start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK; 15362306a36Sopenharmony_ci end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1; 15462306a36Sopenharmony_ci end = PAGE_ALIGN(end); 15562306a36Sopenharmony_ci /* overlap 
check (because we have to align page */ 15662306a36Sopenharmony_ci list_for_each_entry(tmp, head, list) { 15762306a36Sopenharmony_ci if (tmp->type != KCORE_VMEMMAP) 15862306a36Sopenharmony_ci continue; 15962306a36Sopenharmony_ci if (start < tmp->addr + tmp->size) 16062306a36Sopenharmony_ci if (end > tmp->addr) 16162306a36Sopenharmony_ci end = tmp->addr; 16262306a36Sopenharmony_ci } 16362306a36Sopenharmony_ci if (start < end) { 16462306a36Sopenharmony_ci vmm = kmalloc(sizeof(*vmm), GFP_KERNEL); 16562306a36Sopenharmony_ci if (!vmm) 16662306a36Sopenharmony_ci return 0; 16762306a36Sopenharmony_ci vmm->addr = start; 16862306a36Sopenharmony_ci vmm->size = end - start; 16962306a36Sopenharmony_ci vmm->type = KCORE_VMEMMAP; 17062306a36Sopenharmony_ci list_add_tail(&vmm->list, head); 17162306a36Sopenharmony_ci } 17262306a36Sopenharmony_ci return 1; 17362306a36Sopenharmony_ci 17462306a36Sopenharmony_ci} 17562306a36Sopenharmony_ci#else 17662306a36Sopenharmony_cistatic int 17762306a36Sopenharmony_ciget_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) 17862306a36Sopenharmony_ci{ 17962306a36Sopenharmony_ci return 1; 18062306a36Sopenharmony_ci} 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_ci#endif 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_cistatic int 18562306a36Sopenharmony_cikclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg) 18662306a36Sopenharmony_ci{ 18762306a36Sopenharmony_ci struct list_head *head = (struct list_head *)arg; 18862306a36Sopenharmony_ci struct kcore_list *ent; 18962306a36Sopenharmony_ci struct page *p; 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci if (!pfn_valid(pfn)) 19262306a36Sopenharmony_ci return 1; 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_ci p = pfn_to_page(pfn); 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci ent = kmalloc(sizeof(*ent), GFP_KERNEL); 19762306a36Sopenharmony_ci if (!ent) 19862306a36Sopenharmony_ci return -ENOMEM; 19962306a36Sopenharmony_ci ent->addr = 
(unsigned long)page_to_virt(p); 20062306a36Sopenharmony_ci ent->size = nr_pages << PAGE_SHIFT; 20162306a36Sopenharmony_ci 20262306a36Sopenharmony_ci if (!virt_addr_valid((void *)ent->addr)) 20362306a36Sopenharmony_ci goto free_out; 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci /* cut not-mapped area. ....from ppc-32 code. */ 20662306a36Sopenharmony_ci if (ULONG_MAX - ent->addr < ent->size) 20762306a36Sopenharmony_ci ent->size = ULONG_MAX - ent->addr; 20862306a36Sopenharmony_ci 20962306a36Sopenharmony_ci /* 21062306a36Sopenharmony_ci * We've already checked virt_addr_valid so we know this address 21162306a36Sopenharmony_ci * is a valid pointer, therefore we can check against it to determine 21262306a36Sopenharmony_ci * if we need to trim 21362306a36Sopenharmony_ci */ 21462306a36Sopenharmony_ci if (VMALLOC_START > ent->addr) { 21562306a36Sopenharmony_ci if (VMALLOC_START - ent->addr < ent->size) 21662306a36Sopenharmony_ci ent->size = VMALLOC_START - ent->addr; 21762306a36Sopenharmony_ci } 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci ent->type = KCORE_RAM; 22062306a36Sopenharmony_ci list_add_tail(&ent->list, head); 22162306a36Sopenharmony_ci 22262306a36Sopenharmony_ci if (!get_sparsemem_vmemmap_info(ent, head)) { 22362306a36Sopenharmony_ci list_del(&ent->list); 22462306a36Sopenharmony_ci goto free_out; 22562306a36Sopenharmony_ci } 22662306a36Sopenharmony_ci 22762306a36Sopenharmony_ci return 0; 22862306a36Sopenharmony_cifree_out: 22962306a36Sopenharmony_ci kfree(ent); 23062306a36Sopenharmony_ci return 1; 23162306a36Sopenharmony_ci} 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_cistatic int kcore_ram_list(struct list_head *list) 23462306a36Sopenharmony_ci{ 23562306a36Sopenharmony_ci int nid, ret; 23662306a36Sopenharmony_ci unsigned long end_pfn; 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_ci /* Not inialized....update now */ 23962306a36Sopenharmony_ci /* find out "max pfn" */ 24062306a36Sopenharmony_ci end_pfn = 0; 24162306a36Sopenharmony_ci 
for_each_node_state(nid, N_MEMORY) { 24262306a36Sopenharmony_ci unsigned long node_end; 24362306a36Sopenharmony_ci node_end = node_end_pfn(nid); 24462306a36Sopenharmony_ci if (end_pfn < node_end) 24562306a36Sopenharmony_ci end_pfn = node_end; 24662306a36Sopenharmony_ci } 24762306a36Sopenharmony_ci /* scan 0 to max_pfn */ 24862306a36Sopenharmony_ci ret = walk_system_ram_range(0, end_pfn, list, kclist_add_private); 24962306a36Sopenharmony_ci if (ret) 25062306a36Sopenharmony_ci return -ENOMEM; 25162306a36Sopenharmony_ci return 0; 25262306a36Sopenharmony_ci} 25362306a36Sopenharmony_ci#endif /* CONFIG_HIGHMEM */ 25462306a36Sopenharmony_ci 25562306a36Sopenharmony_cistatic int kcore_update_ram(void) 25662306a36Sopenharmony_ci{ 25762306a36Sopenharmony_ci LIST_HEAD(list); 25862306a36Sopenharmony_ci LIST_HEAD(garbage); 25962306a36Sopenharmony_ci int nphdr; 26062306a36Sopenharmony_ci size_t phdrs_len, notes_len, data_offset; 26162306a36Sopenharmony_ci struct kcore_list *tmp, *pos; 26262306a36Sopenharmony_ci int ret = 0; 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_ci down_write(&kclist_lock); 26562306a36Sopenharmony_ci if (!xchg(&kcore_need_update, 0)) 26662306a36Sopenharmony_ci goto out; 26762306a36Sopenharmony_ci 26862306a36Sopenharmony_ci ret = kcore_ram_list(&list); 26962306a36Sopenharmony_ci if (ret) { 27062306a36Sopenharmony_ci /* Couldn't get the RAM list, try again next time. 
*/ 27162306a36Sopenharmony_ci WRITE_ONCE(kcore_need_update, 1); 27262306a36Sopenharmony_ci list_splice_tail(&list, &garbage); 27362306a36Sopenharmony_ci goto out; 27462306a36Sopenharmony_ci } 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_ci list_for_each_entry_safe(pos, tmp, &kclist_head, list) { 27762306a36Sopenharmony_ci if (pos->type == KCORE_RAM || pos->type == KCORE_VMEMMAP) 27862306a36Sopenharmony_ci list_move(&pos->list, &garbage); 27962306a36Sopenharmony_ci } 28062306a36Sopenharmony_ci list_splice_tail(&list, &kclist_head); 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_ci proc_root_kcore->size = get_kcore_size(&nphdr, &phdrs_len, ¬es_len, 28362306a36Sopenharmony_ci &data_offset); 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ciout: 28662306a36Sopenharmony_ci up_write(&kclist_lock); 28762306a36Sopenharmony_ci list_for_each_entry_safe(pos, tmp, &garbage, list) { 28862306a36Sopenharmony_ci list_del(&pos->list); 28962306a36Sopenharmony_ci kfree(pos); 29062306a36Sopenharmony_ci } 29162306a36Sopenharmony_ci return ret; 29262306a36Sopenharmony_ci} 29362306a36Sopenharmony_ci 29462306a36Sopenharmony_cistatic void append_kcore_note(char *notes, size_t *i, const char *name, 29562306a36Sopenharmony_ci unsigned int type, const void *desc, 29662306a36Sopenharmony_ci size_t descsz) 29762306a36Sopenharmony_ci{ 29862306a36Sopenharmony_ci struct elf_note *note = (struct elf_note *)¬es[*i]; 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci note->n_namesz = strlen(name) + 1; 30162306a36Sopenharmony_ci note->n_descsz = descsz; 30262306a36Sopenharmony_ci note->n_type = type; 30362306a36Sopenharmony_ci *i += sizeof(*note); 30462306a36Sopenharmony_ci memcpy(¬es[*i], name, note->n_namesz); 30562306a36Sopenharmony_ci *i = ALIGN(*i + note->n_namesz, 4); 30662306a36Sopenharmony_ci memcpy(¬es[*i], desc, descsz); 30762306a36Sopenharmony_ci *i = ALIGN(*i + descsz, 4); 30862306a36Sopenharmony_ci} 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_cistatic ssize_t 
read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter) 31162306a36Sopenharmony_ci{ 31262306a36Sopenharmony_ci struct file *file = iocb->ki_filp; 31362306a36Sopenharmony_ci char *buf = file->private_data; 31462306a36Sopenharmony_ci loff_t *fpos = &iocb->ki_pos; 31562306a36Sopenharmony_ci size_t phdrs_offset, notes_offset, data_offset; 31662306a36Sopenharmony_ci size_t page_offline_frozen = 1; 31762306a36Sopenharmony_ci size_t phdrs_len, notes_len; 31862306a36Sopenharmony_ci struct kcore_list *m; 31962306a36Sopenharmony_ci size_t tsz; 32062306a36Sopenharmony_ci int nphdr; 32162306a36Sopenharmony_ci unsigned long start; 32262306a36Sopenharmony_ci size_t buflen = iov_iter_count(iter); 32362306a36Sopenharmony_ci size_t orig_buflen = buflen; 32462306a36Sopenharmony_ci int ret = 0; 32562306a36Sopenharmony_ci 32662306a36Sopenharmony_ci down_read(&kclist_lock); 32762306a36Sopenharmony_ci /* 32862306a36Sopenharmony_ci * Don't race against drivers that set PageOffline() and expect no 32962306a36Sopenharmony_ci * further page access. 33062306a36Sopenharmony_ci */ 33162306a36Sopenharmony_ci page_offline_freeze(); 33262306a36Sopenharmony_ci 33362306a36Sopenharmony_ci get_kcore_size(&nphdr, &phdrs_len, ¬es_len, &data_offset); 33462306a36Sopenharmony_ci phdrs_offset = sizeof(struct elfhdr); 33562306a36Sopenharmony_ci notes_offset = phdrs_offset + phdrs_len; 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci /* ELF file header. 
*/ 33862306a36Sopenharmony_ci if (buflen && *fpos < sizeof(struct elfhdr)) { 33962306a36Sopenharmony_ci struct elfhdr ehdr = { 34062306a36Sopenharmony_ci .e_ident = { 34162306a36Sopenharmony_ci [EI_MAG0] = ELFMAG0, 34262306a36Sopenharmony_ci [EI_MAG1] = ELFMAG1, 34362306a36Sopenharmony_ci [EI_MAG2] = ELFMAG2, 34462306a36Sopenharmony_ci [EI_MAG3] = ELFMAG3, 34562306a36Sopenharmony_ci [EI_CLASS] = ELF_CLASS, 34662306a36Sopenharmony_ci [EI_DATA] = ELF_DATA, 34762306a36Sopenharmony_ci [EI_VERSION] = EV_CURRENT, 34862306a36Sopenharmony_ci [EI_OSABI] = ELF_OSABI, 34962306a36Sopenharmony_ci }, 35062306a36Sopenharmony_ci .e_type = ET_CORE, 35162306a36Sopenharmony_ci .e_machine = ELF_ARCH, 35262306a36Sopenharmony_ci .e_version = EV_CURRENT, 35362306a36Sopenharmony_ci .e_phoff = sizeof(struct elfhdr), 35462306a36Sopenharmony_ci .e_flags = ELF_CORE_EFLAGS, 35562306a36Sopenharmony_ci .e_ehsize = sizeof(struct elfhdr), 35662306a36Sopenharmony_ci .e_phentsize = sizeof(struct elf_phdr), 35762306a36Sopenharmony_ci .e_phnum = nphdr, 35862306a36Sopenharmony_ci }; 35962306a36Sopenharmony_ci 36062306a36Sopenharmony_ci tsz = min_t(size_t, buflen, sizeof(struct elfhdr) - *fpos); 36162306a36Sopenharmony_ci if (copy_to_iter((char *)&ehdr + *fpos, tsz, iter) != tsz) { 36262306a36Sopenharmony_ci ret = -EFAULT; 36362306a36Sopenharmony_ci goto out; 36462306a36Sopenharmony_ci } 36562306a36Sopenharmony_ci 36662306a36Sopenharmony_ci buflen -= tsz; 36762306a36Sopenharmony_ci *fpos += tsz; 36862306a36Sopenharmony_ci } 36962306a36Sopenharmony_ci 37062306a36Sopenharmony_ci /* ELF program headers. 
*/ 37162306a36Sopenharmony_ci if (buflen && *fpos < phdrs_offset + phdrs_len) { 37262306a36Sopenharmony_ci struct elf_phdr *phdrs, *phdr; 37362306a36Sopenharmony_ci 37462306a36Sopenharmony_ci phdrs = kzalloc(phdrs_len, GFP_KERNEL); 37562306a36Sopenharmony_ci if (!phdrs) { 37662306a36Sopenharmony_ci ret = -ENOMEM; 37762306a36Sopenharmony_ci goto out; 37862306a36Sopenharmony_ci } 37962306a36Sopenharmony_ci 38062306a36Sopenharmony_ci phdrs[0].p_type = PT_NOTE; 38162306a36Sopenharmony_ci phdrs[0].p_offset = notes_offset; 38262306a36Sopenharmony_ci phdrs[0].p_filesz = notes_len; 38362306a36Sopenharmony_ci 38462306a36Sopenharmony_ci phdr = &phdrs[1]; 38562306a36Sopenharmony_ci list_for_each_entry(m, &kclist_head, list) { 38662306a36Sopenharmony_ci phdr->p_type = PT_LOAD; 38762306a36Sopenharmony_ci phdr->p_flags = PF_R | PF_W | PF_X; 38862306a36Sopenharmony_ci phdr->p_offset = kc_vaddr_to_offset(m->addr) + data_offset; 38962306a36Sopenharmony_ci phdr->p_vaddr = (size_t)m->addr; 39062306a36Sopenharmony_ci if (m->type == KCORE_RAM) 39162306a36Sopenharmony_ci phdr->p_paddr = __pa(m->addr); 39262306a36Sopenharmony_ci else if (m->type == KCORE_TEXT) 39362306a36Sopenharmony_ci phdr->p_paddr = __pa_symbol(m->addr); 39462306a36Sopenharmony_ci else 39562306a36Sopenharmony_ci phdr->p_paddr = (elf_addr_t)-1; 39662306a36Sopenharmony_ci phdr->p_filesz = phdr->p_memsz = m->size; 39762306a36Sopenharmony_ci phdr->p_align = PAGE_SIZE; 39862306a36Sopenharmony_ci phdr++; 39962306a36Sopenharmony_ci } 40062306a36Sopenharmony_ci 40162306a36Sopenharmony_ci tsz = min_t(size_t, buflen, phdrs_offset + phdrs_len - *fpos); 40262306a36Sopenharmony_ci if (copy_to_iter((char *)phdrs + *fpos - phdrs_offset, tsz, 40362306a36Sopenharmony_ci iter) != tsz) { 40462306a36Sopenharmony_ci kfree(phdrs); 40562306a36Sopenharmony_ci ret = -EFAULT; 40662306a36Sopenharmony_ci goto out; 40762306a36Sopenharmony_ci } 40862306a36Sopenharmony_ci kfree(phdrs); 40962306a36Sopenharmony_ci 41062306a36Sopenharmony_ci buflen -= 
tsz; 41162306a36Sopenharmony_ci *fpos += tsz; 41262306a36Sopenharmony_ci } 41362306a36Sopenharmony_ci 41462306a36Sopenharmony_ci /* ELF note segment. */ 41562306a36Sopenharmony_ci if (buflen && *fpos < notes_offset + notes_len) { 41662306a36Sopenharmony_ci struct elf_prstatus prstatus = {}; 41762306a36Sopenharmony_ci struct elf_prpsinfo prpsinfo = { 41862306a36Sopenharmony_ci .pr_sname = 'R', 41962306a36Sopenharmony_ci .pr_fname = "vmlinux", 42062306a36Sopenharmony_ci }; 42162306a36Sopenharmony_ci char *notes; 42262306a36Sopenharmony_ci size_t i = 0; 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_ci strscpy(prpsinfo.pr_psargs, saved_command_line, 42562306a36Sopenharmony_ci sizeof(prpsinfo.pr_psargs)); 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_ci notes = kzalloc(notes_len, GFP_KERNEL); 42862306a36Sopenharmony_ci if (!notes) { 42962306a36Sopenharmony_ci ret = -ENOMEM; 43062306a36Sopenharmony_ci goto out; 43162306a36Sopenharmony_ci } 43262306a36Sopenharmony_ci 43362306a36Sopenharmony_ci append_kcore_note(notes, &i, CORE_STR, NT_PRSTATUS, &prstatus, 43462306a36Sopenharmony_ci sizeof(prstatus)); 43562306a36Sopenharmony_ci append_kcore_note(notes, &i, CORE_STR, NT_PRPSINFO, &prpsinfo, 43662306a36Sopenharmony_ci sizeof(prpsinfo)); 43762306a36Sopenharmony_ci append_kcore_note(notes, &i, CORE_STR, NT_TASKSTRUCT, current, 43862306a36Sopenharmony_ci arch_task_struct_size); 43962306a36Sopenharmony_ci /* 44062306a36Sopenharmony_ci * vmcoreinfo_size is mostly constant after init time, but it 44162306a36Sopenharmony_ci * can be changed by crash_save_vmcoreinfo(). Racing here with a 44262306a36Sopenharmony_ci * panic on another CPU before the machine goes down is insanely 44362306a36Sopenharmony_ci * unlikely, but it's better to not leave potential buffer 44462306a36Sopenharmony_ci * overflows lying around, regardless. 
44562306a36Sopenharmony_ci */ 44662306a36Sopenharmony_ci append_kcore_note(notes, &i, VMCOREINFO_NOTE_NAME, 0, 44762306a36Sopenharmony_ci vmcoreinfo_data, 44862306a36Sopenharmony_ci min(vmcoreinfo_size, notes_len - i)); 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_ci tsz = min_t(size_t, buflen, notes_offset + notes_len - *fpos); 45162306a36Sopenharmony_ci if (copy_to_iter(notes + *fpos - notes_offset, tsz, iter) != tsz) { 45262306a36Sopenharmony_ci kfree(notes); 45362306a36Sopenharmony_ci ret = -EFAULT; 45462306a36Sopenharmony_ci goto out; 45562306a36Sopenharmony_ci } 45662306a36Sopenharmony_ci kfree(notes); 45762306a36Sopenharmony_ci 45862306a36Sopenharmony_ci buflen -= tsz; 45962306a36Sopenharmony_ci *fpos += tsz; 46062306a36Sopenharmony_ci } 46162306a36Sopenharmony_ci 46262306a36Sopenharmony_ci /* 46362306a36Sopenharmony_ci * Check to see if our file offset matches with any of 46462306a36Sopenharmony_ci * the addresses in the elf_phdr on our list. 46562306a36Sopenharmony_ci */ 46662306a36Sopenharmony_ci start = kc_offset_to_vaddr(*fpos - data_offset); 46762306a36Sopenharmony_ci if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen) 46862306a36Sopenharmony_ci tsz = buflen; 46962306a36Sopenharmony_ci 47062306a36Sopenharmony_ci m = NULL; 47162306a36Sopenharmony_ci while (buflen) { 47262306a36Sopenharmony_ci struct page *page; 47362306a36Sopenharmony_ci unsigned long pfn; 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_ci /* 47662306a36Sopenharmony_ci * If this is the first iteration or the address is not within 47762306a36Sopenharmony_ci * the previous entry, search for a matching entry. 
47862306a36Sopenharmony_ci */ 47962306a36Sopenharmony_ci if (!m || start < m->addr || start >= m->addr + m->size) { 48062306a36Sopenharmony_ci struct kcore_list *iter; 48162306a36Sopenharmony_ci 48262306a36Sopenharmony_ci m = NULL; 48362306a36Sopenharmony_ci list_for_each_entry(iter, &kclist_head, list) { 48462306a36Sopenharmony_ci if (start >= iter->addr && 48562306a36Sopenharmony_ci start < iter->addr + iter->size) { 48662306a36Sopenharmony_ci m = iter; 48762306a36Sopenharmony_ci break; 48862306a36Sopenharmony_ci } 48962306a36Sopenharmony_ci } 49062306a36Sopenharmony_ci } 49162306a36Sopenharmony_ci 49262306a36Sopenharmony_ci if (page_offline_frozen++ % MAX_ORDER_NR_PAGES == 0) { 49362306a36Sopenharmony_ci page_offline_thaw(); 49462306a36Sopenharmony_ci cond_resched(); 49562306a36Sopenharmony_ci page_offline_freeze(); 49662306a36Sopenharmony_ci } 49762306a36Sopenharmony_ci 49862306a36Sopenharmony_ci if (!m) { 49962306a36Sopenharmony_ci if (iov_iter_zero(tsz, iter) != tsz) { 50062306a36Sopenharmony_ci ret = -EFAULT; 50162306a36Sopenharmony_ci goto out; 50262306a36Sopenharmony_ci } 50362306a36Sopenharmony_ci goto skip; 50462306a36Sopenharmony_ci } 50562306a36Sopenharmony_ci 50662306a36Sopenharmony_ci switch (m->type) { 50762306a36Sopenharmony_ci case KCORE_VMALLOC: 50862306a36Sopenharmony_ci { 50962306a36Sopenharmony_ci const char *src = (char *)start; 51062306a36Sopenharmony_ci size_t read = 0, left = tsz; 51162306a36Sopenharmony_ci 51262306a36Sopenharmony_ci /* 51362306a36Sopenharmony_ci * vmalloc uses spinlocks, so we optimistically try to 51462306a36Sopenharmony_ci * read memory. If this fails, fault pages in and try 51562306a36Sopenharmony_ci * again until we are done. 
51662306a36Sopenharmony_ci */ 51762306a36Sopenharmony_ci while (true) { 51862306a36Sopenharmony_ci read += vread_iter(iter, src, left); 51962306a36Sopenharmony_ci if (read == tsz) 52062306a36Sopenharmony_ci break; 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_ci src += read; 52362306a36Sopenharmony_ci left -= read; 52462306a36Sopenharmony_ci 52562306a36Sopenharmony_ci if (fault_in_iov_iter_writeable(iter, left)) { 52662306a36Sopenharmony_ci ret = -EFAULT; 52762306a36Sopenharmony_ci goto out; 52862306a36Sopenharmony_ci } 52962306a36Sopenharmony_ci } 53062306a36Sopenharmony_ci break; 53162306a36Sopenharmony_ci } 53262306a36Sopenharmony_ci case KCORE_USER: 53362306a36Sopenharmony_ci /* User page is handled prior to normal kernel page: */ 53462306a36Sopenharmony_ci if (copy_to_iter((char *)start, tsz, iter) != tsz) { 53562306a36Sopenharmony_ci ret = -EFAULT; 53662306a36Sopenharmony_ci goto out; 53762306a36Sopenharmony_ci } 53862306a36Sopenharmony_ci break; 53962306a36Sopenharmony_ci case KCORE_RAM: 54062306a36Sopenharmony_ci pfn = __pa(start) >> PAGE_SHIFT; 54162306a36Sopenharmony_ci page = pfn_to_online_page(pfn); 54262306a36Sopenharmony_ci 54362306a36Sopenharmony_ci /* 54462306a36Sopenharmony_ci * Don't read offline sections, logically offline pages 54562306a36Sopenharmony_ci * (e.g., inflated in a balloon), hwpoisoned pages, 54662306a36Sopenharmony_ci * and explicitly excluded physical ranges. 
54762306a36Sopenharmony_ci */ 54862306a36Sopenharmony_ci if (!page || PageOffline(page) || 54962306a36Sopenharmony_ci is_page_hwpoison(page) || !pfn_is_ram(pfn)) { 55062306a36Sopenharmony_ci if (iov_iter_zero(tsz, iter) != tsz) { 55162306a36Sopenharmony_ci ret = -EFAULT; 55262306a36Sopenharmony_ci goto out; 55362306a36Sopenharmony_ci } 55462306a36Sopenharmony_ci break; 55562306a36Sopenharmony_ci } 55662306a36Sopenharmony_ci fallthrough; 55762306a36Sopenharmony_ci case KCORE_VMEMMAP: 55862306a36Sopenharmony_ci case KCORE_TEXT: 55962306a36Sopenharmony_ci /* 56062306a36Sopenharmony_ci * Sadly we must use a bounce buffer here to be able to 56162306a36Sopenharmony_ci * make use of copy_from_kernel_nofault(), as these 56262306a36Sopenharmony_ci * memory regions might not always be mapped on all 56362306a36Sopenharmony_ci * architectures. 56462306a36Sopenharmony_ci */ 56562306a36Sopenharmony_ci if (copy_from_kernel_nofault(buf, (void *)start, tsz)) { 56662306a36Sopenharmony_ci if (iov_iter_zero(tsz, iter) != tsz) { 56762306a36Sopenharmony_ci ret = -EFAULT; 56862306a36Sopenharmony_ci goto out; 56962306a36Sopenharmony_ci } 57062306a36Sopenharmony_ci /* 57162306a36Sopenharmony_ci * We know the bounce buffer is safe to copy from, so 57262306a36Sopenharmony_ci * use _copy_to_iter() directly. 
57362306a36Sopenharmony_ci */ 57462306a36Sopenharmony_ci } else if (_copy_to_iter(buf, tsz, iter) != tsz) { 57562306a36Sopenharmony_ci ret = -EFAULT; 57662306a36Sopenharmony_ci goto out; 57762306a36Sopenharmony_ci } 57862306a36Sopenharmony_ci break; 57962306a36Sopenharmony_ci default: 58062306a36Sopenharmony_ci pr_warn_once("Unhandled KCORE type: %d\n", m->type); 58162306a36Sopenharmony_ci if (iov_iter_zero(tsz, iter) != tsz) { 58262306a36Sopenharmony_ci ret = -EFAULT; 58362306a36Sopenharmony_ci goto out; 58462306a36Sopenharmony_ci } 58562306a36Sopenharmony_ci } 58662306a36Sopenharmony_ciskip: 58762306a36Sopenharmony_ci buflen -= tsz; 58862306a36Sopenharmony_ci *fpos += tsz; 58962306a36Sopenharmony_ci start += tsz; 59062306a36Sopenharmony_ci tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen); 59162306a36Sopenharmony_ci } 59262306a36Sopenharmony_ci 59362306a36Sopenharmony_ciout: 59462306a36Sopenharmony_ci page_offline_thaw(); 59562306a36Sopenharmony_ci up_read(&kclist_lock); 59662306a36Sopenharmony_ci if (ret) 59762306a36Sopenharmony_ci return ret; 59862306a36Sopenharmony_ci return orig_buflen - buflen; 59962306a36Sopenharmony_ci} 60062306a36Sopenharmony_ci 60162306a36Sopenharmony_cistatic int open_kcore(struct inode *inode, struct file *filp) 60262306a36Sopenharmony_ci{ 60362306a36Sopenharmony_ci int ret = security_locked_down(LOCKDOWN_KCORE); 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_ci if (!capable(CAP_SYS_RAWIO)) 60662306a36Sopenharmony_ci return -EPERM; 60762306a36Sopenharmony_ci 60862306a36Sopenharmony_ci if (ret) 60962306a36Sopenharmony_ci return ret; 61062306a36Sopenharmony_ci 61162306a36Sopenharmony_ci filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL); 61262306a36Sopenharmony_ci if (!filp->private_data) 61362306a36Sopenharmony_ci return -ENOMEM; 61462306a36Sopenharmony_ci 61562306a36Sopenharmony_ci if (kcore_need_update) 61662306a36Sopenharmony_ci kcore_update_ram(); 61762306a36Sopenharmony_ci if (i_size_read(inode) != proc_root_kcore->size) { 
61862306a36Sopenharmony_ci inode_lock(inode); 61962306a36Sopenharmony_ci i_size_write(inode, proc_root_kcore->size); 62062306a36Sopenharmony_ci inode_unlock(inode); 62162306a36Sopenharmony_ci } 62262306a36Sopenharmony_ci return 0; 62362306a36Sopenharmony_ci} 62462306a36Sopenharmony_ci 62562306a36Sopenharmony_cistatic int release_kcore(struct inode *inode, struct file *file) 62662306a36Sopenharmony_ci{ 62762306a36Sopenharmony_ci kfree(file->private_data); 62862306a36Sopenharmony_ci return 0; 62962306a36Sopenharmony_ci} 63062306a36Sopenharmony_ci 63162306a36Sopenharmony_cistatic const struct proc_ops kcore_proc_ops = { 63262306a36Sopenharmony_ci .proc_read_iter = read_kcore_iter, 63362306a36Sopenharmony_ci .proc_open = open_kcore, 63462306a36Sopenharmony_ci .proc_release = release_kcore, 63562306a36Sopenharmony_ci .proc_lseek = default_llseek, 63662306a36Sopenharmony_ci}; 63762306a36Sopenharmony_ci 63862306a36Sopenharmony_ci/* just remember that we have to update kcore */ 63962306a36Sopenharmony_cistatic int __meminit kcore_callback(struct notifier_block *self, 64062306a36Sopenharmony_ci unsigned long action, void *arg) 64162306a36Sopenharmony_ci{ 64262306a36Sopenharmony_ci switch (action) { 64362306a36Sopenharmony_ci case MEM_ONLINE: 64462306a36Sopenharmony_ci case MEM_OFFLINE: 64562306a36Sopenharmony_ci kcore_need_update = 1; 64662306a36Sopenharmony_ci break; 64762306a36Sopenharmony_ci } 64862306a36Sopenharmony_ci return NOTIFY_OK; 64962306a36Sopenharmony_ci} 65062306a36Sopenharmony_ci 65162306a36Sopenharmony_ci 65262306a36Sopenharmony_cistatic struct kcore_list kcore_vmalloc; 65362306a36Sopenharmony_ci 65462306a36Sopenharmony_ci#ifdef CONFIG_ARCH_PROC_KCORE_TEXT 65562306a36Sopenharmony_cistatic struct kcore_list kcore_text; 65662306a36Sopenharmony_ci/* 65762306a36Sopenharmony_ci * If defined, special segment is used for mapping kernel text instead of 65862306a36Sopenharmony_ci * direct-map area. We need to create special TEXT section. 
65962306a36Sopenharmony_ci */ 66062306a36Sopenharmony_cistatic void __init proc_kcore_text_init(void) 66162306a36Sopenharmony_ci{ 66262306a36Sopenharmony_ci kclist_add(&kcore_text, _text, _end - _text, KCORE_TEXT); 66362306a36Sopenharmony_ci} 66462306a36Sopenharmony_ci#else 66562306a36Sopenharmony_cistatic void __init proc_kcore_text_init(void) 66662306a36Sopenharmony_ci{ 66762306a36Sopenharmony_ci} 66862306a36Sopenharmony_ci#endif 66962306a36Sopenharmony_ci 67062306a36Sopenharmony_ci#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) 67162306a36Sopenharmony_ci/* 67262306a36Sopenharmony_ci * MODULES_VADDR has no intersection with VMALLOC_ADDR. 67362306a36Sopenharmony_ci */ 67462306a36Sopenharmony_cistatic struct kcore_list kcore_modules; 67562306a36Sopenharmony_cistatic void __init add_modules_range(void) 67662306a36Sopenharmony_ci{ 67762306a36Sopenharmony_ci if (MODULES_VADDR != VMALLOC_START && MODULES_END != VMALLOC_END) { 67862306a36Sopenharmony_ci kclist_add(&kcore_modules, (void *)MODULES_VADDR, 67962306a36Sopenharmony_ci MODULES_END - MODULES_VADDR, KCORE_VMALLOC); 68062306a36Sopenharmony_ci } 68162306a36Sopenharmony_ci} 68262306a36Sopenharmony_ci#else 68362306a36Sopenharmony_cistatic void __init add_modules_range(void) 68462306a36Sopenharmony_ci{ 68562306a36Sopenharmony_ci} 68662306a36Sopenharmony_ci#endif 68762306a36Sopenharmony_ci 68862306a36Sopenharmony_cistatic int __init proc_kcore_init(void) 68962306a36Sopenharmony_ci{ 69062306a36Sopenharmony_ci proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &kcore_proc_ops); 69162306a36Sopenharmony_ci if (!proc_root_kcore) { 69262306a36Sopenharmony_ci pr_err("couldn't create /proc/kcore\n"); 69362306a36Sopenharmony_ci return 0; /* Always returns 0. 
*/ 69462306a36Sopenharmony_ci } 69562306a36Sopenharmony_ci /* Store text area if it's special */ 69662306a36Sopenharmony_ci proc_kcore_text_init(); 69762306a36Sopenharmony_ci /* Store vmalloc area */ 69862306a36Sopenharmony_ci kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, 69962306a36Sopenharmony_ci VMALLOC_END - VMALLOC_START, KCORE_VMALLOC); 70062306a36Sopenharmony_ci add_modules_range(); 70162306a36Sopenharmony_ci /* Store direct-map area from physical memory map */ 70262306a36Sopenharmony_ci kcore_update_ram(); 70362306a36Sopenharmony_ci hotplug_memory_notifier(kcore_callback, DEFAULT_CALLBACK_PRI); 70462306a36Sopenharmony_ci 70562306a36Sopenharmony_ci return 0; 70662306a36Sopenharmony_ci} 70762306a36Sopenharmony_cifs_initcall(proc_kcore_init); 708