162306a36Sopenharmony_ci/* 262306a36Sopenharmony_ci * 362306a36Sopenharmony_ci * Copyright IBM Corporation, 2012 462306a36Sopenharmony_ci * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 562306a36Sopenharmony_ci * 662306a36Sopenharmony_ci * Cgroup v2 762306a36Sopenharmony_ci * Copyright (C) 2019 Red Hat, Inc. 862306a36Sopenharmony_ci * Author: Giuseppe Scrivano <gscrivan@redhat.com> 962306a36Sopenharmony_ci * 1062306a36Sopenharmony_ci * This program is free software; you can redistribute it and/or modify it 1162306a36Sopenharmony_ci * under the terms of version 2.1 of the GNU Lesser General Public License 1262306a36Sopenharmony_ci * as published by the Free Software Foundation. 1362306a36Sopenharmony_ci * 1462306a36Sopenharmony_ci * This program is distributed in the hope that it would be useful, but 1562306a36Sopenharmony_ci * WITHOUT ANY WARRANTY; without even the implied warranty of 1662306a36Sopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 1762306a36Sopenharmony_ci * 1862306a36Sopenharmony_ci */ 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci#include <linux/cgroup.h> 2162306a36Sopenharmony_ci#include <linux/page_counter.h> 2262306a36Sopenharmony_ci#include <linux/slab.h> 2362306a36Sopenharmony_ci#include <linux/hugetlb.h> 2462306a36Sopenharmony_ci#include <linux/hugetlb_cgroup.h> 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_ci#define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val)) 2762306a36Sopenharmony_ci#define MEMFILE_IDX(val) (((val) >> 16) & 0xffff) 2862306a36Sopenharmony_ci#define MEMFILE_ATTR(val) ((val) & 0xffff) 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_cistatic struct hugetlb_cgroup *root_h_cgroup __read_mostly; 3162306a36Sopenharmony_ci 3262306a36Sopenharmony_cistatic inline struct page_counter * 3362306a36Sopenharmony_ci__hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx, 3462306a36Sopenharmony_ci bool rsvd) 3562306a36Sopenharmony_ci{ 3662306a36Sopenharmony_ci if (rsvd) 3762306a36Sopenharmony_ci return &h_cg->rsvd_hugepage[idx]; 3862306a36Sopenharmony_ci return &h_cg->hugepage[idx]; 3962306a36Sopenharmony_ci} 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_cistatic inline struct page_counter * 4262306a36Sopenharmony_cihugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx) 4362306a36Sopenharmony_ci{ 4462306a36Sopenharmony_ci return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false); 4562306a36Sopenharmony_ci} 4662306a36Sopenharmony_ci 4762306a36Sopenharmony_cistatic inline struct page_counter * 4862306a36Sopenharmony_cihugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx) 4962306a36Sopenharmony_ci{ 5062306a36Sopenharmony_ci return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true); 5162306a36Sopenharmony_ci} 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_cistatic inline 5462306a36Sopenharmony_cistruct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s) 5562306a36Sopenharmony_ci{ 5662306a36Sopenharmony_ci return s ? container_of(s, struct hugetlb_cgroup, css) : NULL; 5762306a36Sopenharmony_ci} 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_cistatic inline 6062306a36Sopenharmony_cistruct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task) 6162306a36Sopenharmony_ci{ 6262306a36Sopenharmony_ci return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id)); 6362306a36Sopenharmony_ci} 6462306a36Sopenharmony_ci 6562306a36Sopenharmony_cistatic inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg) 6662306a36Sopenharmony_ci{ 6762306a36Sopenharmony_ci return (h_cg == root_h_cgroup); 6862306a36Sopenharmony_ci} 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_cistatic inline struct hugetlb_cgroup * 7162306a36Sopenharmony_ciparent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg) 7262306a36Sopenharmony_ci{ 7362306a36Sopenharmony_ci return hugetlb_cgroup_from_css(h_cg->css.parent); 7462306a36Sopenharmony_ci} 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_cistatic inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg) 7762306a36Sopenharmony_ci{ 7862306a36Sopenharmony_ci struct hstate *h; 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_ci for_each_hstate(h) { 8162306a36Sopenharmony_ci if (page_counter_read( 8262306a36Sopenharmony_ci hugetlb_cgroup_counter_from_cgroup(h_cg, hstate_index(h)))) 8362306a36Sopenharmony_ci return true; 8462306a36Sopenharmony_ci } 8562306a36Sopenharmony_ci return false; 8662306a36Sopenharmony_ci} 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_cistatic void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup, 8962306a36Sopenharmony_ci struct hugetlb_cgroup *parent_h_cgroup) 9062306a36Sopenharmony_ci{ 9162306a36Sopenharmony_ci int idx; 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_ci for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) { 9462306a36Sopenharmony_ci struct page_counter *fault_parent = NULL; 9562306a36Sopenharmony_ci struct page_counter *rsvd_parent = NULL; 9662306a36Sopenharmony_ci unsigned long limit; 9762306a36Sopenharmony_ci int ret; 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci if (parent_h_cgroup) { 10062306a36Sopenharmony_ci fault_parent = hugetlb_cgroup_counter_from_cgroup( 10162306a36Sopenharmony_ci parent_h_cgroup, idx); 10262306a36Sopenharmony_ci rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd( 10362306a36Sopenharmony_ci parent_h_cgroup, idx); 10462306a36Sopenharmony_ci } 10562306a36Sopenharmony_ci page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup, 10662306a36Sopenharmony_ci idx), 10762306a36Sopenharmony_ci fault_parent); 10862306a36Sopenharmony_ci page_counter_init( 10962306a36Sopenharmony_ci hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx), 11062306a36Sopenharmony_ci rsvd_parent); 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci limit = round_down(PAGE_COUNTER_MAX, 11362306a36Sopenharmony_ci pages_per_huge_page(&hstates[idx])); 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_ci ret = page_counter_set_max( 11662306a36Sopenharmony_ci hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx), 11762306a36Sopenharmony_ci limit); 11862306a36Sopenharmony_ci VM_BUG_ON(ret); 11962306a36Sopenharmony_ci ret = page_counter_set_max( 12062306a36Sopenharmony_ci hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx), 12162306a36Sopenharmony_ci limit); 12262306a36Sopenharmony_ci VM_BUG_ON(ret); 12362306a36Sopenharmony_ci } 12462306a36Sopenharmony_ci} 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_cistatic void hugetlb_cgroup_free(struct hugetlb_cgroup *h_cgroup) 12762306a36Sopenharmony_ci{ 12862306a36Sopenharmony_ci int node; 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ci for_each_node(node) 13162306a36Sopenharmony_ci kfree(h_cgroup->nodeinfo[node]); 13262306a36Sopenharmony_ci kfree(h_cgroup); 13362306a36Sopenharmony_ci} 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_cistatic struct cgroup_subsys_state * 13662306a36Sopenharmony_cihugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) 13762306a36Sopenharmony_ci{ 13862306a36Sopenharmony_ci struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css); 13962306a36Sopenharmony_ci struct hugetlb_cgroup *h_cgroup; 14062306a36Sopenharmony_ci int node; 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_ci h_cgroup = kzalloc(struct_size(h_cgroup, nodeinfo, nr_node_ids), 14362306a36Sopenharmony_ci GFP_KERNEL); 14462306a36Sopenharmony_ci 14562306a36Sopenharmony_ci if (!h_cgroup) 14662306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_ci if (!parent_h_cgroup) 14962306a36Sopenharmony_ci root_h_cgroup = h_cgroup; 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci /* 15262306a36Sopenharmony_ci * TODO: this routine can waste much memory for nodes which will 15362306a36Sopenharmony_ci * never be onlined. It's better to use memory hotplug callback 15462306a36Sopenharmony_ci * function. 15562306a36Sopenharmony_ci */ 15662306a36Sopenharmony_ci for_each_node(node) { 15762306a36Sopenharmony_ci /* Set node_to_alloc to NUMA_NO_NODE for offline nodes. */ 15862306a36Sopenharmony_ci int node_to_alloc = 15962306a36Sopenharmony_ci node_state(node, N_NORMAL_MEMORY) ? node : NUMA_NO_NODE; 16062306a36Sopenharmony_ci h_cgroup->nodeinfo[node] = 16162306a36Sopenharmony_ci kzalloc_node(sizeof(struct hugetlb_cgroup_per_node), 16262306a36Sopenharmony_ci GFP_KERNEL, node_to_alloc); 16362306a36Sopenharmony_ci if (!h_cgroup->nodeinfo[node]) 16462306a36Sopenharmony_ci goto fail_alloc_nodeinfo; 16562306a36Sopenharmony_ci } 16662306a36Sopenharmony_ci 16762306a36Sopenharmony_ci hugetlb_cgroup_init(h_cgroup, parent_h_cgroup); 16862306a36Sopenharmony_ci return &h_cgroup->css; 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_cifail_alloc_nodeinfo: 17162306a36Sopenharmony_ci hugetlb_cgroup_free(h_cgroup); 17262306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 17362306a36Sopenharmony_ci} 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_cistatic void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css) 17662306a36Sopenharmony_ci{ 17762306a36Sopenharmony_ci hugetlb_cgroup_free(hugetlb_cgroup_from_css(css)); 17862306a36Sopenharmony_ci} 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci/* 18162306a36Sopenharmony_ci * Should be called with hugetlb_lock held. 18262306a36Sopenharmony_ci * Since we are holding hugetlb_lock, pages cannot get moved from 18362306a36Sopenharmony_ci * active list or uncharged from the cgroup, So no need to get 18462306a36Sopenharmony_ci * page reference and test for page active here. This function 18562306a36Sopenharmony_ci * cannot fail. 18662306a36Sopenharmony_ci */ 18762306a36Sopenharmony_cistatic void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg, 18862306a36Sopenharmony_ci struct page *page) 18962306a36Sopenharmony_ci{ 19062306a36Sopenharmony_ci unsigned int nr_pages; 19162306a36Sopenharmony_ci struct page_counter *counter; 19262306a36Sopenharmony_ci struct hugetlb_cgroup *page_hcg; 19362306a36Sopenharmony_ci struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg); 19462306a36Sopenharmony_ci struct folio *folio = page_folio(page); 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci page_hcg = hugetlb_cgroup_from_folio(folio); 19762306a36Sopenharmony_ci /* 19862306a36Sopenharmony_ci * We can have pages in active list without any cgroup 19962306a36Sopenharmony_ci * ie, hugepage with less than 3 pages. We can safely 20062306a36Sopenharmony_ci * ignore those pages. 20162306a36Sopenharmony_ci */ 20262306a36Sopenharmony_ci if (!page_hcg || page_hcg != h_cg) 20362306a36Sopenharmony_ci goto out; 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci nr_pages = compound_nr(page); 20662306a36Sopenharmony_ci if (!parent) { 20762306a36Sopenharmony_ci parent = root_h_cgroup; 20862306a36Sopenharmony_ci /* root has no limit */ 20962306a36Sopenharmony_ci page_counter_charge(&parent->hugepage[idx], nr_pages); 21062306a36Sopenharmony_ci } 21162306a36Sopenharmony_ci counter = &h_cg->hugepage[idx]; 21262306a36Sopenharmony_ci /* Take the pages off the local counter */ 21362306a36Sopenharmony_ci page_counter_cancel(counter, nr_pages); 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci set_hugetlb_cgroup(folio, parent); 21662306a36Sopenharmony_ciout: 21762306a36Sopenharmony_ci return; 21862306a36Sopenharmony_ci} 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_ci/* 22162306a36Sopenharmony_ci * Force the hugetlb cgroup to empty the hugetlb resources by moving them to 22262306a36Sopenharmony_ci * the parent cgroup. 22362306a36Sopenharmony_ci */ 22462306a36Sopenharmony_cistatic void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css) 22562306a36Sopenharmony_ci{ 22662306a36Sopenharmony_ci struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); 22762306a36Sopenharmony_ci struct hstate *h; 22862306a36Sopenharmony_ci struct page *page; 22962306a36Sopenharmony_ci 23062306a36Sopenharmony_ci do { 23162306a36Sopenharmony_ci for_each_hstate(h) { 23262306a36Sopenharmony_ci spin_lock_irq(&hugetlb_lock); 23362306a36Sopenharmony_ci list_for_each_entry(page, &h->hugepage_activelist, lru) 23462306a36Sopenharmony_ci hugetlb_cgroup_move_parent(hstate_index(h), h_cg, page); 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci spin_unlock_irq(&hugetlb_lock); 23762306a36Sopenharmony_ci } 23862306a36Sopenharmony_ci cond_resched(); 23962306a36Sopenharmony_ci } while (hugetlb_cgroup_have_usage(h_cg)); 24062306a36Sopenharmony_ci} 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_cistatic inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx, 24362306a36Sopenharmony_ci enum hugetlb_memory_event event) 24462306a36Sopenharmony_ci{ 24562306a36Sopenharmony_ci atomic_long_inc(&hugetlb->events_local[idx][event]); 24662306a36Sopenharmony_ci cgroup_file_notify(&hugetlb->events_local_file[idx]); 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_ci do { 24962306a36Sopenharmony_ci atomic_long_inc(&hugetlb->events[idx][event]); 25062306a36Sopenharmony_ci cgroup_file_notify(&hugetlb->events_file[idx]); 25162306a36Sopenharmony_ci } while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) && 25262306a36Sopenharmony_ci !hugetlb_cgroup_is_root(hugetlb)); 25362306a36Sopenharmony_ci} 25462306a36Sopenharmony_ci 25562306a36Sopenharmony_cistatic int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, 25662306a36Sopenharmony_ci struct hugetlb_cgroup **ptr, 25762306a36Sopenharmony_ci bool rsvd) 25862306a36Sopenharmony_ci{ 25962306a36Sopenharmony_ci int ret = 0; 26062306a36Sopenharmony_ci struct page_counter *counter; 26162306a36Sopenharmony_ci struct hugetlb_cgroup *h_cg = NULL; 26262306a36Sopenharmony_ci 26362306a36Sopenharmony_ci if (hugetlb_cgroup_disabled()) 26462306a36Sopenharmony_ci goto done; 26562306a36Sopenharmony_ci /* 26662306a36Sopenharmony_ci * We don't charge any cgroup if the compound page have less 26762306a36Sopenharmony_ci * than 3 pages. 26862306a36Sopenharmony_ci */ 26962306a36Sopenharmony_ci if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER) 27062306a36Sopenharmony_ci goto done; 27162306a36Sopenharmony_ciagain: 27262306a36Sopenharmony_ci rcu_read_lock(); 27362306a36Sopenharmony_ci h_cg = hugetlb_cgroup_from_task(current); 27462306a36Sopenharmony_ci if (!css_tryget(&h_cg->css)) { 27562306a36Sopenharmony_ci rcu_read_unlock(); 27662306a36Sopenharmony_ci goto again; 27762306a36Sopenharmony_ci } 27862306a36Sopenharmony_ci rcu_read_unlock(); 27962306a36Sopenharmony_ci 28062306a36Sopenharmony_ci if (!page_counter_try_charge( 28162306a36Sopenharmony_ci __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd), 28262306a36Sopenharmony_ci nr_pages, &counter)) { 28362306a36Sopenharmony_ci ret = -ENOMEM; 28462306a36Sopenharmony_ci hugetlb_event(h_cg, idx, HUGETLB_MAX); 28562306a36Sopenharmony_ci css_put(&h_cg->css); 28662306a36Sopenharmony_ci goto done; 28762306a36Sopenharmony_ci } 28862306a36Sopenharmony_ci /* Reservations take a reference to the css because they do not get 28962306a36Sopenharmony_ci * reparented. 29062306a36Sopenharmony_ci */ 29162306a36Sopenharmony_ci if (!rsvd) 29262306a36Sopenharmony_ci css_put(&h_cg->css); 29362306a36Sopenharmony_cidone: 29462306a36Sopenharmony_ci *ptr = h_cg; 29562306a36Sopenharmony_ci return ret; 29662306a36Sopenharmony_ci} 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_ciint hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, 29962306a36Sopenharmony_ci struct hugetlb_cgroup **ptr) 30062306a36Sopenharmony_ci{ 30162306a36Sopenharmony_ci return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false); 30262306a36Sopenharmony_ci} 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_ciint hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages, 30562306a36Sopenharmony_ci struct hugetlb_cgroup **ptr) 30662306a36Sopenharmony_ci{ 30762306a36Sopenharmony_ci return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true); 30862306a36Sopenharmony_ci} 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_ci/* Should be called with hugetlb_lock held */ 31162306a36Sopenharmony_cistatic void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, 31262306a36Sopenharmony_ci struct hugetlb_cgroup *h_cg, 31362306a36Sopenharmony_ci struct folio *folio, bool rsvd) 31462306a36Sopenharmony_ci{ 31562306a36Sopenharmony_ci if (hugetlb_cgroup_disabled() || !h_cg) 31662306a36Sopenharmony_ci return; 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_ci __set_hugetlb_cgroup(folio, h_cg, rsvd); 31962306a36Sopenharmony_ci if (!rsvd) { 32062306a36Sopenharmony_ci unsigned long usage = 32162306a36Sopenharmony_ci h_cg->nodeinfo[folio_nid(folio)]->usage[idx]; 32262306a36Sopenharmony_ci /* 32362306a36Sopenharmony_ci * This write is not atomic due to fetching usage and writing 32462306a36Sopenharmony_ci * to it, but that's fine because we call this with 32562306a36Sopenharmony_ci * hugetlb_lock held anyway. 32662306a36Sopenharmony_ci */ 32762306a36Sopenharmony_ci WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx], 32862306a36Sopenharmony_ci usage + nr_pages); 32962306a36Sopenharmony_ci } 33062306a36Sopenharmony_ci} 33162306a36Sopenharmony_ci 33262306a36Sopenharmony_civoid hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, 33362306a36Sopenharmony_ci struct hugetlb_cgroup *h_cg, 33462306a36Sopenharmony_ci struct folio *folio) 33562306a36Sopenharmony_ci{ 33662306a36Sopenharmony_ci __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, false); 33762306a36Sopenharmony_ci} 33862306a36Sopenharmony_ci 33962306a36Sopenharmony_civoid hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, 34062306a36Sopenharmony_ci struct hugetlb_cgroup *h_cg, 34162306a36Sopenharmony_ci struct folio *folio) 34262306a36Sopenharmony_ci{ 34362306a36Sopenharmony_ci __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, true); 34462306a36Sopenharmony_ci} 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci/* 34762306a36Sopenharmony_ci * Should be called with hugetlb_lock held 34862306a36Sopenharmony_ci */ 34962306a36Sopenharmony_cistatic void __hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages, 35062306a36Sopenharmony_ci struct folio *folio, bool rsvd) 35162306a36Sopenharmony_ci{ 35262306a36Sopenharmony_ci struct hugetlb_cgroup *h_cg; 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci if (hugetlb_cgroup_disabled()) 35562306a36Sopenharmony_ci return; 35662306a36Sopenharmony_ci lockdep_assert_held(&hugetlb_lock); 35762306a36Sopenharmony_ci h_cg = __hugetlb_cgroup_from_folio(folio, rsvd); 35862306a36Sopenharmony_ci if (unlikely(!h_cg)) 35962306a36Sopenharmony_ci return; 36062306a36Sopenharmony_ci __set_hugetlb_cgroup(folio, NULL, rsvd); 36162306a36Sopenharmony_ci 36262306a36Sopenharmony_ci page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, 36362306a36Sopenharmony_ci rsvd), 36462306a36Sopenharmony_ci nr_pages); 36562306a36Sopenharmony_ci 36662306a36Sopenharmony_ci if (rsvd) 36762306a36Sopenharmony_ci css_put(&h_cg->css); 36862306a36Sopenharmony_ci else { 36962306a36Sopenharmony_ci unsigned long usage = 37062306a36Sopenharmony_ci h_cg->nodeinfo[folio_nid(folio)]->usage[idx]; 37162306a36Sopenharmony_ci /* 37262306a36Sopenharmony_ci * This write is not atomic due to fetching usage and writing 37362306a36Sopenharmony_ci * to it, but that's fine because we call this with 37462306a36Sopenharmony_ci * hugetlb_lock held anyway. 37562306a36Sopenharmony_ci */ 37662306a36Sopenharmony_ci WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx], 37762306a36Sopenharmony_ci usage - nr_pages); 37862306a36Sopenharmony_ci } 37962306a36Sopenharmony_ci} 38062306a36Sopenharmony_ci 38162306a36Sopenharmony_civoid hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages, 38262306a36Sopenharmony_ci struct folio *folio) 38362306a36Sopenharmony_ci{ 38462306a36Sopenharmony_ci __hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, false); 38562306a36Sopenharmony_ci} 38662306a36Sopenharmony_ci 38762306a36Sopenharmony_civoid hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages, 38862306a36Sopenharmony_ci struct folio *folio) 38962306a36Sopenharmony_ci{ 39062306a36Sopenharmony_ci __hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, true); 39162306a36Sopenharmony_ci} 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_cistatic void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, 39462306a36Sopenharmony_ci struct hugetlb_cgroup *h_cg, 39562306a36Sopenharmony_ci bool rsvd) 39662306a36Sopenharmony_ci{ 39762306a36Sopenharmony_ci if (hugetlb_cgroup_disabled() || !h_cg) 39862306a36Sopenharmony_ci return; 39962306a36Sopenharmony_ci 40062306a36Sopenharmony_ci if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER) 40162306a36Sopenharmony_ci return; 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_ci page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, 40462306a36Sopenharmony_ci rsvd), 40562306a36Sopenharmony_ci nr_pages); 40662306a36Sopenharmony_ci 40762306a36Sopenharmony_ci if (rsvd) 40862306a36Sopenharmony_ci css_put(&h_cg->css); 40962306a36Sopenharmony_ci} 41062306a36Sopenharmony_ci 41162306a36Sopenharmony_civoid hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, 41262306a36Sopenharmony_ci struct hugetlb_cgroup *h_cg) 41362306a36Sopenharmony_ci{ 41462306a36Sopenharmony_ci __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false); 41562306a36Sopenharmony_ci} 41662306a36Sopenharmony_ci 41762306a36Sopenharmony_civoid hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages, 41862306a36Sopenharmony_ci struct hugetlb_cgroup *h_cg) 41962306a36Sopenharmony_ci{ 42062306a36Sopenharmony_ci __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true); 42162306a36Sopenharmony_ci} 42262306a36Sopenharmony_ci 42362306a36Sopenharmony_civoid hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start, 42462306a36Sopenharmony_ci unsigned long end) 42562306a36Sopenharmony_ci{ 42662306a36Sopenharmony_ci if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter || 42762306a36Sopenharmony_ci !resv->css) 42862306a36Sopenharmony_ci return; 42962306a36Sopenharmony_ci 43062306a36Sopenharmony_ci page_counter_uncharge(resv->reservation_counter, 43162306a36Sopenharmony_ci (end - start) * resv->pages_per_hpage); 43262306a36Sopenharmony_ci css_put(resv->css); 43362306a36Sopenharmony_ci} 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_civoid hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, 43662306a36Sopenharmony_ci struct file_region *rg, 43762306a36Sopenharmony_ci unsigned long nr_pages, 43862306a36Sopenharmony_ci bool region_del) 43962306a36Sopenharmony_ci{ 44062306a36Sopenharmony_ci if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages) 44162306a36Sopenharmony_ci return; 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci if (rg->reservation_counter && resv->pages_per_hpage && 44462306a36Sopenharmony_ci !resv->reservation_counter) { 44562306a36Sopenharmony_ci page_counter_uncharge(rg->reservation_counter, 44662306a36Sopenharmony_ci nr_pages * resv->pages_per_hpage); 44762306a36Sopenharmony_ci /* 44862306a36Sopenharmony_ci * Only do css_put(rg->css) when we delete the entire region 44962306a36Sopenharmony_ci * because one file_region must hold exactly one css reference. 45062306a36Sopenharmony_ci */ 45162306a36Sopenharmony_ci if (region_del) 45262306a36Sopenharmony_ci css_put(rg->css); 45362306a36Sopenharmony_ci } 45462306a36Sopenharmony_ci} 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_cienum { 45762306a36Sopenharmony_ci RES_USAGE, 45862306a36Sopenharmony_ci RES_RSVD_USAGE, 45962306a36Sopenharmony_ci RES_LIMIT, 46062306a36Sopenharmony_ci RES_RSVD_LIMIT, 46162306a36Sopenharmony_ci RES_MAX_USAGE, 46262306a36Sopenharmony_ci RES_RSVD_MAX_USAGE, 46362306a36Sopenharmony_ci RES_FAILCNT, 46462306a36Sopenharmony_ci RES_RSVD_FAILCNT, 46562306a36Sopenharmony_ci}; 46662306a36Sopenharmony_ci 46762306a36Sopenharmony_cistatic int hugetlb_cgroup_read_numa_stat(struct seq_file *seq, void *dummy) 46862306a36Sopenharmony_ci{ 46962306a36Sopenharmony_ci int nid; 47062306a36Sopenharmony_ci struct cftype *cft = seq_cft(seq); 47162306a36Sopenharmony_ci int idx = MEMFILE_IDX(cft->private); 47262306a36Sopenharmony_ci bool legacy = MEMFILE_ATTR(cft->private); 47362306a36Sopenharmony_ci struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq)); 47462306a36Sopenharmony_ci struct cgroup_subsys_state *css; 47562306a36Sopenharmony_ci unsigned long usage; 47662306a36Sopenharmony_ci 47762306a36Sopenharmony_ci if (legacy) { 47862306a36Sopenharmony_ci /* Add up usage across all nodes for the non-hierarchical total. */ 47962306a36Sopenharmony_ci usage = 0; 48062306a36Sopenharmony_ci for_each_node_state(nid, N_MEMORY) 48162306a36Sopenharmony_ci usage += READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]); 48262306a36Sopenharmony_ci seq_printf(seq, "total=%lu", usage * PAGE_SIZE); 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_ci /* Simply print the per-node usage for the non-hierarchical total. */ 48562306a36Sopenharmony_ci for_each_node_state(nid, N_MEMORY) 48662306a36Sopenharmony_ci seq_printf(seq, " N%d=%lu", nid, 48762306a36Sopenharmony_ci READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]) * 48862306a36Sopenharmony_ci PAGE_SIZE); 48962306a36Sopenharmony_ci seq_putc(seq, '\n'); 49062306a36Sopenharmony_ci } 49162306a36Sopenharmony_ci 49262306a36Sopenharmony_ci /* 49362306a36Sopenharmony_ci * The hierarchical total is pretty much the value recorded by the 49462306a36Sopenharmony_ci * counter, so use that. 49562306a36Sopenharmony_ci */ 49662306a36Sopenharmony_ci seq_printf(seq, "%stotal=%lu", legacy ? "hierarchical_" : "", 49762306a36Sopenharmony_ci page_counter_read(&h_cg->hugepage[idx]) * PAGE_SIZE); 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_ci /* 50062306a36Sopenharmony_ci * For each node, transverse the css tree to obtain the hierarchical 50162306a36Sopenharmony_ci * node usage. 50262306a36Sopenharmony_ci */ 50362306a36Sopenharmony_ci for_each_node_state(nid, N_MEMORY) { 50462306a36Sopenharmony_ci usage = 0; 50562306a36Sopenharmony_ci rcu_read_lock(); 50662306a36Sopenharmony_ci css_for_each_descendant_pre(css, &h_cg->css) { 50762306a36Sopenharmony_ci usage += READ_ONCE(hugetlb_cgroup_from_css(css) 50862306a36Sopenharmony_ci ->nodeinfo[nid] 50962306a36Sopenharmony_ci ->usage[idx]); 51062306a36Sopenharmony_ci } 51162306a36Sopenharmony_ci rcu_read_unlock(); 51262306a36Sopenharmony_ci seq_printf(seq, " N%d=%lu", nid, usage * PAGE_SIZE); 51362306a36Sopenharmony_ci } 51462306a36Sopenharmony_ci 51562306a36Sopenharmony_ci seq_putc(seq, '\n'); 51662306a36Sopenharmony_ci 51762306a36Sopenharmony_ci return 0; 51862306a36Sopenharmony_ci} 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_cistatic u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css, 52162306a36Sopenharmony_ci struct cftype *cft) 52262306a36Sopenharmony_ci{ 52362306a36Sopenharmony_ci struct page_counter *counter; 52462306a36Sopenharmony_ci struct page_counter *rsvd_counter; 52562306a36Sopenharmony_ci struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_ci counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)]; 52862306a36Sopenharmony_ci rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)]; 52962306a36Sopenharmony_ci 53062306a36Sopenharmony_ci switch (MEMFILE_ATTR(cft->private)) { 53162306a36Sopenharmony_ci case RES_USAGE: 53262306a36Sopenharmony_ci return (u64)page_counter_read(counter) * PAGE_SIZE; 53362306a36Sopenharmony_ci case RES_RSVD_USAGE: 53462306a36Sopenharmony_ci return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE; 53562306a36Sopenharmony_ci case RES_LIMIT: 53662306a36Sopenharmony_ci return (u64)counter->max * PAGE_SIZE; 53762306a36Sopenharmony_ci case RES_RSVD_LIMIT: 53862306a36Sopenharmony_ci return (u64)rsvd_counter->max * PAGE_SIZE; 53962306a36Sopenharmony_ci case RES_MAX_USAGE: 54062306a36Sopenharmony_ci return (u64)counter->watermark * PAGE_SIZE; 54162306a36Sopenharmony_ci case RES_RSVD_MAX_USAGE: 54262306a36Sopenharmony_ci return (u64)rsvd_counter->watermark * PAGE_SIZE; 54362306a36Sopenharmony_ci case RES_FAILCNT: 54462306a36Sopenharmony_ci return counter->failcnt; 54562306a36Sopenharmony_ci case RES_RSVD_FAILCNT: 54662306a36Sopenharmony_ci return rsvd_counter->failcnt; 54762306a36Sopenharmony_ci default: 54862306a36Sopenharmony_ci BUG(); 54962306a36Sopenharmony_ci } 55062306a36Sopenharmony_ci} 55162306a36Sopenharmony_ci 55262306a36Sopenharmony_cistatic int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v) 55362306a36Sopenharmony_ci{ 55462306a36Sopenharmony_ci int idx; 55562306a36Sopenharmony_ci u64 val; 55662306a36Sopenharmony_ci struct cftype *cft = seq_cft(seq); 55762306a36Sopenharmony_ci unsigned long limit; 55862306a36Sopenharmony_ci struct page_counter *counter; 55962306a36Sopenharmony_ci struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq)); 56062306a36Sopenharmony_ci 56162306a36Sopenharmony_ci idx = MEMFILE_IDX(cft->private); 56262306a36Sopenharmony_ci counter = &h_cg->hugepage[idx]; 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_ci limit = round_down(PAGE_COUNTER_MAX, 56562306a36Sopenharmony_ci pages_per_huge_page(&hstates[idx])); 56662306a36Sopenharmony_ci 56762306a36Sopenharmony_ci switch (MEMFILE_ATTR(cft->private)) { 56862306a36Sopenharmony_ci case RES_RSVD_USAGE: 56962306a36Sopenharmony_ci counter = &h_cg->rsvd_hugepage[idx]; 57062306a36Sopenharmony_ci fallthrough; 57162306a36Sopenharmony_ci case RES_USAGE: 57262306a36Sopenharmony_ci val = (u64)page_counter_read(counter); 57362306a36Sopenharmony_ci seq_printf(seq, "%llu\n", val * PAGE_SIZE); 57462306a36Sopenharmony_ci break; 57562306a36Sopenharmony_ci case RES_RSVD_LIMIT: 57662306a36Sopenharmony_ci counter = &h_cg->rsvd_hugepage[idx]; 57762306a36Sopenharmony_ci fallthrough; 57862306a36Sopenharmony_ci case RES_LIMIT: 57962306a36Sopenharmony_ci val = (u64)counter->max; 58062306a36Sopenharmony_ci if (val == limit) 58162306a36Sopenharmony_ci seq_puts(seq, "max\n"); 58262306a36Sopenharmony_ci else 58362306a36Sopenharmony_ci seq_printf(seq, "%llu\n", val * PAGE_SIZE); 58462306a36Sopenharmony_ci break; 58562306a36Sopenharmony_ci default: 58662306a36Sopenharmony_ci BUG(); 58762306a36Sopenharmony_ci } 58862306a36Sopenharmony_ci 58962306a36Sopenharmony_ci return 0; 59062306a36Sopenharmony_ci} 59162306a36Sopenharmony_ci 59262306a36Sopenharmony_cistatic DEFINE_MUTEX(hugetlb_limit_mutex); 59362306a36Sopenharmony_ci 59462306a36Sopenharmony_cistatic ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of, 59562306a36Sopenharmony_ci char *buf, size_t nbytes, loff_t off, 59662306a36Sopenharmony_ci const char *max) 59762306a36Sopenharmony_ci{ 59862306a36Sopenharmony_ci int ret, idx; 59962306a36Sopenharmony_ci unsigned long nr_pages; 60062306a36Sopenharmony_ci struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); 60162306a36Sopenharmony_ci bool rsvd = false; 60262306a36Sopenharmony_ci 60362306a36Sopenharmony_ci if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */ 60462306a36Sopenharmony_ci return -EINVAL; 60562306a36Sopenharmony_ci 60662306a36Sopenharmony_ci buf = strstrip(buf); 60762306a36Sopenharmony_ci ret = page_counter_memparse(buf, max, &nr_pages); 60862306a36Sopenharmony_ci if (ret) 60962306a36Sopenharmony_ci return ret; 61062306a36Sopenharmony_ci 61162306a36Sopenharmony_ci idx = MEMFILE_IDX(of_cft(of)->private); 61262306a36Sopenharmony_ci nr_pages = round_down(nr_pages, pages_per_huge_page(&hstates[idx])); 61362306a36Sopenharmony_ci 61462306a36Sopenharmony_ci switch (MEMFILE_ATTR(of_cft(of)->private)) { 61562306a36Sopenharmony_ci case RES_RSVD_LIMIT: 61662306a36Sopenharmony_ci rsvd = true; 61762306a36Sopenharmony_ci fallthrough; 61862306a36Sopenharmony_ci case RES_LIMIT: 61962306a36Sopenharmony_ci mutex_lock(&hugetlb_limit_mutex); 62062306a36Sopenharmony_ci ret = page_counter_set_max( 62162306a36Sopenharmony_ci __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd), 62262306a36Sopenharmony_ci nr_pages); 62362306a36Sopenharmony_ci mutex_unlock(&hugetlb_limit_mutex); 62462306a36Sopenharmony_ci break; 62562306a36Sopenharmony_ci default: 62662306a36Sopenharmony_ci ret = -EINVAL; 62762306a36Sopenharmony_ci break; 62862306a36Sopenharmony_ci } 62962306a36Sopenharmony_ci return ret ?: nbytes; 63062306a36Sopenharmony_ci} 63162306a36Sopenharmony_ci 63262306a36Sopenharmony_cistatic ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of, 63362306a36Sopenharmony_ci char *buf, size_t nbytes, loff_t off) 63462306a36Sopenharmony_ci{ 63562306a36Sopenharmony_ci return hugetlb_cgroup_write(of, buf, nbytes, off, "-1"); 63662306a36Sopenharmony_ci} 63762306a36Sopenharmony_ci 63862306a36Sopenharmony_cistatic ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of, 63962306a36Sopenharmony_ci char *buf, size_t nbytes, loff_t off) 64062306a36Sopenharmony_ci{ 64162306a36Sopenharmony_ci return hugetlb_cgroup_write(of, buf, nbytes, off, "max"); 64262306a36Sopenharmony_ci} 64362306a36Sopenharmony_ci 64462306a36Sopenharmony_cistatic ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of, 64562306a36Sopenharmony_ci char *buf, size_t nbytes, loff_t off) 64662306a36Sopenharmony_ci{ 64762306a36Sopenharmony_ci int ret = 0; 64862306a36Sopenharmony_ci struct page_counter *counter, *rsvd_counter; 64962306a36Sopenharmony_ci struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); 65062306a36Sopenharmony_ci 65162306a36Sopenharmony_ci counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)]; 65262306a36Sopenharmony_ci rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)]; 65362306a36Sopenharmony_ci 65462306a36Sopenharmony_ci switch (MEMFILE_ATTR(of_cft(of)->private)) { 65562306a36Sopenharmony_ci case RES_MAX_USAGE: 65662306a36Sopenharmony_ci page_counter_reset_watermark(counter); 65762306a36Sopenharmony_ci break; 65862306a36Sopenharmony_ci case RES_RSVD_MAX_USAGE: 65962306a36Sopenharmony_ci page_counter_reset_watermark(rsvd_counter); 66062306a36Sopenharmony_ci break; 66162306a36Sopenharmony_ci case RES_FAILCNT: 66262306a36Sopenharmony_ci counter->failcnt = 0; 66362306a36Sopenharmony_ci break; 66462306a36Sopenharmony_ci case RES_RSVD_FAILCNT: 66562306a36Sopenharmony_ci rsvd_counter->failcnt = 0; 66662306a36Sopenharmony_ci break; 66762306a36Sopenharmony_ci default: 66862306a36Sopenharmony_ci ret = -EINVAL; 66962306a36Sopenharmony_ci break; 67062306a36Sopenharmony_ci } 67162306a36Sopenharmony_ci return ret ?: nbytes; 67262306a36Sopenharmony_ci} 67362306a36Sopenharmony_ci 67462306a36Sopenharmony_cistatic char *mem_fmt(char *buf, int size, unsigned long hsize) 67562306a36Sopenharmony_ci{ 67662306a36Sopenharmony_ci if (hsize >= SZ_1G) 67762306a36Sopenharmony_ci snprintf(buf, size, "%luGB", hsize / SZ_1G); 67862306a36Sopenharmony_ci else if (hsize >= SZ_1M) 67962306a36Sopenharmony_ci snprintf(buf, size, "%luMB", hsize / SZ_1M); 68062306a36Sopenharmony_ci else 68162306a36Sopenharmony_ci snprintf(buf, size, "%luKB", hsize / SZ_1K); 68262306a36Sopenharmony_ci return buf; 68362306a36Sopenharmony_ci} 68462306a36Sopenharmony_ci 68562306a36Sopenharmony_cistatic int __hugetlb_events_show(struct seq_file *seq, bool local) 68662306a36Sopenharmony_ci{ 68762306a36Sopenharmony_ci int idx; 68862306a36Sopenharmony_ci long max; 68962306a36Sopenharmony_ci struct cftype *cft = seq_cft(seq); 69062306a36Sopenharmony_ci struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq)); 69162306a36Sopenharmony_ci 69262306a36Sopenharmony_ci idx = MEMFILE_IDX(cft->private); 69362306a36Sopenharmony_ci 69462306a36Sopenharmony_ci if (local) 69562306a36Sopenharmony_ci max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]); 69662306a36Sopenharmony_ci else 69762306a36Sopenharmony_ci max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]); 69862306a36Sopenharmony_ci 69962306a36Sopenharmony_ci seq_printf(seq, "max %lu\n", max); 70062306a36Sopenharmony_ci 70162306a36Sopenharmony_ci return 0; 70262306a36Sopenharmony_ci} 70362306a36Sopenharmony_ci 70462306a36Sopenharmony_cistatic int hugetlb_events_show(struct seq_file *seq, void *v) 70562306a36Sopenharmony_ci{ 70662306a36Sopenharmony_ci return __hugetlb_events_show(seq, false); 70762306a36Sopenharmony_ci} 70862306a36Sopenharmony_ci 70962306a36Sopenharmony_cistatic int hugetlb_events_local_show(struct seq_file *seq, void *v) 71062306a36Sopenharmony_ci{ 71162306a36Sopenharmony_ci return __hugetlb_events_show(seq, true); 71262306a36Sopenharmony_ci} 71362306a36Sopenharmony_ci 71462306a36Sopenharmony_cistatic void __init __hugetlb_cgroup_file_dfl_init(int idx) 71562306a36Sopenharmony_ci{ 71662306a36Sopenharmony_ci char buf[32]; 71762306a36Sopenharmony_ci struct cftype *cft; 71862306a36Sopenharmony_ci struct hstate *h = &hstates[idx]; 71962306a36Sopenharmony_ci 72062306a36Sopenharmony_ci /* format the size */ 72162306a36Sopenharmony_ci mem_fmt(buf, sizeof(buf), huge_page_size(h)); 72262306a36Sopenharmony_ci 72362306a36Sopenharmony_ci /* Add the limit file */ 72462306a36Sopenharmony_ci cft = &h->cgroup_files_dfl[0]; 72562306a36Sopenharmony_ci snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max", buf); 72662306a36Sopenharmony_ci cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT); 72762306a36Sopenharmony_ci cft->seq_show = hugetlb_cgroup_read_u64_max; 72862306a36Sopenharmony_ci cft->write = hugetlb_cgroup_write_dfl; 72962306a36Sopenharmony_ci cft->flags = CFTYPE_NOT_ON_ROOT; 73062306a36Sopenharmony_ci 73162306a36Sopenharmony_ci /* Add the reservation limit file */ 73262306a36Sopenharmony_ci cft = &h->cgroup_files_dfl[1]; 73362306a36Sopenharmony_ci snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max", buf); 73462306a36Sopenharmony_ci cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT); 73562306a36Sopenharmony_ci cft->seq_show = hugetlb_cgroup_read_u64_max; 73662306a36Sopenharmony_ci cft->write = hugetlb_cgroup_write_dfl; 73762306a36Sopenharmony_ci cft->flags = CFTYPE_NOT_ON_ROOT; 73862306a36Sopenharmony_ci 73962306a36Sopenharmony_ci /* Add the current usage file */ 74062306a36Sopenharmony_ci cft = &h->cgroup_files_dfl[2]; 74162306a36Sopenharmony_ci snprintf(cft->name, MAX_CFTYPE_NAME, "%s.current", buf); 74262306a36Sopenharmony_ci cft->private = MEMFILE_PRIVATE(idx, RES_USAGE); 74362306a36Sopenharmony_ci cft->seq_show = hugetlb_cgroup_read_u64_max; 74462306a36Sopenharmony_ci cft->flags = CFTYPE_NOT_ON_ROOT; 74562306a36Sopenharmony_ci 74662306a36Sopenharmony_ci /* Add the current reservation usage file */ 74762306a36Sopenharmony_ci cft = &h->cgroup_files_dfl[3]; 74862306a36Sopenharmony_ci snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.current", buf); 74962306a36Sopenharmony_ci cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE); 75062306a36Sopenharmony_ci cft->seq_show = hugetlb_cgroup_read_u64_max; 75162306a36Sopenharmony_ci cft->flags = CFTYPE_NOT_ON_ROOT; 75262306a36Sopenharmony_ci 75362306a36Sopenharmony_ci /* Add the events file */ 75462306a36Sopenharmony_ci cft = &h->cgroup_files_dfl[4]; 75562306a36Sopenharmony_ci snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf); 75662306a36Sopenharmony_ci cft->private = MEMFILE_PRIVATE(idx, 0); 75762306a36Sopenharmony_ci cft->seq_show = hugetlb_events_show; 75862306a36Sopenharmony_ci cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]); 75962306a36Sopenharmony_ci cft->flags = CFTYPE_NOT_ON_ROOT; 76062306a36Sopenharmony_ci 76162306a36Sopenharmony_ci /* Add the events.local file */ 76262306a36Sopenharmony_ci cft = &h->cgroup_files_dfl[5]; 76362306a36Sopenharmony_ci snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events.local", buf); 76462306a36Sopenharmony_ci cft->private = MEMFILE_PRIVATE(idx, 0); 76562306a36Sopenharmony_ci cft->seq_show = hugetlb_events_local_show; 76662306a36Sopenharmony_ci cft->file_offset = offsetof(struct hugetlb_cgroup, 76762306a36Sopenharmony_ci events_local_file[idx]); 76862306a36Sopenharmony_ci cft->flags = CFTYPE_NOT_ON_ROOT; 76962306a36Sopenharmony_ci 77062306a36Sopenharmony_ci /* Add the numa stat file */ 77162306a36Sopenharmony_ci cft = &h->cgroup_files_dfl[6]; 77262306a36Sopenharmony_ci snprintf(cft->name, MAX_CFTYPE_NAME, "%s.numa_stat", buf); 77362306a36Sopenharmony_ci cft->private = MEMFILE_PRIVATE(idx, 0); 77462306a36Sopenharmony_ci cft->seq_show = hugetlb_cgroup_read_numa_stat; 77562306a36Sopenharmony_ci cft->flags = CFTYPE_NOT_ON_ROOT; 77662306a36Sopenharmony_ci 77762306a36Sopenharmony_ci /* NULL terminate the last cft */ 77862306a36Sopenharmony_ci cft = &h->cgroup_files_dfl[7]; 77962306a36Sopenharmony_ci memset(cft, 0, sizeof(*cft)); 78062306a36Sopenharmony_ci 78162306a36Sopenharmony_ci WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys, 78262306a36Sopenharmony_ci h->cgroup_files_dfl)); 78362306a36Sopenharmony_ci} 78462306a36Sopenharmony_ci 78562306a36Sopenharmony_cistatic void __init __hugetlb_cgroup_file_legacy_init(int idx) 78662306a36Sopenharmony_ci{ 78762306a36Sopenharmony_ci char buf[32]; 78862306a36Sopenharmony_ci struct cftype *cft; 78962306a36Sopenharmony_ci struct hstate *h = &hstates[idx]; 79062306a36Sopenharmony_ci 79162306a36Sopenharmony_ci /* format the size */ 79262306a36Sopenharmony_ci mem_fmt(buf, sizeof(buf), huge_page_size(h)); 79362306a36Sopenharmony_ci 79462306a36Sopenharmony_ci /* Add the limit file */ 79562306a36Sopenharmony_ci cft = &h->cgroup_files_legacy[0]; 79662306a36Sopenharmony_ci snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf); 79762306a36Sopenharmony_ci cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT); 79862306a36Sopenharmony_ci cft->read_u64 = hugetlb_cgroup_read_u64; 79962306a36Sopenharmony_ci cft->write = hugetlb_cgroup_write_legacy; 80062306a36Sopenharmony_ci 80162306a36Sopenharmony_ci /* Add the reservation limit file */ 80262306a36Sopenharmony_ci cft = &h->cgroup_files_legacy[1]; 80362306a36Sopenharmony_ci snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.limit_in_bytes", buf); 80462306a36Sopenharmony_ci cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT); 80562306a36Sopenharmony_ci cft->read_u64 = hugetlb_cgroup_read_u64; 80662306a36Sopenharmony_ci cft->write = hugetlb_cgroup_write_legacy; 80762306a36Sopenharmony_ci 80862306a36Sopenharmony_ci /* Add the usage file */ 80962306a36Sopenharmony_ci cft = &h->cgroup_files_legacy[2]; 81062306a36Sopenharmony_ci snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf); 81162306a36Sopenharmony_ci cft->private = MEMFILE_PRIVATE(idx, RES_USAGE); 81262306a36Sopenharmony_ci cft->read_u64 = hugetlb_cgroup_read_u64; 81362306a36Sopenharmony_ci 81462306a36Sopenharmony_ci /* Add the reservation usage file */ 81562306a36Sopenharmony_ci cft = &h->cgroup_files_legacy[3]; 81662306a36Sopenharmony_ci snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.usage_in_bytes", buf); 81762306a36Sopenharmony_ci cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE); 81862306a36Sopenharmony_ci cft->read_u64 = hugetlb_cgroup_read_u64; 81962306a36Sopenharmony_ci 82062306a36Sopenharmony_ci /* Add the MAX usage file */ 82162306a36Sopenharmony_ci cft = &h->cgroup_files_legacy[4]; 82262306a36Sopenharmony_ci snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf); 82362306a36Sopenharmony_ci cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE); 82462306a36Sopenharmony_ci cft->write = hugetlb_cgroup_reset; 82562306a36Sopenharmony_ci cft->read_u64 = hugetlb_cgroup_read_u64; 82662306a36Sopenharmony_ci 82762306a36Sopenharmony_ci /* Add the MAX reservation usage file */ 82862306a36Sopenharmony_ci cft = &h->cgroup_files_legacy[5]; 82962306a36Sopenharmony_ci snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max_usage_in_bytes", buf); 83062306a36Sopenharmony_ci cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_MAX_USAGE); 83162306a36Sopenharmony_ci cft->write = hugetlb_cgroup_reset; 83262306a36Sopenharmony_ci cft->read_u64 = hugetlb_cgroup_read_u64; 83362306a36Sopenharmony_ci 83462306a36Sopenharmony_ci /* Add the failcntfile */ 83562306a36Sopenharmony_ci cft = &h->cgroup_files_legacy[6]; 83662306a36Sopenharmony_ci snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf); 83762306a36Sopenharmony_ci cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT); 83862306a36Sopenharmony_ci cft->write = hugetlb_cgroup_reset; 83962306a36Sopenharmony_ci cft->read_u64 = hugetlb_cgroup_read_u64; 84062306a36Sopenharmony_ci 84162306a36Sopenharmony_ci /* Add the reservation failcntfile */ 84262306a36Sopenharmony_ci cft = &h->cgroup_files_legacy[7]; 84362306a36Sopenharmony_ci snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.failcnt", buf); 84462306a36Sopenharmony_ci cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_FAILCNT); 84562306a36Sopenharmony_ci cft->write = hugetlb_cgroup_reset; 84662306a36Sopenharmony_ci cft->read_u64 = hugetlb_cgroup_read_u64; 84762306a36Sopenharmony_ci 84862306a36Sopenharmony_ci /* Add the numa stat file */ 84962306a36Sopenharmony_ci cft = &h->cgroup_files_legacy[8]; 85062306a36Sopenharmony_ci snprintf(cft->name, MAX_CFTYPE_NAME, "%s.numa_stat", buf); 85162306a36Sopenharmony_ci cft->private = MEMFILE_PRIVATE(idx, 1); 85262306a36Sopenharmony_ci cft->seq_show = hugetlb_cgroup_read_numa_stat; 85362306a36Sopenharmony_ci 85462306a36Sopenharmony_ci /* NULL terminate the last cft */ 85562306a36Sopenharmony_ci cft = &h->cgroup_files_legacy[9]; 85662306a36Sopenharmony_ci memset(cft, 0, sizeof(*cft)); 85762306a36Sopenharmony_ci 85862306a36Sopenharmony_ci WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys, 85962306a36Sopenharmony_ci h->cgroup_files_legacy)); 86062306a36Sopenharmony_ci} 86162306a36Sopenharmony_ci 86262306a36Sopenharmony_cistatic void __init __hugetlb_cgroup_file_init(int idx) 86362306a36Sopenharmony_ci{ 86462306a36Sopenharmony_ci __hugetlb_cgroup_file_dfl_init(idx); 86562306a36Sopenharmony_ci __hugetlb_cgroup_file_legacy_init(idx); 86662306a36Sopenharmony_ci} 86762306a36Sopenharmony_ci 86862306a36Sopenharmony_civoid __init hugetlb_cgroup_file_init(void) 86962306a36Sopenharmony_ci{ 87062306a36Sopenharmony_ci struct hstate *h; 87162306a36Sopenharmony_ci 87262306a36Sopenharmony_ci for_each_hstate(h) { 87362306a36Sopenharmony_ci /* 87462306a36Sopenharmony_ci * Add cgroup control files only if the huge page consists 87562306a36Sopenharmony_ci * of more than two normal pages. This is because we use 87662306a36Sopenharmony_ci * page[2].private for storing cgroup details. 87762306a36Sopenharmony_ci */ 87862306a36Sopenharmony_ci if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER) 87962306a36Sopenharmony_ci __hugetlb_cgroup_file_init(hstate_index(h)); 88062306a36Sopenharmony_ci } 88162306a36Sopenharmony_ci} 88262306a36Sopenharmony_ci 88362306a36Sopenharmony_ci/* 88462306a36Sopenharmony_ci * hugetlb_lock will make sure a parallel cgroup rmdir won't happen 88562306a36Sopenharmony_ci * when we migrate hugepages 88662306a36Sopenharmony_ci */ 88762306a36Sopenharmony_civoid hugetlb_cgroup_migrate(struct folio *old_folio, struct folio *new_folio) 88862306a36Sopenharmony_ci{ 88962306a36Sopenharmony_ci struct hugetlb_cgroup *h_cg; 89062306a36Sopenharmony_ci struct hugetlb_cgroup *h_cg_rsvd; 89162306a36Sopenharmony_ci struct hstate *h = folio_hstate(old_folio); 89262306a36Sopenharmony_ci 89362306a36Sopenharmony_ci if (hugetlb_cgroup_disabled()) 89462306a36Sopenharmony_ci return; 89562306a36Sopenharmony_ci 89662306a36Sopenharmony_ci spin_lock_irq(&hugetlb_lock); 89762306a36Sopenharmony_ci h_cg = hugetlb_cgroup_from_folio(old_folio); 89862306a36Sopenharmony_ci h_cg_rsvd = hugetlb_cgroup_from_folio_rsvd(old_folio); 89962306a36Sopenharmony_ci set_hugetlb_cgroup(old_folio, NULL); 90062306a36Sopenharmony_ci set_hugetlb_cgroup_rsvd(old_folio, NULL); 90162306a36Sopenharmony_ci 90262306a36Sopenharmony_ci /* move the h_cg details to new cgroup */ 90362306a36Sopenharmony_ci set_hugetlb_cgroup(new_folio, h_cg); 90462306a36Sopenharmony_ci set_hugetlb_cgroup_rsvd(new_folio, h_cg_rsvd); 90562306a36Sopenharmony_ci list_move(&new_folio->lru, &h->hugepage_activelist); 90662306a36Sopenharmony_ci spin_unlock_irq(&hugetlb_lock); 90762306a36Sopenharmony_ci return; 90862306a36Sopenharmony_ci} 90962306a36Sopenharmony_ci 91062306a36Sopenharmony_cistatic struct cftype hugetlb_files[] = { 91162306a36Sopenharmony_ci {} /* terminate */ 91262306a36Sopenharmony_ci}; 91362306a36Sopenharmony_ci 91462306a36Sopenharmony_cistruct cgroup_subsys hugetlb_cgrp_subsys = { 91562306a36Sopenharmony_ci .css_alloc = hugetlb_cgroup_css_alloc, 91662306a36Sopenharmony_ci .css_offline = hugetlb_cgroup_css_offline, 91762306a36Sopenharmony_ci .css_free = hugetlb_cgroup_css_free, 91862306a36Sopenharmony_ci .dfl_cftypes = hugetlb_files, 91962306a36Sopenharmony_ci .legacy_cftypes = hugetlb_files, 92062306a36Sopenharmony_ci}; 921