===================================================
Adding reference counters (krefs) to kernel objects
===================================================

:Author: Corey Minyard <minyard@acm.org>
:Author: Thomas Hellstrom <thellstrom@vmware.com>

A lot of this was lifted from Greg Kroah-Hartman's 2004 OLS paper and
presentation on krefs, which can be found at:

  - http://www.kroah.com/linux/talks/ols_2004_kref_paper/Reprint-Kroah-Hartman-OLS2004.pdf
  - http://www.kroah.com/linux/talks/ols_2004_kref_talk/

Introduction
============

krefs allow you to add reference counters to your objects. If you
have objects that are used in multiple places and passed around, and
you don't have refcounts, your code is almost certainly broken. If
you want refcounts, krefs are the way to go.

To use a kref, add one to your data structures like::

    struct my_data
    {
        .
        .
        struct kref refcount;
        .
        .
    };

The kref can occur anywhere within the data structure.

Initialization
==============

You must initialize the kref after you allocate it.
To do this, call
kref_init() as follows::

    struct my_data *data;

    data = kmalloc(sizeof(*data), GFP_KERNEL);
    if (!data)
        return -ENOMEM;
    kref_init(&data->refcount);

This sets the refcount in the kref to 1.

Kref rules
==========

Once you have an initialized kref, you must obey the following
rules:

1) If you make a non-temporary copy of a pointer, especially if
   it can be passed to another thread of execution, you must
   increment the refcount with kref_get() before passing it off::

       kref_get(&data->refcount);

   If you already have a valid pointer to a kref-ed structure (the
   refcount cannot go to zero) you may do this without a lock.

2) When you are done with a pointer, you must call kref_put()::

       kref_put(&data->refcount, data_release);

   If this is the last reference to the pointer, the release
   routine will be called. If the code never tries to get
   a valid pointer to a kref-ed structure without already
   holding a valid pointer, it is safe to do this without
   a lock.

3) If the code attempts to gain a reference to a kref-ed structure
   without already holding a valid pointer, it must serialize access
   so that a kref_put() cannot occur during the kref_get(), and the
   structure must remain valid during the kref_get().

For example, if you allocate some data and then pass it to another
thread to process::

    void data_release(struct kref *ref)
    {
        struct my_data *data = container_of(ref, struct my_data, refcount);
        kfree(data);
    }

    void more_data_handling(void *cb_data)
    {
        struct my_data *data = cb_data;
        .
        . do stuff with data here
        .
        kref_put(&data->refcount, data_release);
    }

    int my_data_handler(void)
    {
        int rv = 0;
        struct my_data *data;
        struct task_struct *task;
        data = kmalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
            return -ENOMEM;
        kref_init(&data->refcount);

        kref_get(&data->refcount);
        task = kthread_run(more_data_handling, data, "more_data_handling");
        if (task == ERR_PTR(-ENOMEM)) {
            rv = -ENOMEM;
            kref_put(&data->refcount, data_release);
            goto out;
        }

        .
        . do stuff with data here
        .
    out:
        kref_put(&data->refcount, data_release);
        return rv;
    }

This way, it doesn't matter in what order the two threads handle the
data; kref_put() takes care of knowing when the data is no longer
referenced and releasing it. The kref_get() does not require a lock,
since we already have a valid pointer that we own a refcount for. The
put needs no lock because nothing tries to get the data without
already holding a pointer.

In the above example, kref_put() will be called 2 times in both success
and error paths.
This is necessary because kref_init() set the reference
count to 1 and kref_get() then incremented it to 2.

Note that the "before" in rule 1 is very important. You should never
do something like::

    task = kthread_run(more_data_handling, data, "more_data_handling");
    if (task == ERR_PTR(-ENOMEM)) {
        rv = -ENOMEM;
        goto out;
    } else
        /* BAD BAD BAD - get is after the handoff */
        kref_get(&data->refcount);

Don't assume you know what you are doing and use the above construct.
First of all, you may not know what you are doing. Second, you may
know what you are doing (there are some situations where locking is
involved where the above may be legal) but someone else who doesn't
know what they are doing may change the code or copy the code. It's
bad style. Don't do it.

There are some situations where you can optimize the gets and puts.
For instance, if you are done with an object and enqueuing it for
something else or passing it off to something else, there is no reason
to do a get then a put::

    /* Silly extra get and put */
    kref_get(&obj->ref);
    enqueue(obj);
    kref_put(&obj->ref, obj_cleanup);

Just do the enqueue.
A comment about this is always welcome::

    enqueue(obj);
    /* We are done with obj, so we pass our refcount off
       to the queue. DON'T TOUCH obj AFTER HERE! */

The last rule (rule 3) is the nastiest one to handle. Say, for
instance, you have a list of items that are each kref-ed, and you wish
to get the first one. You can't just pull the first item off the list
and kref_get() it. That violates rule 3 because you are not already
holding a valid pointer. You must add a mutex (or some other lock).
For instance::

    static DEFINE_MUTEX(mutex);
    static LIST_HEAD(q);
    struct my_data
    {
        struct kref refcount;
        struct list_head link;
    };

    static struct my_data *get_entry()
    {
        struct my_data *entry = NULL;
        mutex_lock(&mutex);
        if (!list_empty(&q)) {
            entry = container_of(q.next, struct my_data, link);
            kref_get(&entry->refcount);
        }
        mutex_unlock(&mutex);
        return entry;
    }

    static void release_entry(struct kref *ref)
    {
        struct my_data *entry = container_of(ref, struct my_data, refcount);

        list_del(&entry->link);
        kfree(entry);
    }

    static void put_entry(struct my_data *entry)
    {
        mutex_lock(&mutex);
        kref_put(&entry->refcount, release_entry);
        mutex_unlock(&mutex);
    }

The kref_put() return value is useful if you do not want to hold the
lock during the whole release operation. Say you didn't want to call
kfree() with the lock held in the example above (since it is kind of
pointless to do so). You could use kref_put() as follows::

    static void release_entry(struct kref *ref)
    {
        /* All work is done after the return from kref_put(). */
    }

    static void put_entry(struct my_data *entry)
    {
        mutex_lock(&mutex);
        if (kref_put(&entry->refcount, release_entry)) {
            list_del(&entry->link);
            mutex_unlock(&mutex);
            kfree(entry);
        } else
            mutex_unlock(&mutex);
    }

This is really more useful if you have to call other routines as part
of the free operations that could take a long time or might claim the
same lock. Note that doing everything in the release routine is still
preferred as it is a little neater.

The above example could also be optimized using kref_get_unless_zero() in
the following way::

    static struct my_data *get_entry()
    {
        struct my_data *entry = NULL;
        mutex_lock(&mutex);
        if (!list_empty(&q)) {
            entry = container_of(q.next, struct my_data, link);
            if (!kref_get_unless_zero(&entry->refcount))
                entry = NULL;
        }
        mutex_unlock(&mutex);
        return entry;
    }

    static void release_entry(struct kref *ref)
    {
        struct my_data *entry = container_of(ref, struct my_data, refcount);

        mutex_lock(&mutex);
        list_del(&entry->link);
        mutex_unlock(&mutex);
        kfree(entry);
    }

    static void put_entry(struct my_data *entry)
    {
        kref_put(&entry->refcount, release_entry);
    }

This is useful to remove the mutex lock around kref_put() in put_entry(), but
it's important that kref_get_unless_zero() is enclosed in the same critical
section that finds the entry in the lookup table,
otherwise kref_get_unless_zero() may reference already freed memory.
Note that it is illegal to use kref_get_unless_zero() without checking its
return value. If you are sure (by already having a valid pointer) that
kref_get_unless_zero() will return true, then use kref_get() instead.

Krefs and RCU
=============

The function kref_get_unless_zero() also makes it possible to use RCU
locking for lookups in the above example::

    struct my_data
    {
        struct rcu_head rhead;
        .
        struct kref refcount;
        .
        .
    };

    static struct my_data *get_entry_rcu()
    {
        struct my_data *entry = NULL;
        rcu_read_lock();
        if (!list_empty(&q)) {
            entry = container_of(q.next, struct my_data, link);
            if (!kref_get_unless_zero(&entry->refcount))
                entry = NULL;
        }
        rcu_read_unlock();
        return entry;
    }

    static void release_entry_rcu(struct kref *ref)
    {
        struct my_data *entry = container_of(ref, struct my_data, refcount);

        mutex_lock(&mutex);
        list_del_rcu(&entry->link);
        mutex_unlock(&mutex);
        kfree_rcu(entry, rhead);
    }

    static void put_entry(struct my_data *entry)
    {
        kref_put(&entry->refcount, release_entry_rcu);
    }

But note that the struct kref member needs to remain in valid memory for an
RCU grace period after release_entry_rcu() is called. That can be accomplished
by using kfree_rcu(entry, rhead) as done above, or by calling synchronize_rcu()
before using kfree(), but note that synchronize_rcu() may sleep for a
substantial amount of time.