===================================================
Adding reference counters (krefs) to kernel objects
===================================================
462306a36Sopenharmony_ci
562306a36Sopenharmony_ci:Author: Corey Minyard <minyard@acm.org>
662306a36Sopenharmony_ci:Author: Thomas Hellstrom <thellstrom@vmware.com>
762306a36Sopenharmony_ci
862306a36Sopenharmony_ciA lot of this was lifted from Greg Kroah-Hartman's 2004 OLS paper and
962306a36Sopenharmony_cipresentation on krefs, which can be found at:
1062306a36Sopenharmony_ci
1162306a36Sopenharmony_ci  - http://www.kroah.com/linux/talks/ols_2004_kref_paper/Reprint-Kroah-Hartman-OLS2004.pdf
1262306a36Sopenharmony_ci  - http://www.kroah.com/linux/talks/ols_2004_kref_talk/
1362306a36Sopenharmony_ci
Introduction
============
1662306a36Sopenharmony_ci
1762306a36Sopenharmony_cikrefs allow you to add reference counters to your objects.  If you
1862306a36Sopenharmony_cihave objects that are used in multiple places and passed around, and
1962306a36Sopenharmony_ciyou don't have refcounts, your code is almost certainly broken.  If
2062306a36Sopenharmony_ciyou want refcounts, krefs are the way to go.
2162306a36Sopenharmony_ci
To use a kref, embed one in your data structure like this::
2362306a36Sopenharmony_ci
2462306a36Sopenharmony_ci    struct my_data
2562306a36Sopenharmony_ci    {
2662306a36Sopenharmony_ci	.
2762306a36Sopenharmony_ci	.
2862306a36Sopenharmony_ci	struct kref refcount;
2962306a36Sopenharmony_ci	.
3062306a36Sopenharmony_ci	.
3162306a36Sopenharmony_ci    };
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_ciThe kref can occur anywhere within the data structure.
3462306a36Sopenharmony_ci
Initialization
==============
3762306a36Sopenharmony_ci
You must initialize the kref after you allocate the structure that
contains it.  To do this, call kref_init() like so::
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ci     struct my_data *data;
4262306a36Sopenharmony_ci
4362306a36Sopenharmony_ci     data = kmalloc(sizeof(*data), GFP_KERNEL);
4462306a36Sopenharmony_ci     if (!data)
4562306a36Sopenharmony_ci            return -ENOMEM;
4662306a36Sopenharmony_ci     kref_init(&data->refcount);
4762306a36Sopenharmony_ci
4862306a36Sopenharmony_ciThis sets the refcount in the kref to 1.
4962306a36Sopenharmony_ci
Kref rules
==========
5262306a36Sopenharmony_ci
Once you have an initialized kref, you must obey the following
rules:
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_ci1) If you make a non-temporary copy of a pointer, especially if
5762306a36Sopenharmony_ci   it can be passed to another thread of execution, you must
5862306a36Sopenharmony_ci   increment the refcount with kref_get() before passing it off::
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci       kref_get(&data->refcount);
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ci   If you already have a valid pointer to a kref-ed structure (the
6362306a36Sopenharmony_ci   refcount cannot go to zero) you may do this without a lock.
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_ci2) When you are done with a pointer, you must call kref_put()::
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_ci       kref_put(&data->refcount, data_release);
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_ci   If this is the last reference to the pointer, the release
7062306a36Sopenharmony_ci   routine will be called.  If the code never tries to get
7162306a36Sopenharmony_ci   a valid pointer to a kref-ed structure without already
7262306a36Sopenharmony_ci   holding a valid pointer, it is safe to do this without
7362306a36Sopenharmony_ci   a lock.
7462306a36Sopenharmony_ci
3) If the code attempts to gain a reference to a kref-ed structure
   without already holding a valid pointer, it must serialize access
   so that a kref_put() cannot occur during the kref_get(), and the
   structure must remain valid during the kref_get().
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ciFor example, if you allocate some data and then pass it to another
8162306a36Sopenharmony_cithread to process::
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ci    void data_release(struct kref *ref)
8462306a36Sopenharmony_ci    {
8562306a36Sopenharmony_ci	struct my_data *data = container_of(ref, struct my_data, refcount);
8662306a36Sopenharmony_ci	kfree(data);
8762306a36Sopenharmony_ci    }
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_ci    void more_data_handling(void *cb_data)
9062306a36Sopenharmony_ci    {
9162306a36Sopenharmony_ci	struct my_data *data = cb_data;
9262306a36Sopenharmony_ci	.
9362306a36Sopenharmony_ci	. do stuff with data here
9462306a36Sopenharmony_ci	.
9562306a36Sopenharmony_ci	kref_put(&data->refcount, data_release);
9662306a36Sopenharmony_ci    }
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_ci    int my_data_handler(void)
9962306a36Sopenharmony_ci    {
10062306a36Sopenharmony_ci	int rv = 0;
10162306a36Sopenharmony_ci	struct my_data *data;
10262306a36Sopenharmony_ci	struct task_struct *task;
10362306a36Sopenharmony_ci	data = kmalloc(sizeof(*data), GFP_KERNEL);
10462306a36Sopenharmony_ci	if (!data)
10562306a36Sopenharmony_ci		return -ENOMEM;
10662306a36Sopenharmony_ci	kref_init(&data->refcount);
10762306a36Sopenharmony_ci
10862306a36Sopenharmony_ci	kref_get(&data->refcount);
10962306a36Sopenharmony_ci	task = kthread_run(more_data_handling, data, "more_data_handling");
11062306a36Sopenharmony_ci	if (task == ERR_PTR(-ENOMEM)) {
11162306a36Sopenharmony_ci		rv = -ENOMEM;
11262306a36Sopenharmony_ci	        kref_put(&data->refcount, data_release);
11362306a36Sopenharmony_ci		goto out;
11462306a36Sopenharmony_ci	}
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ci	.
11762306a36Sopenharmony_ci	. do stuff with data here
11862306a36Sopenharmony_ci	.
11962306a36Sopenharmony_ci    out:
12062306a36Sopenharmony_ci	kref_put(&data->refcount, data_release);
12162306a36Sopenharmony_ci	return rv;
12262306a36Sopenharmony_ci    }
12362306a36Sopenharmony_ci
This way, it doesn't matter in what order the two threads handle the
data; kref_put() takes care of knowing when the data is no longer
referenced and releasing it.  The kref_get() does not require a lock,
since we already have a valid pointer that we own a refcount for.  The
put needs no lock because nothing tries to get the data without
already holding a pointer.
13062306a36Sopenharmony_ci
In the above example, kref_put() is called twice in both the success
and the error paths.  This is necessary because two references are
held: kref_init() set the reference count to 1, and kref_get() then
incremented it to 2.
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ciNote that the "before" in rule 1 is very important.  You should never
13662306a36Sopenharmony_cido something like::
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	task = kthread_run(more_data_handling, data, "more_data_handling");
13962306a36Sopenharmony_ci	if (task == ERR_PTR(-ENOMEM)) {
14062306a36Sopenharmony_ci		rv = -ENOMEM;
14162306a36Sopenharmony_ci		goto out;
14262306a36Sopenharmony_ci	} else
14362306a36Sopenharmony_ci		/* BAD BAD BAD - get is after the handoff */
14462306a36Sopenharmony_ci		kref_get(&data->refcount);
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_ciDon't assume you know what you are doing and use the above construct.
14762306a36Sopenharmony_ciFirst of all, you may not know what you are doing.  Second, you may
14862306a36Sopenharmony_ciknow what you are doing (there are some situations where locking is
14962306a36Sopenharmony_ciinvolved where the above may be legal) but someone else who doesn't
15062306a36Sopenharmony_ciknow what they are doing may change the code or copy the code.  It's
15162306a36Sopenharmony_cibad style.  Don't do it.
15262306a36Sopenharmony_ci
There are some situations where you can optimize the gets and puts.
For instance, if you are done with an object and are enqueuing it for
something else or passing it off to something else, there is no reason
to do a get then a put::
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_ci	/* Silly extra get and put */
15962306a36Sopenharmony_ci	kref_get(&obj->ref);
16062306a36Sopenharmony_ci	enqueue(obj);
16162306a36Sopenharmony_ci	kref_put(&obj->ref, obj_cleanup);
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_ciJust do the enqueue.  A comment about this is always welcome::
16462306a36Sopenharmony_ci
16562306a36Sopenharmony_ci	enqueue(obj);
16662306a36Sopenharmony_ci	/* We are done with obj, so we pass our refcount off
16762306a36Sopenharmony_ci	   to the queue.  DON'T TOUCH obj AFTER HERE! */
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_ciThe last rule (rule 3) is the nastiest one to handle.  Say, for
17062306a36Sopenharmony_ciinstance, you have a list of items that are each kref-ed, and you wish
17162306a36Sopenharmony_cito get the first one.  You can't just pull the first item off the list
17262306a36Sopenharmony_ciand kref_get() it.  That violates rule 3 because you are not already
17362306a36Sopenharmony_ciholding a valid pointer.  You must add a mutex (or some other lock).
17462306a36Sopenharmony_ciFor instance::
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci	static DEFINE_MUTEX(mutex);
17762306a36Sopenharmony_ci	static LIST_HEAD(q);
17862306a36Sopenharmony_ci	struct my_data
17962306a36Sopenharmony_ci	{
18062306a36Sopenharmony_ci		struct kref      refcount;
18162306a36Sopenharmony_ci		struct list_head link;
18262306a36Sopenharmony_ci	};
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci	static struct my_data *get_entry()
18562306a36Sopenharmony_ci	{
18662306a36Sopenharmony_ci		struct my_data *entry = NULL;
18762306a36Sopenharmony_ci		mutex_lock(&mutex);
18862306a36Sopenharmony_ci		if (!list_empty(&q)) {
18962306a36Sopenharmony_ci			entry = container_of(q.next, struct my_data, link);
19062306a36Sopenharmony_ci			kref_get(&entry->refcount);
19162306a36Sopenharmony_ci		}
19262306a36Sopenharmony_ci		mutex_unlock(&mutex);
19362306a36Sopenharmony_ci		return entry;
19462306a36Sopenharmony_ci	}
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci	static void release_entry(struct kref *ref)
19762306a36Sopenharmony_ci	{
19862306a36Sopenharmony_ci		struct my_data *entry = container_of(ref, struct my_data, refcount);
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci		list_del(&entry->link);
20162306a36Sopenharmony_ci		kfree(entry);
20262306a36Sopenharmony_ci	}
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_ci	static void put_entry(struct my_data *entry)
20562306a36Sopenharmony_ci	{
20662306a36Sopenharmony_ci		mutex_lock(&mutex);
20762306a36Sopenharmony_ci		kref_put(&entry->refcount, release_entry);
20862306a36Sopenharmony_ci		mutex_unlock(&mutex);
20962306a36Sopenharmony_ci	}
21062306a36Sopenharmony_ci
The kref_put() return value is useful if you do not want to hold the
lock during the whole release operation: it is nonzero when the call
dropped the last reference and the release routine was invoked.  Say
you didn't want to call kfree() with the lock held in the example above
(since it is kind of pointless to do so).  You could use kref_put() as
follows::
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	static void release_entry(struct kref *ref)
21762306a36Sopenharmony_ci	{
21862306a36Sopenharmony_ci		/* All work is done after the return from kref_put(). */
21962306a36Sopenharmony_ci	}
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ci	static void put_entry(struct my_data *entry)
22262306a36Sopenharmony_ci	{
22362306a36Sopenharmony_ci		mutex_lock(&mutex);
22462306a36Sopenharmony_ci		if (kref_put(&entry->refcount, release_entry)) {
22562306a36Sopenharmony_ci			list_del(&entry->link);
22662306a36Sopenharmony_ci			mutex_unlock(&mutex);
22762306a36Sopenharmony_ci			kfree(entry);
22862306a36Sopenharmony_ci		} else
22962306a36Sopenharmony_ci			mutex_unlock(&mutex);
23062306a36Sopenharmony_ci	}
23162306a36Sopenharmony_ci
23262306a36Sopenharmony_ciThis is really more useful if you have to call other routines as part
23362306a36Sopenharmony_ciof the free operations that could take a long time or might claim the
23462306a36Sopenharmony_cisame lock.  Note that doing everything in the release routine is still
23562306a36Sopenharmony_cipreferred as it is a little neater.
23662306a36Sopenharmony_ci
23762306a36Sopenharmony_ciThe above example could also be optimized using kref_get_unless_zero() in
23862306a36Sopenharmony_cithe following way::
23962306a36Sopenharmony_ci
24062306a36Sopenharmony_ci	static struct my_data *get_entry()
24162306a36Sopenharmony_ci	{
24262306a36Sopenharmony_ci		struct my_data *entry = NULL;
24362306a36Sopenharmony_ci		mutex_lock(&mutex);
24462306a36Sopenharmony_ci		if (!list_empty(&q)) {
24562306a36Sopenharmony_ci			entry = container_of(q.next, struct my_data, link);
24662306a36Sopenharmony_ci			if (!kref_get_unless_zero(&entry->refcount))
24762306a36Sopenharmony_ci				entry = NULL;
24862306a36Sopenharmony_ci		}
24962306a36Sopenharmony_ci		mutex_unlock(&mutex);
25062306a36Sopenharmony_ci		return entry;
25162306a36Sopenharmony_ci	}
25262306a36Sopenharmony_ci
25362306a36Sopenharmony_ci	static void release_entry(struct kref *ref)
25462306a36Sopenharmony_ci	{
25562306a36Sopenharmony_ci		struct my_data *entry = container_of(ref, struct my_data, refcount);
25662306a36Sopenharmony_ci
25762306a36Sopenharmony_ci		mutex_lock(&mutex);
25862306a36Sopenharmony_ci		list_del(&entry->link);
25962306a36Sopenharmony_ci		mutex_unlock(&mutex);
26062306a36Sopenharmony_ci		kfree(entry);
26162306a36Sopenharmony_ci	}
26262306a36Sopenharmony_ci
26362306a36Sopenharmony_ci	static void put_entry(struct my_data *entry)
26462306a36Sopenharmony_ci	{
26562306a36Sopenharmony_ci		kref_put(&entry->refcount, release_entry);
26662306a36Sopenharmony_ci	}
26762306a36Sopenharmony_ci
This is useful because it removes the mutex lock around kref_put() in
put_entry(), but it's important that kref_get_unless_zero() is enclosed
in the same critical section that finds the entry in the lookup table;
otherwise kref_get_unless_zero() may reference already freed memory.
Note that it is illegal to use kref_get_unless_zero() without checking
its return value.  If you are sure (by already having a valid pointer)
that kref_get_unless_zero() will return true, then use kref_get()
instead.
27562306a36Sopenharmony_ci
Krefs and RCU
=============
27862306a36Sopenharmony_ci
The function kref_get_unless_zero() also makes it possible to use RCU
locking for lookups in the above example::
28162306a36Sopenharmony_ci
28262306a36Sopenharmony_ci	struct my_data
28362306a36Sopenharmony_ci	{
28462306a36Sopenharmony_ci		struct rcu_head rhead;
28562306a36Sopenharmony_ci		.
28662306a36Sopenharmony_ci		struct kref refcount;
28762306a36Sopenharmony_ci		.
28862306a36Sopenharmony_ci		.
28962306a36Sopenharmony_ci	};
29062306a36Sopenharmony_ci
29162306a36Sopenharmony_ci	static struct my_data *get_entry_rcu()
29262306a36Sopenharmony_ci	{
29362306a36Sopenharmony_ci		struct my_data *entry = NULL;
29462306a36Sopenharmony_ci		rcu_read_lock();
29562306a36Sopenharmony_ci		if (!list_empty(&q)) {
29662306a36Sopenharmony_ci			entry = container_of(q.next, struct my_data, link);
29762306a36Sopenharmony_ci			if (!kref_get_unless_zero(&entry->refcount))
29862306a36Sopenharmony_ci				entry = NULL;
29962306a36Sopenharmony_ci		}
30062306a36Sopenharmony_ci		rcu_read_unlock();
30162306a36Sopenharmony_ci		return entry;
30262306a36Sopenharmony_ci	}
30362306a36Sopenharmony_ci
30462306a36Sopenharmony_ci	static void release_entry_rcu(struct kref *ref)
30562306a36Sopenharmony_ci	{
30662306a36Sopenharmony_ci		struct my_data *entry = container_of(ref, struct my_data, refcount);
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_ci		mutex_lock(&mutex);
30962306a36Sopenharmony_ci		list_del_rcu(&entry->link);
31062306a36Sopenharmony_ci		mutex_unlock(&mutex);
31162306a36Sopenharmony_ci		kfree_rcu(entry, rhead);
31262306a36Sopenharmony_ci	}
31362306a36Sopenharmony_ci
31462306a36Sopenharmony_ci	static void put_entry(struct my_data *entry)
31562306a36Sopenharmony_ci	{
31662306a36Sopenharmony_ci		kref_put(&entry->refcount, release_entry_rcu);
31762306a36Sopenharmony_ci	}
31862306a36Sopenharmony_ci
But note that the struct kref member needs to remain in valid memory for
an RCU grace period after release_entry_rcu() is called.  That can be
accomplished by using kfree_rcu(entry, rhead) as done above, or by
calling synchronize_rcu() before calling kfree(), but note that
synchronize_rcu() may sleep for a substantial amount of time.
324