// SPDX-License-Identifier: GPL-2.0-or-later
#include <linux/bug.h>
#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/processor.h>
#include <linux/smp.h>
#include <linux/topology.h>
#include <linux/sched/clock.h>
#include <asm/qspinlock.h>
#include <asm/paravirt.h>

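/*
 * A CPU can be interrupted while queueing on one lock and then queue on
 * other locks from the interrupt handlers, so per-CPU qnodes are sized for
 * the contexts that can nest (task, softirq, hardirq, NMI).
 */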
#define MAX_NODES	4

struct qnode {
	struct qnode	*next;
	struct qspinlock *lock;
	int		cpu;
	int		yield_cpu;
	u8		locked; /* 1 if lock acquired */
};

struct qnodes {
	int		count;
	struct qnode nodes[MAX_NODES];
};

/* Tuning parameters */
static int steal_spins __read_mostly = (1 << 5);
static int remote_steal_spins __read_mostly = (1 << 2);
#if _Q_SPIN_TRY_LOCK_STEAL == 1
static const bool maybe_stealers = true;
#else
static bool maybe_stealers __read_mostly = true;
#endif
static int head_spins __read_mostly = (1 << 8);

static bool pv_yield_owner __read_mostly = true;
static bool pv_yield_allow_steal __read_mostly = false;
static bool pv_spin_on_preempted_owner __read_mostly = false;
static bool pv_sleepy_lock __read_mostly = true;
static bool pv_sleepy_lock_sticky __read_mostly = false;
static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
static int pv_sleepy_lock_factor __read_mostly = 256;
static bool pv_yield_prev __read_mostly = true;
static bool pv_yield_propagate_owner __read_mostly = true;
static bool pv_prod_head __read_mostly = false;

static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock);

#if _Q_SPIN_SPEC_BARRIER == 1
#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0)
#else
#define spec_barrier() do { } while (0)
#endif

static __always_inline bool recently_sleepy(void)
{
	/* pv_sleepy_lock is true when this is called */
	if (pv_sleepy_lock_interval_ns) {
		u64 seen = this_cpu_read(sleepy_lock_seen_clock);

		if (seen) {
			u64 delta = sched_clock() - seen;
			if (delta < pv_sleepy_lock_interval_ns)
				return true;
			this_cpu_write(sleepy_lock_seen_clock, 0);
		}
	}

	return false;
}

static __always_inline int get_steal_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return steal_spins * pv_sleepy_lock_factor;
	else
		return steal_spins;
}

static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return remote_steal_spins * pv_sleepy_lock_factor;
	else
		return remote_steal_spins;
}

static __always_inline int get_head_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return head_spins * pv_sleepy_lock_factor;
	else
		return head_spins;
}

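/*
 * The tail is encoded as CPU number + 1, so a tail value of 0 in the lock
 * word means "no queue".
 */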
static inline u32 encode_tail_cpu(int cpu)
{
	return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
}

static inline int decode_tail_cpu(u32 val)
{
	return (val >> _Q_TAIL_CPU_OFFSET) - 1;
}

static inline int get_owner_cpu(u32 val)
{
	return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET;
}

/*
 * Try to acquire the lock if it was not already locked. If the tail matches
 * mytail then clear it, otherwise leave it unchanged. Return previous value.
 *
 * This is used by the head of the queue to acquire the lock and clean up
 * its tail if it was the last one queued.
 */
static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
{
	u32 newval = queued_spin_encode_locked_val();
	u32 prev, tmp;

	asm volatile(
"1:	lwarx	%0,0,%2,%7	# trylock_clean_tail			\n"
	/* This test is necessary if there could be stealers */
"	andi.	%1,%0,%5						\n"
"	bne	3f							\n"
	/* Test whether the lock tail == mytail */
"	and	%1,%0,%6						\n"
"	cmpw	0,%1,%3							\n"
	/* Merge the new locked value */
"	or	%1,%1,%4						\n"
"	bne	2f							\n"
	/* If the lock tail matched, then clear it, otherwise leave it. */
"	andc	%1,%1,%6						\n"
"2:	stwcx.	%1,0,%2							\n"
"	bne-	1b							\n"
"\t"	PPC_ACQUIRE_BARRIER "						\n"
"3:									\n"
	: "=&r" (prev), "=&r" (tmp)
	: "r" (&lock->val), "r"(tail), "r" (newval),
	  "i" (_Q_LOCKED_VAL),
	  "r" (_Q_TAIL_CPU_MASK),
	  "i" (_Q_SPIN_EH_HINT)
	: "cr0", "memory");

	return prev;
}

/*
 * Publish our tail, replacing previous tail. Return previous value.
 *
 * This provides a release barrier for publishing node, this pairs with the
 * acquire barrier in get_tail_qnode() when the next CPU finds this tail
 * value.
 */
static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
{
	u32 prev, tmp;

	kcsan_release();

	asm volatile(
"\t"	PPC_RELEASE_BARRIER "						\n"
"1:	lwarx	%0,0,%2		# publish_tail_cpu			\n"
"	andc	%1,%0,%4						\n"
"	or	%1,%1,%3						\n"
"	stwcx.	%1,0,%2							\n"
"	bne-	1b							\n"
	: "=&r" (prev), "=&r"(tmp)
	: "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK)
	: "cr0", "memory");

	return prev;
}

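/*
 * _Q_MUST_Q_VAL is set by the queue head after it has spun for long enough;
 * it tells would-be stealers to stop stealing and queue up instead.
 */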
static __always_inline u32 set_mustq(struct qspinlock *lock)
{
	u32 prev;

	asm volatile(
"1:	lwarx	%0,0,%1		# set_mustq				\n"
"	or	%0,%0,%2						\n"
"	stwcx.	%0,0,%1							\n"
"	bne-	1b							\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
	: "cr0", "memory");

	return prev;
}

static __always_inline u32 clear_mustq(struct qspinlock *lock)
{
	u32 prev;

	asm volatile(
"1:	lwarx	%0,0,%1		# clear_mustq				\n"
"	andc	%0,%0,%2						\n"
"	stwcx.	%0,0,%1							\n"
"	bne-	1b							\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
	: "cr0", "memory");

	return prev;
}

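/*
 * Atomically set _Q_SLEEPY_VAL in the lock word, but only if the word still
 * equals @old. Returns true on success.
 */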
static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old)
{
	u32 prev;
	u32 new = old | _Q_SLEEPY_VAL;

	BUG_ON(!(old & _Q_LOCKED_VAL));
	BUG_ON(old & _Q_SLEEPY_VAL);

	asm volatile(
"1:	lwarx	%0,0,%1		# try_set_sleepy			\n"
"	cmpw	0,%0,%2							\n"
"	bne-	2f							\n"
"	stwcx.	%3,0,%1							\n"
"	bne-	1b							\n"
"2:									\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r"(old), "r" (new)
	: "cr0", "memory");

	return likely(prev == old);
}

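/*
 * "Sleepy" lock heuristic: when the lock owner or a queued waiter is seen to
 * be preempted by the hypervisor, mark the lock (and optionally record a
 * timestamp) so that spin counts are scaled up by pv_sleepy_lock_factor.
 */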
static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val)
{
	if (pv_sleepy_lock) {
		if (pv_sleepy_lock_interval_ns)
			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
		if (!(val & _Q_SLEEPY_VAL))
			try_set_sleepy(lock, val);
	}
}

static __always_inline void seen_sleepy_lock(void)
{
	if (pv_sleepy_lock && pv_sleepy_lock_interval_ns)
		this_cpu_write(sleepy_lock_seen_clock, sched_clock());
}

static __always_inline void seen_sleepy_node(struct qspinlock *lock, u32 val)
{
	if (pv_sleepy_lock) {
		if (pv_sleepy_lock_interval_ns)
			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
		if (val & _Q_LOCKED_VAL) {
			if (!(val & _Q_SLEEPY_VAL))
				try_set_sleepy(lock, val);
		}
	}
}

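/*
 * Look up the qnode that published @val as the lock tail: walk the owning
 * CPU's per-CPU nodes and return the one queueing on @lock.
 */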
static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
{
	int cpu = decode_tail_cpu(val);
	struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu);
	int idx;

	/*
	 * After publishing the new tail and finding a previous tail in the
	 * previous val (which is the control dependency), this barrier
	 * orders the release barrier in publish_tail_cpu performed by the
	 * last CPU, with subsequently looking at its qnode structures
	 * after the barrier.
	 */
	smp_acquire__after_ctrl_dep();

	for (idx = 0; idx < MAX_NODES; idx++) {
		struct qnode *qnode = &qnodesp->nodes[idx];
		if (qnode->lock == lock)
			return qnode;
	}

	BUG();
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
{
	int owner;
	u32 yield_count;
	bool preempted = false;

	BUG_ON(!(val & _Q_LOCKED_VAL));

	if (!paravirt)
		goto relax;

	if (!pv_yield_owner)
		goto relax;

	owner = get_owner_cpu(val);
	yield_count = yield_count_of(owner);

	if ((yield_count & 1) == 0)
		goto relax; /* owner vcpu is running */

	spin_end();

	seen_sleepy_owner(lock, val);
	preempted = true;

	/*
	 * Read the lock word after sampling the yield count. On the other side
	 * there may be a wmb because the yield count update is done by the
	 * hypervisor preemption and the value update by the OS, however this
	 * ordering might reduce the chance of out of order accesses and
	 * improve the heuristic.
	 */
	smp_rmb();

	if (READ_ONCE(lock->val) == val) {
		if (mustq)
			clear_mustq(lock);
		yield_to_preempted(owner, yield_count);
		if (mustq)
			set_mustq(lock);
		spin_begin();

		/* Don't relax if we yielded. Maybe we should? */
		return preempted;
	}
	spin_begin();
relax:
	spin_cpu_relax();

	return preempted;
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
{
	return __yield_to_locked_owner(lock, val, paravirt, false);
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
{
	bool mustq = false;

	if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal)
		mustq = true;

	return __yield_to_locked_owner(lock, val, paravirt, mustq);
}

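/*
 * Propagate the lock owner's CPU number down the queue, so waiters behind us
 * can direct their hypervisor yields at the (possibly preempted) owner
 * rather than only at their immediate predecessor.
 */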
static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
{
	struct qnode *next;
	int owner;

	if (!paravirt)
		return;
	if (!pv_yield_propagate_owner)
		return;

	owner = get_owner_cpu(val);
	if (*set_yield_cpu == owner)
		return;

	next = READ_ONCE(node->next);
	if (!next)
		return;

	if (vcpu_is_preempted(owner)) {
		next->yield_cpu = owner;
		*set_yield_cpu = owner;
	} else if (*set_yield_cpu != -1) {
		next->yield_cpu = owner;
		*set_yield_cpu = owner;
	}
}

/* Called inside spin_begin() */
static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
{
	int prev_cpu = decode_tail_cpu(val);
	u32 yield_count;
	int yield_cpu;
	bool preempted = false;

	if (!paravirt)
		goto relax;

	if (!pv_yield_propagate_owner)
		goto yield_prev;

	yield_cpu = READ_ONCE(node->yield_cpu);
	if (yield_cpu == -1) {
		/* Propagate back the -1 CPU */
		if (node->next && node->next->yield_cpu != -1)
			node->next->yield_cpu = yield_cpu;
		goto yield_prev;
	}

	yield_count = yield_count_of(yield_cpu);
	if ((yield_count & 1) == 0)
		goto yield_prev; /* owner vcpu is running */

	if (get_owner_cpu(READ_ONCE(lock->val)) != yield_cpu)
		goto yield_prev; /* re-sample lock owner */

	spin_end();

	preempted = true;
	seen_sleepy_node(lock, val);

	smp_rmb();

	if (yield_cpu == node->yield_cpu) {
		if (node->next && node->next->yield_cpu != yield_cpu)
			node->next->yield_cpu = yield_cpu;
		yield_to_preempted(yield_cpu, yield_count);
		spin_begin();
		return preempted;
	}
	spin_begin();

yield_prev:
	if (!pv_yield_prev)
		goto relax;

	yield_count = yield_count_of(prev_cpu);
	if ((yield_count & 1) == 0)
		goto relax; /* owner vcpu is running */

	spin_end();

	preempted = true;
	seen_sleepy_node(lock, val);

	smp_rmb(); /* See __yield_to_locked_owner comment */

	if (!READ_ONCE(node->locked)) {
		yield_to_preempted(prev_cpu, yield_count);
		spin_begin();
		return preempted;
	}
	spin_begin();

relax:
	spin_cpu_relax();

	return preempted;
}

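/*
 * Decide whether a would-be stealer should give up and queue: after
 * steal_spins iterations in general, or after the (smaller)
 * remote_steal_spins count when the owner is on a different NUMA node.
 */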
static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy)
{
	if (iters >= get_steal_spins(paravirt, sleepy))
		return true;

	if (IS_ENABLED(CONFIG_NUMA) &&
	    (iters >= get_remote_steal_spins(paravirt, sleepy))) {
		int cpu = get_owner_cpu(val);
		if (numa_node_id() != cpu_to_node(cpu))
			return true;
	}
	return false;
}

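/*
 * Spin trying to "steal" the lock without queueing, yielding to a preempted
 * owner where appropriate. Returns true if the lock was acquired.
 */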
static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
{
	bool seen_preempted = false;
	bool sleepy = false;
	int iters = 0;
	u32 val;

	if (!steal_spins) {
		/* XXX: should spin_on_preempted_owner do anything here? */
		return false;
	}

	/* Attempt to steal the lock */
	spin_begin();
	do {
		bool preempted = false;

		val = READ_ONCE(lock->val);
		if (val & _Q_MUST_Q_VAL)
			break;
		spec_barrier();

		if (unlikely(!(val & _Q_LOCKED_VAL))) {
			spin_end();
			if (__queued_spin_trylock_steal(lock))
				return true;
			spin_begin();
		} else {
			preempted = yield_to_locked_owner(lock, val, paravirt);
		}

		if (paravirt && pv_sleepy_lock) {
			if (!sleepy) {
				if (val & _Q_SLEEPY_VAL) {
					seen_sleepy_lock();
					sleepy = true;
				} else if (recently_sleepy()) {
					sleepy = true;
				}
			}
			if (pv_sleepy_lock_sticky && seen_preempted &&
			    !(val & _Q_SLEEPY_VAL)) {
				if (try_set_sleepy(lock, val))
					val |= _Q_SLEEPY_VAL;
			}
		}

		if (preempted) {
			seen_preempted = true;
			sleepy = true;
			if (!pv_spin_on_preempted_owner)
				iters++;
			/*
			 * pv_spin_on_preempted_owner doesn't increase iters
			 * while the owner is preempted -- we won't interfere
			 * with it by definition. This could introduce some
			 * latency issue if we continually observe preempted
			 * owners, but hopefully that's a rare corner case of
			 * a badly oversubscribed system.
			 */
		} else {
			iters++;
		}
	} while (!steal_break(val, iters, paravirt, sleepy));

	spin_end();

	return false;
}

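/*
 * Queue on the lock using a per-CPU MCS node: publish our tail, wait to
 * become the queue head, spin (and yield) until the lock is free, take it,
 * then hand the "locked" flag to the next waiter if there is one.
 */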
static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
{
	struct qnodes *qnodesp;
	struct qnode *next, *node;
	u32 val, old, tail;
	bool seen_preempted = false;
	bool sleepy = false;
	bool mustq = false;
	int idx;
	int set_yield_cpu = -1;
	int iters = 0;

	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));

	qnodesp = this_cpu_ptr(&qnodes);
	if (unlikely(qnodesp->count >= MAX_NODES)) {
		spec_barrier();
		while (!queued_spin_trylock(lock))
			cpu_relax();
		return;
	}

	idx = qnodesp->count++;
	/*
	 * Ensure that we increment the head node->count before initialising
	 * the actual node. If the compiler is kind enough to reorder these
	 * stores, then an IRQ could overwrite our assignments.
	 */
	barrier();
	node = &qnodesp->nodes[idx];
	node->next = NULL;
	node->lock = lock;
	node->cpu = smp_processor_id();
	node->yield_cpu = -1;
	node->locked = 0;

	tail = encode_tail_cpu(node->cpu);

	/*
	 * Assign all attributes of a node before it can be published.
	 * Issues an lwsync, serving as a release barrier, as well as a
	 * compiler barrier.
	 */
	old = publish_tail_cpu(lock, tail);

	/*
	 * If there was a previous node, link it and wait until reaching the
	 * head of the waitqueue.
	 */
	if (old & _Q_TAIL_CPU_MASK) {
		struct qnode *prev = get_tail_qnode(lock, old);

		/* Link @node into the waitqueue. */
		WRITE_ONCE(prev->next, node);

		/* Wait for mcs node lock to be released */
		spin_begin();
		while (!READ_ONCE(node->locked)) {
			spec_barrier();

			if (yield_to_prev(lock, node, old, paravirt))
				seen_preempted = true;
		}
		spec_barrier();
		spin_end();

		/* Clear out stale propagated yield_cpu */
		if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1)
			node->yield_cpu = -1;

		smp_rmb(); /* acquire barrier for the mcs lock */

		/*
		 * Generic qspinlocks have this prefetch here, but it seems
		 * like it could cause additional line transitions because
		 * the waiter will keep loading from it.
		 */
		if (_Q_SPIN_PREFETCH_NEXT) {
			next = READ_ONCE(node->next);
			if (next)
				prefetchw(next);
		}
	}

	/* We're at the head of the waitqueue, wait for the lock. */
again:
	spin_begin();
	for (;;) {
		bool preempted;

		val = READ_ONCE(lock->val);
		if (!(val & _Q_LOCKED_VAL))
			break;
		spec_barrier();

		if (paravirt && pv_sleepy_lock && maybe_stealers) {
			if (!sleepy) {
				if (val & _Q_SLEEPY_VAL) {
					seen_sleepy_lock();
					sleepy = true;
				} else if (recently_sleepy()) {
					sleepy = true;
				}
			}
			if (pv_sleepy_lock_sticky && seen_preempted &&
			    !(val & _Q_SLEEPY_VAL)) {
				if (try_set_sleepy(lock, val))
					val |= _Q_SLEEPY_VAL;
			}
		}

		propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
		preempted = yield_head_to_locked_owner(lock, val, paravirt);
		if (!maybe_stealers)
			continue;

		if (preempted)
			seen_preempted = true;

		if (paravirt && preempted) {
			sleepy = true;

			if (!pv_spin_on_preempted_owner)
				iters++;
		} else {
			iters++;
		}

		if (!mustq && iters >= get_head_spins(paravirt, sleepy)) {
			mustq = true;
			set_mustq(lock);
			val |= _Q_MUST_Q_VAL;
		}
	}
	spec_barrier();
	spin_end();

	/* If we're the last queued, must clean up the tail. */
	old = trylock_clean_tail(lock, tail);
	if (unlikely(old & _Q_LOCKED_VAL)) {
		BUG_ON(!maybe_stealers);
		goto again; /* Can only be true if maybe_stealers. */
	}

	if ((old & _Q_TAIL_CPU_MASK) == tail)
		goto release; /* We were the tail, no next. */

	/* There is a next, must wait for node->next != NULL (MCS protocol) */
	next = READ_ONCE(node->next);
	if (!next) {
		spin_begin();
		while (!(next = READ_ONCE(node->next)))
			cpu_relax();
		spin_end();
	}
	spec_barrier();

	/*
	 * Unlock the next mcs waiter node. Release barrier is not required
	 * here because the acquirer is only accessing the lock word, and
	 * the acquire barrier we took the lock with orders that update vs
	 * this store to locked. The corresponding barrier is the smp_rmb()
	 * acquire barrier for mcs lock, above.
	 */
	if (paravirt && pv_prod_head) {
		int next_cpu = next->cpu;
		WRITE_ONCE(next->locked, 1);
		if (_Q_SPIN_MISO)
			asm volatile("miso" ::: "memory");
		if (vcpu_is_preempted(next_cpu))
			prod_cpu(next_cpu);
	} else {
		WRITE_ONCE(next->locked, 1);
		if (_Q_SPIN_MISO)
			asm volatile("miso" ::: "memory");
	}

release:
	qnodesp->count--; /* release the node */
}

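/*
 * Slow path entry point: first try to steal the lock outright, then fall
 * back to queueing. The paravirt variant is used when running as a shared
 * processor guest, where vCPU preemption is possible.
 */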
void queued_spin_lock_slowpath(struct qspinlock *lock)
{
	/*
	 * This looks funny, but it induces the compiler to inline both
	 * sides of the branch rather than share code as when the condition
	 * is passed as the paravirt argument to the functions.
	 */
	if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
		if (try_to_steal_lock(lock, true)) {
			spec_barrier();
			return;
		}
		queued_spin_lock_mcs_queue(lock, true);
	} else {
		if (try_to_steal_lock(lock, false)) {
			spec_barrier();
			return;
		}
		queued_spin_lock_mcs_queue(lock, false);
	}
}
EXPORT_SYMBOL(queued_spin_lock_slowpath);

#ifdef CONFIG_PARAVIRT_SPINLOCKS
void pv_spinlocks_init(void)
{
}
#endif

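/*
 * The remainder of this file exposes the tuning parameters above through
 * debugfs. With debugfs mounted at /sys/kernel/debug, they appear under the
 * arch directory (/sys/kernel/debug/powerpc/) and can be adjusted at
 * runtime, e.g. (illustrative only):
 *
 *   echo 0 > /sys/kernel/debug/powerpc/qspl_steal_spins
 *   cat /sys/kernel/debug/powerpc/qspl_head_spins
 */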
#include <linux/debugfs.h>
static int steal_spins_set(void *data, u64 val)
{
#if _Q_SPIN_TRY_LOCK_STEAL == 1
	/* MAYBE_STEAL remains true */
	steal_spins = val;
#else
	static DEFINE_MUTEX(lock);

	/*
	 * The lock slow path has a !maybe_stealers case that can assume
	 * the head of queue will not see concurrent waiters. That waiter
	 * is unsafe in the presence of stealers, so we must keep them away
	 * from one another.
	 */

	mutex_lock(&lock);
	if (val && !steal_spins) {
		maybe_stealers = true;
		/* wait for queue head waiter to go away */
		synchronize_rcu();
		steal_spins = val;
	} else if (!val && steal_spins) {
		steal_spins = val;
		/* wait for all possible stealers to go away */
		synchronize_rcu();
		maybe_stealers = false;
	} else {
		steal_spins = val;
	}
	mutex_unlock(&lock);
#endif

	return 0;
}

static int steal_spins_get(void *data, u64 *val)
{
	*val = steal_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n");

static int remote_steal_spins_set(void *data, u64 val)
{
	remote_steal_spins = val;

	return 0;
}

static int remote_steal_spins_get(void *data, u64 *val)
{
	*val = remote_steal_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n");

static int head_spins_set(void *data, u64 val)
{
	head_spins = val;

	return 0;
}

static int head_spins_get(void *data, u64 *val)
{
	*val = head_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n");

static int pv_yield_owner_set(void *data, u64 val)
{
	pv_yield_owner = !!val;

	return 0;
}

static int pv_yield_owner_get(void *data, u64 *val)
{
	*val = pv_yield_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");

static int pv_yield_allow_steal_set(void *data, u64 val)
{
	pv_yield_allow_steal = !!val;

	return 0;
}

static int pv_yield_allow_steal_get(void *data, u64 *val)
{
	*val = pv_yield_allow_steal;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");

static int pv_spin_on_preempted_owner_set(void *data, u64 val)
{
	pv_spin_on_preempted_owner = !!val;

	return 0;
}

static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
{
	*val = pv_spin_on_preempted_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");

static int pv_sleepy_lock_set(void *data, u64 val)
{
	pv_sleepy_lock = !!val;

	return 0;
}

static int pv_sleepy_lock_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n");

static int pv_sleepy_lock_sticky_set(void *data, u64 val)
{
	pv_sleepy_lock_sticky = !!val;

	return 0;
}

static int pv_sleepy_lock_sticky_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_sticky;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n");

static int pv_sleepy_lock_interval_ns_set(void *data, u64 val)
{
	pv_sleepy_lock_interval_ns = val;

	return 0;
}

static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_interval_ns;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n");

static int pv_sleepy_lock_factor_set(void *data, u64 val)
{
	pv_sleepy_lock_factor = val;

	return 0;
}

static int pv_sleepy_lock_factor_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_factor;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n");

static int pv_yield_prev_set(void *data, u64 val)
{
	pv_yield_prev = !!val;

	return 0;
}

static int pv_yield_prev_get(void *data, u64 *val)
{
	*val = pv_yield_prev;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");

static int pv_yield_propagate_owner_set(void *data, u64 val)
{
	pv_yield_propagate_owner = !!val;

	return 0;
}

static int pv_yield_propagate_owner_get(void *data, u64 *val)
{
	*val = pv_yield_propagate_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n");

static int pv_prod_head_set(void *data, u64 val)
{
	pv_prod_head = !!val;

	return 0;
}

static int pv_prod_head_get(void *data, u64 *val)
{
	*val = pv_prod_head;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n");

static __init int spinlock_debugfs_init(void)
{
	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
	debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins);
	debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
	if (is_shared_processor()) {
		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
		debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
		debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner);
		debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock);
		debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky);
		debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns);
		debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor);
		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
		debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner);
		debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
	}

	return 0;
}
device_initcall(spinlock_debugfs_init);