1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Xen event channels
4 *
5 * Xen models interrupts with abstract event channels.  Because each
 * domain gets 1024 event channels, but NR_IRQS is not that large, we
7 * must dynamically map irqs<->event channels.  The event channels
8 * interface with the rest of the kernel by defining a xen interrupt
9 * chip.  When an event is received, it is mapped to an irq and sent
10 * through the normal interrupt processing path.
11 *
12 * There are four kinds of events which can be mapped to an event
13 * channel:
14 *
15 * 1. Inter-domain notifications.  This includes all the virtual
16 *    device events, since they're driven by front-ends in another domain
17 *    (typically dom0).
18 * 2. VIRQs, typically used for timers.  These are per-cpu events.
19 * 3. IPIs.
20 * 4. PIRQs - Hardware interrupts.
21 *
22 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
23 */
24
25#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
26
27#include <linux/linkage.h>
28#include <linux/interrupt.h>
29#include <linux/irq.h>
30#include <linux/moduleparam.h>
31#include <linux/string.h>
32#include <linux/memblock.h>
33#include <linux/slab.h>
34#include <linux/irqnr.h>
35#include <linux/pci.h>
36#include <linux/rcupdate.h>
37#include <linux/spinlock.h>
38#include <linux/cpuhotplug.h>
39#include <linux/atomic.h>
40#include <linux/ktime.h>
41
42#ifdef CONFIG_X86
43#include <asm/desc.h>
44#include <asm/ptrace.h>
45#include <asm/idtentry.h>
46#include <asm/irq.h>
47#include <asm/io_apic.h>
48#include <asm/i8259.h>
49#include <asm/xen/cpuid.h>
50#include <asm/xen/pci.h>
51#endif
52#include <asm/sync_bitops.h>
53#include <asm/xen/hypercall.h>
54#include <asm/xen/hypervisor.h>
55#include <xen/page.h>
56
57#include <xen/xen.h>
58#include <xen/hvm.h>
59#include <xen/xen-ops.h>
60#include <xen/events.h>
61#include <xen/interface/xen.h>
62#include <xen/interface/event_channel.h>
63#include <xen/interface/hvm/hvm_op.h>
64#include <xen/interface/hvm/params.h>
65#include <xen/interface/physdev.h>
66#include <xen/interface/sched.h>
67#include <xen/interface/vcpu.h>
68#include <xen/xenbus.h>
69#include <asm/hw_irq.h>
70
71#include "events_internal.h"
72
73#undef MODULE_PARAM_PREFIX
74#define MODULE_PARAM_PREFIX "xen."
75
76/* Interrupt types. */
77enum xen_irq_type {
78	IRQT_UNBOUND = 0,
79	IRQT_PIRQ,
80	IRQT_VIRQ,
81	IRQT_IPI,
82	IRQT_EVTCHN
83};
84
85/*
86 * Packed IRQ information:
87 * type - enum xen_irq_type
88 * event channel - irq->event channel mapping
89 * cpu - cpu this event channel is bound to
90 * index - type-specific information:
 *    PIRQ - vector, with MSB being "needs EOI", or physical IRQ of the HVM
 *           guest, or GSI (real passthrough IRQ) of the device.
 *    VIRQ - virq number
 *    IPI - IPI vector
 *    EVTCHN - xenbus device of the interdomain peer (if any)
96 */
97struct irq_info {
98	struct list_head list;
99	struct list_head eoi_list;
100	struct rcu_work rwork;
101	short refcnt;
102	u8 spurious_cnt;
103	u8 is_accounted;
104	short type;		/* type: IRQT_* */
105	u8 mask_reason;		/* Why is event channel masked */
106#define EVT_MASK_REASON_EXPLICIT	0x01
107#define EVT_MASK_REASON_TEMPORARY	0x02
108#define EVT_MASK_REASON_EOI_PENDING	0x04
109	u8 is_active;		/* Is event just being handled? */
110	unsigned irq;
111	evtchn_port_t evtchn;   /* event channel */
112	unsigned short cpu;     /* cpu bound */
	unsigned short eoi_cpu; /* EOI must happen on this cpu */
114	unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
115	u64 eoi_time;           /* Time in jiffies when to EOI. */
116	raw_spinlock_t lock;
117	bool is_static;           /* Is event channel static */
118
119	union {
120		unsigned short virq;
121		enum ipi_vector ipi;
122		struct {
123			unsigned short pirq;
124			unsigned short gsi;
125			unsigned char vector;
126			unsigned char flags;
127			uint16_t domid;
128		} pirq;
129		struct xenbus_device *interdomain;
130	} u;
131};
132
133#define PIRQ_NEEDS_EOI	(1 << 0)
134#define PIRQ_SHAREABLE	(1 << 1)
135#define PIRQ_MSI_GROUP	(1 << 2)
136
137static uint __read_mostly event_loop_timeout = 2;
138module_param(event_loop_timeout, uint, 0644);
139
140static uint __read_mostly event_eoi_delay = 10;
141module_param(event_eoi_delay, uint, 0644);
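
/*
 * Both parameters are in jiffies.  As an illustrative example (the exact
 * wall-clock values depend on CONFIG_HZ): with HZ=250 the default
 * event_loop_timeout=2 bounds one run of the event handling loop to
 * roughly 8 ms before further EOIs get deferred, and the default
 * event_eoi_delay=10 delays those deferred EOIs by about 40 ms.
 */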
142
143const struct evtchn_ops *evtchn_ops;
144
145/*
146 * This lock protects updates to the following mapping and reference-count
147 * arrays. The lock does not need to be acquired to read the mapping tables.
148 */
149static DEFINE_MUTEX(irq_mapping_update_lock);
150
151/*
152 * Lock hierarchy:
153 *
154 * irq_mapping_update_lock
155 *   IRQ-desc lock
156 *     percpu eoi_list_lock
157 *       irq_info->lock
158 */
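
/*
 * For example, a path that needs both the mapping lock and a channel's
 * irq_info->lock must take irq_mapping_update_lock first and
 * raw_spin_lock_irqsave(&info->lock, ...) last, never the other way
 * round (a sketch of the rule above, not a specific code path).
 */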
159
160static LIST_HEAD(xen_irq_list_head);
161
162/* IRQ <-> VIRQ mapping. */
163static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
164
165/* IRQ <-> IPI mapping */
166static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
167/* Cache for IPI event channels - needed for hot cpu unplug (avoid RCU usage). */
168static DEFINE_PER_CPU(evtchn_port_t [XEN_NR_IPIS], ipi_to_evtchn) = {[0 ... XEN_NR_IPIS-1] = 0};
169
170/* Event channel distribution data */
171static atomic_t channels_on_cpu[NR_CPUS];
172
173static int **evtchn_to_irq;
174#ifdef CONFIG_X86
175static unsigned long *pirq_eoi_map;
176#endif
177static bool (*pirq_needs_eoi)(unsigned irq);
178
179#define EVTCHN_ROW(e)  (e / (PAGE_SIZE/sizeof(**evtchn_to_irq)))
180#define EVTCHN_COL(e)  (e % (PAGE_SIZE/sizeof(**evtchn_to_irq)))
181#define EVTCHN_PER_ROW (PAGE_SIZE / sizeof(**evtchn_to_irq))
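
/*
 * Worked example (assuming 4 KiB pages and 4-byte ints, i.e.
 * EVTCHN_PER_ROW == 1024): event channel port 2500 is stored in
 * evtchn_to_irq[2][452], since EVTCHN_ROW(2500) == 2 and
 * EVTCHN_COL(2500) == 452.  Rows are allocated on demand in
 * set_evtchn_to_irq().
 */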
182
183/* Xen will never allocate port zero for any purpose. */
184#define VALID_EVTCHN(chn)	((chn) != 0)
185
186static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY];
187
188static struct irq_chip xen_dynamic_chip;
189static struct irq_chip xen_lateeoi_chip;
190static struct irq_chip xen_percpu_chip;
191static struct irq_chip xen_pirq_chip;
192static void enable_dynirq(struct irq_data *data);
193static void disable_dynirq(struct irq_data *data);
194
195static DEFINE_PER_CPU(unsigned int, irq_epoch);
196
197static void clear_evtchn_to_irq_row(int *evtchn_row)
198{
199	unsigned col;
200
201	for (col = 0; col < EVTCHN_PER_ROW; col++)
202		WRITE_ONCE(evtchn_row[col], -1);
203}
204
205static void clear_evtchn_to_irq_all(void)
206{
207	unsigned row;
208
209	for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) {
210		if (evtchn_to_irq[row] == NULL)
211			continue;
212		clear_evtchn_to_irq_row(evtchn_to_irq[row]);
213	}
214}
215
216static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
217{
218	unsigned row;
219	unsigned col;
220	int *evtchn_row;
221
222	if (evtchn >= xen_evtchn_max_channels())
223		return -EINVAL;
224
225	row = EVTCHN_ROW(evtchn);
226	col = EVTCHN_COL(evtchn);
227
228	if (evtchn_to_irq[row] == NULL) {
229		/* Unallocated irq entries return -1 anyway */
230		if (irq == -1)
231			return 0;
232
233		evtchn_row = (int *) __get_free_pages(GFP_KERNEL, 0);
234		if (evtchn_row == NULL)
235			return -ENOMEM;
236
237		clear_evtchn_to_irq_row(evtchn_row);
238
239		/*
240		 * We've prepared an empty row for the mapping. If a different
241		 * thread was faster inserting it, we can drop ours.
242		 */
243		if (cmpxchg(&evtchn_to_irq[row], NULL, evtchn_row) != NULL)
244			free_page((unsigned long) evtchn_row);
245	}
246
247	WRITE_ONCE(evtchn_to_irq[row][col], irq);
248	return 0;
249}
250
251/* Get info for IRQ */
252static struct irq_info *info_for_irq(unsigned irq)
253{
254	if (irq < nr_legacy_irqs())
255		return legacy_info_ptrs[irq];
256	else
257		return irq_get_chip_data(irq);
258}
259
260static void set_info_for_irq(unsigned int irq, struct irq_info *info)
261{
262	if (irq < nr_legacy_irqs())
263		legacy_info_ptrs[irq] = info;
264	else
265		irq_set_chip_data(irq, info);
266}
267
268static struct irq_info *evtchn_to_info(evtchn_port_t evtchn)
269{
270	int irq;
271
272	if (evtchn >= xen_evtchn_max_channels())
273		return NULL;
274	if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
275		return NULL;
276	irq = READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
277
278	return (irq < 0) ? NULL : info_for_irq(irq);
279}
280
281/* Per CPU channel accounting */
282static void channels_on_cpu_dec(struct irq_info *info)
283{
284	if (!info->is_accounted)
285		return;
286
287	info->is_accounted = 0;
288
289	if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
290		return;
291
292	WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1 , 0));
293}
294
295static void channels_on_cpu_inc(struct irq_info *info)
296{
297	if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
298		return;
299
300	if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1,
301					    INT_MAX)))
302		return;
303
304	info->is_accounted = 1;
305}
306
307static void xen_irq_free_desc(unsigned int irq)
308{
309	/* Legacy IRQ descriptors are managed by the arch. */
310	if (irq >= nr_legacy_irqs())
311		irq_free_desc(irq);
312}
313
314static void delayed_free_irq(struct work_struct *work)
315{
316	struct irq_info *info = container_of(to_rcu_work(work), struct irq_info,
317					     rwork);
318	unsigned int irq = info->irq;
319
320	/* Remove the info pointer only now, with no potential users left. */
321	set_info_for_irq(irq, NULL);
322
323	kfree(info);
324
325	xen_irq_free_desc(irq);
326}
327
328/* Constructors for packed IRQ information. */
329static int xen_irq_info_common_setup(struct irq_info *info,
330				     enum xen_irq_type type,
331				     evtchn_port_t evtchn,
332				     unsigned short cpu)
333{
334	int ret;
335
336	BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
337
338	info->type = type;
339	info->evtchn = evtchn;
340	info->cpu = cpu;
341	info->mask_reason = EVT_MASK_REASON_EXPLICIT;
342	raw_spin_lock_init(&info->lock);
343
344	ret = set_evtchn_to_irq(evtchn, info->irq);
345	if (ret < 0)
346		return ret;
347
348	irq_clear_status_flags(info->irq, IRQ_NOREQUEST | IRQ_NOAUTOEN);
349
350	return xen_evtchn_port_setup(evtchn);
351}
352
353static int xen_irq_info_evtchn_setup(struct irq_info *info,
354				     evtchn_port_t evtchn,
355				     struct xenbus_device *dev)
356{
357	int ret;
358
359	ret = xen_irq_info_common_setup(info, IRQT_EVTCHN, evtchn, 0);
360	info->u.interdomain = dev;
361	if (dev)
362		atomic_inc(&dev->event_channels);
363
364	return ret;
365}
366
367static int xen_irq_info_ipi_setup(struct irq_info *info, unsigned int cpu,
368				  evtchn_port_t evtchn, enum ipi_vector ipi)
369{
370	info->u.ipi = ipi;
371
372	per_cpu(ipi_to_irq, cpu)[ipi] = info->irq;
373	per_cpu(ipi_to_evtchn, cpu)[ipi] = evtchn;
374
375	return xen_irq_info_common_setup(info, IRQT_IPI, evtchn, 0);
376}
377
378static int xen_irq_info_virq_setup(struct irq_info *info, unsigned int cpu,
379				   evtchn_port_t evtchn, unsigned int virq)
380{
381	info->u.virq = virq;
382
383	per_cpu(virq_to_irq, cpu)[virq] = info->irq;
384
385	return xen_irq_info_common_setup(info, IRQT_VIRQ, evtchn, 0);
386}
387
388static int xen_irq_info_pirq_setup(struct irq_info *info, evtchn_port_t evtchn,
389				   unsigned int pirq, unsigned int gsi,
390				   uint16_t domid, unsigned char flags)
391{
392	info->u.pirq.pirq = pirq;
393	info->u.pirq.gsi = gsi;
394	info->u.pirq.domid = domid;
395	info->u.pirq.flags = flags;
396
397	return xen_irq_info_common_setup(info, IRQT_PIRQ, evtchn, 0);
398}
399
400static void xen_irq_info_cleanup(struct irq_info *info)
401{
402	set_evtchn_to_irq(info->evtchn, -1);
403	xen_evtchn_port_remove(info->evtchn, info->cpu);
404	info->evtchn = 0;
405	channels_on_cpu_dec(info);
406}
407
408/*
409 * Accessors for packed IRQ information.
410 */
411static evtchn_port_t evtchn_from_irq(unsigned int irq)
412{
413	const struct irq_info *info = NULL;
414
415	if (likely(irq < nr_irqs))
416		info = info_for_irq(irq);
417	if (!info)
418		return 0;
419
420	return info->evtchn;
421}
422
423unsigned int irq_from_evtchn(evtchn_port_t evtchn)
424{
425	struct irq_info *info = evtchn_to_info(evtchn);
426
427	return info ? info->irq : -1;
428}
429EXPORT_SYMBOL_GPL(irq_from_evtchn);
430
431int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq,
432			 evtchn_port_t *evtchn)
433{
434	int irq = per_cpu(virq_to_irq, cpu)[virq];
435
436	*evtchn = evtchn_from_irq(irq);
437
438	return irq;
439}
440
441static enum ipi_vector ipi_from_irq(struct irq_info *info)
442{
443	BUG_ON(info == NULL);
444	BUG_ON(info->type != IRQT_IPI);
445
446	return info->u.ipi;
447}
448
449static unsigned int virq_from_irq(struct irq_info *info)
450{
451	BUG_ON(info == NULL);
452	BUG_ON(info->type != IRQT_VIRQ);
453
454	return info->u.virq;
455}
456
457static unsigned pirq_from_irq(unsigned irq)
458{
459	struct irq_info *info = info_for_irq(irq);
460
461	BUG_ON(info == NULL);
462	BUG_ON(info->type != IRQT_PIRQ);
463
464	return info->u.pirq.pirq;
465}
466
467unsigned int cpu_from_evtchn(evtchn_port_t evtchn)
468{
469	struct irq_info *info = evtchn_to_info(evtchn);
470
471	return info ? info->cpu : 0;
472}
473
474static void do_mask(struct irq_info *info, u8 reason)
475{
476	unsigned long flags;
477
478	raw_spin_lock_irqsave(&info->lock, flags);
479
480	if (!info->mask_reason)
481		mask_evtchn(info->evtchn);
482
483	info->mask_reason |= reason;
484
485	raw_spin_unlock_irqrestore(&info->lock, flags);
486}
487
488static void do_unmask(struct irq_info *info, u8 reason)
489{
490	unsigned long flags;
491
492	raw_spin_lock_irqsave(&info->lock, flags);
493
494	info->mask_reason &= ~reason;
495
496	if (!info->mask_reason)
497		unmask_evtchn(info->evtchn);
498
499	raw_spin_unlock_irqrestore(&info->lock, flags);
500}
501
502#ifdef CONFIG_X86
503static bool pirq_check_eoi_map(unsigned irq)
504{
505	return test_bit(pirq_from_irq(irq), pirq_eoi_map);
506}
507#endif
508
509static bool pirq_needs_eoi_flag(unsigned irq)
510{
511	struct irq_info *info = info_for_irq(irq);
512	BUG_ON(info->type != IRQT_PIRQ);
513
514	return info->u.pirq.flags & PIRQ_NEEDS_EOI;
515}
516
517static void bind_evtchn_to_cpu(struct irq_info *info, unsigned int cpu,
518			       bool force_affinity)
519{
520	if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
521		struct irq_data *data = irq_get_irq_data(info->irq);
522
523		irq_data_update_affinity(data, cpumask_of(cpu));
524		irq_data_update_effective_affinity(data, cpumask_of(cpu));
525	}
526
527	xen_evtchn_port_bind_to_cpu(info->evtchn, cpu, info->cpu);
528
529	channels_on_cpu_dec(info);
530	info->cpu = cpu;
531	channels_on_cpu_inc(info);
532}
533
534/**
535 * notify_remote_via_irq - send event to remote end of event channel via irq
536 * @irq: irq of event channel to send event to
537 *
538 * Unlike notify_remote_via_evtchn(), this is safe to use across
539 * save/restore. Notifications on a broken connection are silently
540 * dropped.
541 */
542void notify_remote_via_irq(int irq)
543{
544	evtchn_port_t evtchn = evtchn_from_irq(irq);
545
546	if (VALID_EVTCHN(evtchn))
547		notify_remote_via_evtchn(evtchn);
548}
549EXPORT_SYMBOL_GPL(notify_remote_via_irq);
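
/*
 * Minimal usage sketch (the ring handling below is hypothetical): a
 * frontend that bound its event channel via bind_evtchn_to_irqhandler()
 * kicks the other end after publishing new requests:
 *
 *	queue_requests_on_shared_ring(priv);	// hypothetical driver code
 *	wmb();			// make requests visible before the kick
 *	notify_remote_via_irq(priv->irq);
 */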
550
551struct lateeoi_work {
552	struct delayed_work delayed;
553	spinlock_t eoi_list_lock;
554	struct list_head eoi_list;
555};
556
557static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);
558
559static void lateeoi_list_del(struct irq_info *info)
560{
561	struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
562	unsigned long flags;
563
564	spin_lock_irqsave(&eoi->eoi_list_lock, flags);
565	list_del_init(&info->eoi_list);
566	spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
567}
568
569static void lateeoi_list_add(struct irq_info *info)
570{
571	struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
572	struct irq_info *elem;
573	u64 now = get_jiffies_64();
574	unsigned long delay;
575	unsigned long flags;
576
577	if (now < info->eoi_time)
578		delay = info->eoi_time - now;
579	else
580		delay = 1;
581
582	spin_lock_irqsave(&eoi->eoi_list_lock, flags);
583
584	elem = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
585					eoi_list);
586	if (!elem || info->eoi_time < elem->eoi_time) {
587		list_add(&info->eoi_list, &eoi->eoi_list);
588		mod_delayed_work_on(info->eoi_cpu, system_wq,
589				    &eoi->delayed, delay);
590	} else {
591		list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) {
592			if (elem->eoi_time <= info->eoi_time)
593				break;
594		}
595		list_add(&info->eoi_list, &elem->eoi_list);
596	}
597
598	spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
599}
600
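/*
 * Illustrative numbers for the spurious-event back-off below (assuming
 * the default threshold of 1): the first spurious event only bumps
 * spurious_cnt, the second delays the EOI by 1 << (2 - 1 - 1) = 1 jiffy,
 * the third by 2 jiffies, and so on, capped at HZ (one second).
 */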
601static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
602{
603	evtchn_port_t evtchn;
604	unsigned int cpu;
605	unsigned int delay = 0;
606
607	evtchn = info->evtchn;
608	if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
609		return;
610
611	if (spurious) {
612		struct xenbus_device *dev = info->u.interdomain;
613		unsigned int threshold = 1;
614
615		if (dev && dev->spurious_threshold)
616			threshold = dev->spurious_threshold;
617
618		if ((1 << info->spurious_cnt) < (HZ << 2)) {
619			if (info->spurious_cnt != 0xFF)
620				info->spurious_cnt++;
621		}
622		if (info->spurious_cnt > threshold) {
623			delay = 1 << (info->spurious_cnt - 1 - threshold);
624			if (delay > HZ)
625				delay = HZ;
626			if (!info->eoi_time)
627				info->eoi_cpu = smp_processor_id();
628			info->eoi_time = get_jiffies_64() + delay;
629			if (dev)
630				atomic_add(delay, &dev->jiffies_eoi_delayed);
631		}
632		if (dev)
633			atomic_inc(&dev->spurious_events);
634	} else {
635		info->spurious_cnt = 0;
636	}
637
638	cpu = info->eoi_cpu;
639	if (info->eoi_time &&
640	    (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) {
641		lateeoi_list_add(info);
642		return;
643	}
644
645	info->eoi_time = 0;
646
647	/* is_active hasn't been reset yet, do it now. */
648	smp_store_release(&info->is_active, 0);
649	do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
650}
651
652static void xen_irq_lateeoi_worker(struct work_struct *work)
653{
654	struct lateeoi_work *eoi;
655	struct irq_info *info;
656	u64 now = get_jiffies_64();
657	unsigned long flags;
658
659	eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
660
661	rcu_read_lock();
662
663	while (true) {
664		spin_lock_irqsave(&eoi->eoi_list_lock, flags);
665
666		info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
667						eoi_list);
668
669		if (info == NULL)
670			break;
671
672		if (now < info->eoi_time) {
673			mod_delayed_work_on(info->eoi_cpu, system_wq,
674					    &eoi->delayed,
675					    info->eoi_time - now);
676			break;
677		}
678
679		list_del_init(&info->eoi_list);
680
681		spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
682
683		info->eoi_time = 0;
684
685		xen_irq_lateeoi_locked(info, false);
686	}
687
688	spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
689
690	rcu_read_unlock();
691}
692
693static void xen_cpu_init_eoi(unsigned int cpu)
694{
695	struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu);
696
697	INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker);
698	spin_lock_init(&eoi->eoi_list_lock);
699	INIT_LIST_HEAD(&eoi->eoi_list);
700}
701
702void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
703{
704	struct irq_info *info;
705
706	rcu_read_lock();
707
708	info = info_for_irq(irq);
709
710	if (info)
711		xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);
712
713	rcu_read_unlock();
714}
715EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
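
/*
 * Usage sketch for a handler bound via one of the *_lateeoi interfaces
 * (names are illustrative): the EOI is only sent once the event has
 * really been processed, and XEN_EOI_FLAG_SPURIOUS is passed when the
 * event carried no work, letting the back-off above throttle a
 * misbehaving remote end.
 *
 *	static irqreturn_t backend_handler(int irq, void *dev_id)
 *	{
 *		bool did_work = process_ring(dev_id);	// hypothetical
 *
 *		xen_irq_lateeoi(irq, did_work ? 0 : XEN_EOI_FLAG_SPURIOUS);
 *		return IRQ_HANDLED;
 *	}
 */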
716
717static struct irq_info *xen_irq_init(unsigned int irq)
718{
719	struct irq_info *info;
720
721	info = kzalloc(sizeof(*info), GFP_KERNEL);
722	if (info) {
723		info->irq = irq;
724		info->type = IRQT_UNBOUND;
725		info->refcnt = -1;
726		INIT_RCU_WORK(&info->rwork, delayed_free_irq);
727
728		set_info_for_irq(irq, info);
729		/*
730		 * Interrupt affinity setting can be immediate. No point
731		 * in delaying it until an interrupt is handled.
732		 */
733		irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
734
735		INIT_LIST_HEAD(&info->eoi_list);
736		list_add_tail(&info->list, &xen_irq_list_head);
737	}
738
739	return info;
740}
741
742static struct irq_info *xen_allocate_irq_dynamic(void)
743{
744	int irq = irq_alloc_desc_from(0, -1);
745	struct irq_info *info = NULL;
746
747	if (irq >= 0) {
748		info = xen_irq_init(irq);
749		if (!info)
750			xen_irq_free_desc(irq);
751	}
752
753	return info;
754}
755
756static struct irq_info *xen_allocate_irq_gsi(unsigned int gsi)
757{
758	int irq;
759	struct irq_info *info;
760
761	/*
762	 * A PV guest has no concept of a GSI (since it has no ACPI
763	 * nor access to/knowledge of the physical APICs). Therefore
764	 * all IRQs are dynamically allocated from the entire IRQ
765	 * space.
766	 */
767	if (xen_pv_domain() && !xen_initial_domain())
768		return xen_allocate_irq_dynamic();
769
770	/* Legacy IRQ descriptors are already allocated by the arch. */
771	if (gsi < nr_legacy_irqs())
772		irq = gsi;
773	else
774		irq = irq_alloc_desc_at(gsi, -1);
775
776	info = xen_irq_init(irq);
777	if (!info)
778		xen_irq_free_desc(irq);
779
780	return info;
781}
782
783static void xen_free_irq(struct irq_info *info)
784{
785	if (WARN_ON(!info))
786		return;
787
788	if (!list_empty(&info->eoi_list))
789		lateeoi_list_del(info);
790
791	list_del(&info->list);
792
793	WARN_ON(info->refcnt > 0);
794
795	queue_rcu_work(system_wq, &info->rwork);
796}
797
798/* Not called for lateeoi events. */
799static void event_handler_exit(struct irq_info *info)
800{
801	smp_store_release(&info->is_active, 0);
802	clear_evtchn(info->evtchn);
803}
804
805static void pirq_query_unmask(int irq)
806{
807	struct physdev_irq_status_query irq_status;
808	struct irq_info *info = info_for_irq(irq);
809
810	BUG_ON(info->type != IRQT_PIRQ);
811
812	irq_status.irq = pirq_from_irq(irq);
813	if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
814		irq_status.flags = 0;
815
816	info->u.pirq.flags &= ~PIRQ_NEEDS_EOI;
817	if (irq_status.flags & XENIRQSTAT_needs_eoi)
818		info->u.pirq.flags |= PIRQ_NEEDS_EOI;
819}
820
821static void eoi_pirq(struct irq_data *data)
822{
823	struct irq_info *info = info_for_irq(data->irq);
824	evtchn_port_t evtchn = info ? info->evtchn : 0;
825	struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
826	int rc = 0;
827
828	if (!VALID_EVTCHN(evtchn))
829		return;
830
831	event_handler_exit(info);
832
833	if (pirq_needs_eoi(data->irq)) {
834		rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
835		WARN_ON(rc);
836	}
837}
838
839static void mask_ack_pirq(struct irq_data *data)
840{
841	disable_dynirq(data);
842	eoi_pirq(data);
843}
844
845static unsigned int __startup_pirq(unsigned int irq)
846{
847	struct evtchn_bind_pirq bind_pirq;
848	struct irq_info *info = info_for_irq(irq);
849	evtchn_port_t evtchn = evtchn_from_irq(irq);
850	int rc;
851
852	BUG_ON(info->type != IRQT_PIRQ);
853
854	if (VALID_EVTCHN(evtchn))
855		goto out;
856
857	bind_pirq.pirq = pirq_from_irq(irq);
858	/* NB. We are happy to share unless we are probing. */
859	bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ?
860					BIND_PIRQ__WILL_SHARE : 0;
861	rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
862	if (rc != 0) {
863		pr_warn("Failed to obtain physical IRQ %d\n", irq);
864		return 0;
865	}
866	evtchn = bind_pirq.port;
867
868	pirq_query_unmask(irq);
869
870	rc = set_evtchn_to_irq(evtchn, irq);
871	if (rc)
872		goto err;
873
874	info->evtchn = evtchn;
875	bind_evtchn_to_cpu(info, 0, false);
876
877	rc = xen_evtchn_port_setup(evtchn);
878	if (rc)
879		goto err;
880
881out:
882	do_unmask(info, EVT_MASK_REASON_EXPLICIT);
883
884	eoi_pirq(irq_get_irq_data(irq));
885
886	return 0;
887
888err:
889	pr_err("irq%d: Failed to set port to irq mapping (%d)\n", irq, rc);
890	xen_evtchn_close(evtchn);
891	return 0;
892}
893
894static unsigned int startup_pirq(struct irq_data *data)
895{
896	return __startup_pirq(data->irq);
897}
898
899static void shutdown_pirq(struct irq_data *data)
900{
901	unsigned int irq = data->irq;
902	struct irq_info *info = info_for_irq(irq);
903	evtchn_port_t evtchn = evtchn_from_irq(irq);
904
905	BUG_ON(info->type != IRQT_PIRQ);
906
907	if (!VALID_EVTCHN(evtchn))
908		return;
909
910	do_mask(info, EVT_MASK_REASON_EXPLICIT);
911	xen_irq_info_cleanup(info);
912	xen_evtchn_close(evtchn);
913}
914
915static void enable_pirq(struct irq_data *data)
916{
917	enable_dynirq(data);
918}
919
920static void disable_pirq(struct irq_data *data)
921{
922	disable_dynirq(data);
923}
924
925int xen_irq_from_gsi(unsigned gsi)
926{
927	struct irq_info *info;
928
929	list_for_each_entry(info, &xen_irq_list_head, list) {
930		if (info->type != IRQT_PIRQ)
931			continue;
932
933		if (info->u.pirq.gsi == gsi)
934			return info->irq;
935	}
936
937	return -1;
938}
939EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
940
941static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
942{
943	evtchn_port_t evtchn;
944	bool close_evtchn = false;
945
946	if (!info) {
947		xen_irq_free_desc(irq);
948		return;
949	}
950
951	if (info->refcnt > 0) {
952		info->refcnt--;
953		if (info->refcnt != 0)
954			return;
955	}
956
957	evtchn = info->evtchn;
958
959	if (VALID_EVTCHN(evtchn)) {
960		unsigned int cpu = info->cpu;
961		struct xenbus_device *dev;
962
963		if (!info->is_static)
964			close_evtchn = true;
965
966		switch (info->type) {
967		case IRQT_VIRQ:
968			per_cpu(virq_to_irq, cpu)[virq_from_irq(info)] = -1;
969			break;
970		case IRQT_IPI:
971			per_cpu(ipi_to_irq, cpu)[ipi_from_irq(info)] = -1;
972			per_cpu(ipi_to_evtchn, cpu)[ipi_from_irq(info)] = 0;
973			break;
974		case IRQT_EVTCHN:
975			dev = info->u.interdomain;
976			if (dev)
977				atomic_dec(&dev->event_channels);
978			break;
979		default:
980			break;
981		}
982
983		xen_irq_info_cleanup(info);
984
985		if (close_evtchn)
986			xen_evtchn_close(evtchn);
987	}
988
989	xen_free_irq(info);
990}
991
992/*
993 * Do not make any assumptions regarding the relationship between the
994 * IRQ number returned here and the Xen pirq argument.
995 *
996 * Note: We don't assign an event channel until the irq actually started
997 * up.  Return an existing irq if we've already got one for the gsi.
998 *
999 * Shareable implies level triggered, not shareable implies edge
1000 * triggered here.
1001 */
1002int xen_bind_pirq_gsi_to_irq(unsigned gsi,
1003			     unsigned pirq, int shareable, char *name)
1004{
1005	struct irq_info *info;
1006	struct physdev_irq irq_op;
1007	int ret;
1008
1009	mutex_lock(&irq_mapping_update_lock);
1010
1011	ret = xen_irq_from_gsi(gsi);
1012	if (ret != -1) {
1013		pr_info("%s: returning irq %d for gsi %u\n",
1014			__func__, ret, gsi);
1015		goto out;
1016	}
1017
1018	info = xen_allocate_irq_gsi(gsi);
1019	if (!info)
1020		goto out;
1021
1022	irq_op.irq = info->irq;
1023	irq_op.vector = 0;
1024
	/*
	 * Only the privileged domain can do this. For non-privileged
	 * domains the pcifront driver provides a PCI bus that makes this
	 * call in the privileged domain on their behalf.
	 */
1028	if (xen_initial_domain() &&
1029	    HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
1030		xen_free_irq(info);
1031		ret = -ENOSPC;
1032		goto out;
1033	}
1034
1035	ret = xen_irq_info_pirq_setup(info, 0, pirq, gsi, DOMID_SELF,
1036			       shareable ? PIRQ_SHAREABLE : 0);
1037	if (ret < 0) {
1038		__unbind_from_irq(info, info->irq);
1039		goto out;
1040	}
1041
1042	pirq_query_unmask(info->irq);
1043	/* We try to use the handler with the appropriate semantic for the
1044	 * type of interrupt: if the interrupt is an edge triggered
1045	 * interrupt we use handle_edge_irq.
1046	 *
1047	 * On the other hand if the interrupt is level triggered we use
1048	 * handle_fasteoi_irq like the native code does for this kind of
1049	 * interrupts.
1050	 *
1051	 * Depending on the Xen version, pirq_needs_eoi might return true
1052	 * not only for level triggered interrupts but for edge triggered
1053	 * interrupts too. In any case Xen always honors the eoi mechanism,
1054	 * not injecting any more pirqs of the same kind if the first one
1055	 * hasn't received an eoi yet. Therefore using the fasteoi handler
1056	 * is the right choice either way.
1057	 */
1058	if (shareable)
1059		irq_set_chip_and_handler_name(info->irq, &xen_pirq_chip,
1060				handle_fasteoi_irq, name);
1061	else
1062		irq_set_chip_and_handler_name(info->irq, &xen_pirq_chip,
1063				handle_edge_irq, name);
1064
1065	ret = info->irq;
1066
1067out:
1068	mutex_unlock(&irq_mapping_update_lock);
1069
1070	return ret;
1071}
1072
1073#ifdef CONFIG_PCI_MSI
1074int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
1075{
1076	int rc;
1077	struct physdev_get_free_pirq op_get_free_pirq;
1078
1079	op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
1080	rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
1081
1082	WARN_ONCE(rc == -ENOSYS,
1083		  "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
1084
1085	return rc ? -1 : op_get_free_pirq.pirq;
1086}
1087
1088int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
1089			     int pirq, int nvec, const char *name, domid_t domid)
1090{
1091	int i, irq, ret;
1092	struct irq_info *info;
1093
1094	mutex_lock(&irq_mapping_update_lock);
1095
1096	irq = irq_alloc_descs(-1, 0, nvec, -1);
1097	if (irq < 0)
1098		goto out;
1099
1100	for (i = 0; i < nvec; i++) {
1101		info = xen_irq_init(irq + i);
1102		if (!info) {
1103			ret = -ENOMEM;
1104			goto error_irq;
1105		}
1106
1107		irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name);
1108
1109		ret = xen_irq_info_pirq_setup(info, 0, pirq + i, 0, domid,
1110					      i == 0 ? 0 : PIRQ_MSI_GROUP);
1111		if (ret < 0)
1112			goto error_irq;
1113	}
1114
1115	ret = irq_set_msi_desc(irq, msidesc);
1116	if (ret < 0)
1117		goto error_irq;
1118out:
1119	mutex_unlock(&irq_mapping_update_lock);
1120	return irq;
1121
1122error_irq:
1123	while (nvec--) {
1124		info = info_for_irq(irq + nvec);
1125		__unbind_from_irq(info, irq + nvec);
1126	}
1127	mutex_unlock(&irq_mapping_update_lock);
1128	return ret;
1129}
1130#endif
1131
1132int xen_destroy_irq(int irq)
1133{
1134	struct physdev_unmap_pirq unmap_irq;
1135	struct irq_info *info = info_for_irq(irq);
1136	int rc = -ENOENT;
1137
1138	mutex_lock(&irq_mapping_update_lock);
1139
	/*
	 * Only the first vector of an MSI group (or a non-MSI pirq) needs
	 * the PIRQ unmap; the remaining vectors of a group carry the
	 * PIRQ_MSI_GROUP flag and are skipped here.
	 */
1145	if (xen_initial_domain() && !(info->u.pirq.flags & PIRQ_MSI_GROUP)) {
1146		unmap_irq.pirq = info->u.pirq.pirq;
1147		unmap_irq.domid = info->u.pirq.domid;
1148		rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
1149		/* If another domain quits without making the pci_disable_msix
1150		 * call, the Xen hypervisor takes care of freeing the PIRQs
1151		 * (free_domain_pirqs).
1152		 */
1153		if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF))
1154			pr_info("domain %d does not have %d anymore\n",
1155				info->u.pirq.domid, info->u.pirq.pirq);
1156		else if (rc) {
1157			pr_warn("unmap irq failed %d\n", rc);
1158			goto out;
1159		}
1160	}
1161
1162	xen_free_irq(info);
1163
1164out:
1165	mutex_unlock(&irq_mapping_update_lock);
1166	return rc;
1167}
1168
1169int xen_irq_from_pirq(unsigned pirq)
1170{
1171	int irq;
1172
1173	struct irq_info *info;
1174
1175	mutex_lock(&irq_mapping_update_lock);
1176
1177	list_for_each_entry(info, &xen_irq_list_head, list) {
1178		if (info->type != IRQT_PIRQ)
1179			continue;
1180		irq = info->irq;
1181		if (info->u.pirq.pirq == pirq)
1182			goto out;
1183	}
1184	irq = -1;
1185out:
1186	mutex_unlock(&irq_mapping_update_lock);
1187
1188	return irq;
1189}
1190
1191
1192int xen_pirq_from_irq(unsigned irq)
1193{
1194	return pirq_from_irq(irq);
1195}
1196EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
1197
1198static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
1199				   struct xenbus_device *dev, bool shared)
1200{
1201	int ret = -ENOMEM;
1202	struct irq_info *info;
1203
1204	if (evtchn >= xen_evtchn_max_channels())
1205		return -ENOMEM;
1206
1207	mutex_lock(&irq_mapping_update_lock);
1208
1209	info = evtchn_to_info(evtchn);
1210
1211	if (!info) {
1212		info = xen_allocate_irq_dynamic();
1213		if (!info)
1214			goto out;
1215
1216		irq_set_chip_and_handler_name(info->irq, chip,
1217					      handle_edge_irq, "event");
1218
1219		ret = xen_irq_info_evtchn_setup(info, evtchn, dev);
1220		if (ret < 0) {
1221			__unbind_from_irq(info, info->irq);
1222			goto out;
1223		}
		/*
		 * New interdomain events are initially bound to vCPU0.
		 * This is required to set up the event channel in the
		 * first place and also important for UP guests because
		 * the affinity setting is not invoked on them so nothing
		 * would bind the channel.
		 */
1231		bind_evtchn_to_cpu(info, 0, false);
1232	} else if (!WARN_ON(info->type != IRQT_EVTCHN)) {
1233		if (shared && !WARN_ON(info->refcnt < 0))
1234			info->refcnt++;
1235	}
1236
1237	ret = info->irq;
1238
1239out:
1240	mutex_unlock(&irq_mapping_update_lock);
1241
1242	return ret;
1243}
1244
1245int bind_evtchn_to_irq(evtchn_port_t evtchn)
1246{
1247	return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip, NULL, false);
1248}
1249EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
1250
1251int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
1252{
1253	return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip, NULL, false);
1254}
1255EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
1256
1257static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
1258{
1259	struct evtchn_bind_ipi bind_ipi;
1260	evtchn_port_t evtchn;
1261	struct irq_info *info;
1262	int ret;
1263
1264	mutex_lock(&irq_mapping_update_lock);
1265
1266	ret = per_cpu(ipi_to_irq, cpu)[ipi];
1267
1268	if (ret == -1) {
1269		info = xen_allocate_irq_dynamic();
1270		if (!info)
1271			goto out;
1272
1273		irq_set_chip_and_handler_name(info->irq, &xen_percpu_chip,
1274					      handle_percpu_irq, "ipi");
1275
1276		bind_ipi.vcpu = xen_vcpu_nr(cpu);
1277		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
1278						&bind_ipi) != 0)
1279			BUG();
1280		evtchn = bind_ipi.port;
1281
1282		ret = xen_irq_info_ipi_setup(info, cpu, evtchn, ipi);
1283		if (ret < 0) {
1284			__unbind_from_irq(info, info->irq);
1285			goto out;
1286		}
1287		/*
1288		 * Force the affinity mask to the target CPU so proc shows
1289		 * the correct target.
1290		 */
1291		bind_evtchn_to_cpu(info, cpu, true);
1292		ret = info->irq;
1293	} else {
1294		info = info_for_irq(ret);
1295		WARN_ON(info == NULL || info->type != IRQT_IPI);
1296	}
1297
1298 out:
1299	mutex_unlock(&irq_mapping_update_lock);
1300	return ret;
1301}
1302
1303static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev,
1304					       evtchn_port_t remote_port,
1305					       struct irq_chip *chip,
1306					       bool shared)
1307{
1308	struct evtchn_bind_interdomain bind_interdomain;
1309	int err;
1310
1311	bind_interdomain.remote_dom  = dev->otherend_id;
1312	bind_interdomain.remote_port = remote_port;
1313
1314	err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
1315					  &bind_interdomain);
1316
1317	return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port,
1318					       chip, dev, shared);
1319}
1320
1321int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev,
1322					   evtchn_port_t remote_port)
1323{
1324	return bind_interdomain_evtchn_to_irq_chip(dev, remote_port,
1325						   &xen_lateeoi_chip, false);
1326}
1327EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
1328
1329static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
1330{
1331	struct evtchn_status status;
1332	evtchn_port_t port;
1333	int rc = -ENOENT;
1334
1335	memset(&status, 0, sizeof(status));
1336	for (port = 0; port < xen_evtchn_max_channels(); port++) {
1337		status.dom = DOMID_SELF;
1338		status.port = port;
1339		rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
1340		if (rc < 0)
1341			continue;
1342		if (status.status != EVTCHNSTAT_virq)
1343			continue;
1344		if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) {
1345			*evtchn = port;
1346			break;
1347		}
1348	}
1349	return rc;
1350}
1351
1352/**
1353 * xen_evtchn_nr_channels - number of usable event channel ports
1354 *
1355 * This may be less than the maximum supported by the current
1356 * hypervisor ABI. Use xen_evtchn_max_channels() for the maximum
1357 * supported.
1358 */
1359unsigned xen_evtchn_nr_channels(void)
1360{
	return evtchn_ops->nr_channels();
1362}
1363EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels);
1364
1365int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
1366{
1367	struct evtchn_bind_virq bind_virq;
1368	evtchn_port_t evtchn = 0;
1369	struct irq_info *info;
1370	int ret;
1371
1372	mutex_lock(&irq_mapping_update_lock);
1373
1374	ret = per_cpu(virq_to_irq, cpu)[virq];
1375
1376	if (ret == -1) {
1377		info = xen_allocate_irq_dynamic();
1378		if (!info)
1379			goto out;
1380
1381		if (percpu)
1382			irq_set_chip_and_handler_name(info->irq, &xen_percpu_chip,
1383						      handle_percpu_irq, "virq");
1384		else
1385			irq_set_chip_and_handler_name(info->irq, &xen_dynamic_chip,
1386						      handle_edge_irq, "virq");
1387
1388		bind_virq.virq = virq;
1389		bind_virq.vcpu = xen_vcpu_nr(cpu);
1390		ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
1391						&bind_virq);
1392		if (ret == 0)
1393			evtchn = bind_virq.port;
1394		else {
1395			if (ret == -EEXIST)
1396				ret = find_virq(virq, cpu, &evtchn);
1397			BUG_ON(ret < 0);
1398		}
1399
1400		ret = xen_irq_info_virq_setup(info, cpu, evtchn, virq);
1401		if (ret < 0) {
1402			__unbind_from_irq(info, info->irq);
1403			goto out;
1404		}
1405
1406		/*
1407		 * Force the affinity mask for percpu interrupts so proc
1408		 * shows the correct target.
1409		 */
1410		bind_evtchn_to_cpu(info, cpu, percpu);
1411		ret = info->irq;
1412	} else {
1413		info = info_for_irq(ret);
1414		WARN_ON(info == NULL || info->type != IRQT_VIRQ);
1415	}
1416
1417out:
1418	mutex_unlock(&irq_mapping_update_lock);
1419
1420	return ret;
1421}
1422
1423static void unbind_from_irq(unsigned int irq)
1424{
1425	struct irq_info *info;
1426
1427	mutex_lock(&irq_mapping_update_lock);
1428	info = info_for_irq(irq);
1429	__unbind_from_irq(info, irq);
1430	mutex_unlock(&irq_mapping_update_lock);
1431}
1432
1433static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn,
1434					  irq_handler_t handler,
1435					  unsigned long irqflags,
1436					  const char *devname, void *dev_id,
1437					  struct irq_chip *chip)
1438{
1439	int irq, retval;
1440
1441	irq = bind_evtchn_to_irq_chip(evtchn, chip, NULL,
1442				      irqflags & IRQF_SHARED);
1443	if (irq < 0)
1444		return irq;
1445	retval = request_irq(irq, handler, irqflags, devname, dev_id);
1446	if (retval != 0) {
1447		unbind_from_irq(irq);
1448		return retval;
1449	}
1450
1451	return irq;
1452}
1453
1454int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
1455			      irq_handler_t handler,
1456			      unsigned long irqflags,
1457			      const char *devname, void *dev_id)
1458{
1459	return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
1460					      devname, dev_id,
1461					      &xen_dynamic_chip);
1462}
1463EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
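
/*
 * Typical use (sketch; evtchn, my_handler and mydev are assumptions of
 * the example): bind an existing event channel and install the handler
 * in one go, and tear both down again with unbind_from_irqhandler():
 *
 *	irq = bind_evtchn_to_irqhandler(evtchn, my_handler, 0,
 *					"mydev", mydev);
 *	if (irq < 0)
 *		return irq;
 *	...
 *	unbind_from_irqhandler(irq, mydev);
 */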
1464
1465int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
1466				      irq_handler_t handler,
1467				      unsigned long irqflags,
1468				      const char *devname, void *dev_id)
1469{
1470	return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
1471					      devname, dev_id,
1472					      &xen_lateeoi_chip);
1473}
1474EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi);
1475
1476static int bind_interdomain_evtchn_to_irqhandler_chip(
1477		struct xenbus_device *dev, evtchn_port_t remote_port,
1478		irq_handler_t handler, unsigned long irqflags,
1479		const char *devname, void *dev_id, struct irq_chip *chip)
1480{
1481	int irq, retval;
1482
1483	irq = bind_interdomain_evtchn_to_irq_chip(dev, remote_port, chip,
1484						  irqflags & IRQF_SHARED);
1485	if (irq < 0)
1486		return irq;
1487
1488	retval = request_irq(irq, handler, irqflags, devname, dev_id);
1489	if (retval != 0) {
1490		unbind_from_irq(irq);
1491		return retval;
1492	}
1493
1494	return irq;
1495}
1496
1497int bind_interdomain_evtchn_to_irqhandler_lateeoi(struct xenbus_device *dev,
1498						  evtchn_port_t remote_port,
1499						  irq_handler_t handler,
1500						  unsigned long irqflags,
1501						  const char *devname,
1502						  void *dev_id)
1503{
1504	return bind_interdomain_evtchn_to_irqhandler_chip(dev,
1505				remote_port, handler, irqflags, devname,
1506				dev_id, &xen_lateeoi_chip);
1507}
1508EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi);
1509
1510int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
1511			    irq_handler_t handler,
1512			    unsigned long irqflags, const char *devname, void *dev_id)
1513{
1514	int irq, retval;
1515
1516	irq = bind_virq_to_irq(virq, cpu, irqflags & IRQF_PERCPU);
1517	if (irq < 0)
1518		return irq;
1519	retval = request_irq(irq, handler, irqflags, devname, dev_id);
1520	if (retval != 0) {
1521		unbind_from_irq(irq);
1522		return retval;
1523	}
1524
1525	return irq;
1526}
1527EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
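
/*
 * Example (sketch): binding a global VIRQ such as VIRQ_DEBUG on CPU 0;
 * per-cpu VIRQs (e.g. timers) are bound on each CPU with IRQF_PERCPU so
 * the percpu irq chip is used:
 *
 *	irq = bind_virq_to_irqhandler(VIRQ_DEBUG, 0, debug_handler,
 *				      0, "xen-debug", NULL);
 */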
1528
1529int bind_ipi_to_irqhandler(enum ipi_vector ipi,
1530			   unsigned int cpu,
1531			   irq_handler_t handler,
1532			   unsigned long irqflags,
1533			   const char *devname,
1534			   void *dev_id)
1535{
1536	int irq, retval;
1537
1538	irq = bind_ipi_to_irq(ipi, cpu);
1539	if (irq < 0)
1540		return irq;
1541
1542	irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME | IRQF_EARLY_RESUME;
1543	retval = request_irq(irq, handler, irqflags, devname, dev_id);
1544	if (retval != 0) {
1545		unbind_from_irq(irq);
1546		return retval;
1547	}
1548
1549	return irq;
1550}
1551
1552void unbind_from_irqhandler(unsigned int irq, void *dev_id)
1553{
1554	struct irq_info *info = info_for_irq(irq);
1555
1556	if (WARN_ON(!info))
1557		return;
1558	free_irq(irq, dev_id);
1559	unbind_from_irq(irq);
1560}
1561EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
1562
1563/**
1564 * xen_set_irq_priority() - set an event channel priority.
 * @irq: irq bound to an event channel.
1566 * @priority: priority between XEN_IRQ_PRIORITY_MAX and XEN_IRQ_PRIORITY_MIN.
1567 */
1568int xen_set_irq_priority(unsigned irq, unsigned priority)
1569{
1570	struct evtchn_set_priority set_priority;
1571
1572	set_priority.port = evtchn_from_irq(irq);
1573	set_priority.priority = priority;
1574
1575	return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority,
1576					   &set_priority);
1577}
1578EXPORT_SYMBOL_GPL(xen_set_irq_priority);
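
/*
 * Example (sketch), using the constants referenced in the kerneldoc
 * above, e.g. to give a timer event channel the highest priority:
 *
 *	xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);
 *
 * Priorities are only implemented by the FIFO event channel ABI, so the
 * call is expected to fail when the 2-level ABI is in use.
 */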
1579
1580int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static)
1581{
1582	struct irq_info *info = evtchn_to_info(evtchn);
1583
1584	if (!info)
1585		return -ENOENT;
1586
1587	WARN_ON(info->refcnt != -1);
1588
1589	info->refcnt = 1;
1590	info->is_static = is_static;
1591
1592	return 0;
1593}
1594EXPORT_SYMBOL_GPL(evtchn_make_refcounted);
1595
1596int evtchn_get(evtchn_port_t evtchn)
1597{
1598	struct irq_info *info;
1599	int err = -ENOENT;
1600
1601	if (evtchn >= xen_evtchn_max_channels())
1602		return -EINVAL;
1603
1604	mutex_lock(&irq_mapping_update_lock);
1605
1606	info = evtchn_to_info(evtchn);
1607
1608	if (!info)
1609		goto done;
1610
1611	err = -EINVAL;
1612	if (info->refcnt <= 0 || info->refcnt == SHRT_MAX)
1613		goto done;
1614
1615	info->refcnt++;
1616	err = 0;
1617 done:
1618	mutex_unlock(&irq_mapping_update_lock);
1619
1620	return err;
1621}
1622EXPORT_SYMBOL_GPL(evtchn_get);
1623
1624void evtchn_put(evtchn_port_t evtchn)
1625{
1626	struct irq_info *info = evtchn_to_info(evtchn);
1627
1628	if (WARN_ON(!info))
1629		return;
1630	unbind_from_irq(info->irq);
1631}
1632EXPORT_SYMBOL_GPL(evtchn_put);
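
/*
 * Intended pattern (sketch) for code that merely uses a channel made
 * refcounted via evtchn_make_refcounted(): pin it while poking at it so
 * it cannot be unbound underneath, then drop the reference again:
 *
 *	if (evtchn_get(evtchn))
 *		return;		// not refcounted or already gone
 *	...use the port...
 *	evtchn_put(evtchn);
 */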
1633
1634void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
1635{
1636	evtchn_port_t evtchn;
1637
1638#ifdef CONFIG_X86
1639	if (unlikely(vector == XEN_NMI_VECTOR)) {
1640		int rc =  HYPERVISOR_vcpu_op(VCPUOP_send_nmi, xen_vcpu_nr(cpu),
1641					     NULL);
1642		if (rc < 0)
1643			printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc);
1644		return;
1645	}
1646#endif
1647	evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
1648	BUG_ON(evtchn == 0);
1649	notify_remote_via_evtchn(evtchn);
1650}
1651
1652struct evtchn_loop_ctrl {
1653	ktime_t timeout;
1654	unsigned count;
1655	bool defer_eoi;
1656};
1657
1658void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
1659{
1660	struct irq_info *info = evtchn_to_info(port);
1661	struct xenbus_device *dev;
1662
1663	if (!info)
1664		return;
1665
1666	/*
1667	 * Check for timeout every 256 events.
1668	 * We are setting the timeout value only after the first 256
1669	 * events in order to not hurt the common case of few loop
1670	 * iterations. The 256 is basically an arbitrary value.
1671	 *
	 * In case we are hitting the timeout we need to defer all further
	 * EOIs in order to make sure we leave the event handling loop
	 * sooner rather than later.
1675	 */
1676	if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
1677		ktime_t kt = ktime_get();
1678
1679		if (!ctrl->timeout) {
1680			kt = ktime_add_ms(kt,
1681					  jiffies_to_msecs(event_loop_timeout));
1682			ctrl->timeout = kt;
1683		} else if (kt > ctrl->timeout) {
1684			ctrl->defer_eoi = true;
1685		}
1686	}
1687
1688	if (xchg_acquire(&info->is_active, 1))
1689		return;
1690
1691	dev = (info->type == IRQT_EVTCHN) ? info->u.interdomain : NULL;
1692	if (dev)
1693		atomic_inc(&dev->events);
1694
1695	if (ctrl->defer_eoi) {
1696		info->eoi_cpu = smp_processor_id();
1697		info->irq_epoch = __this_cpu_read(irq_epoch);
1698		info->eoi_time = get_jiffies_64() + event_eoi_delay;
1699	}
1700
1701	generic_handle_irq(info->irq);
1702}
1703
1704int xen_evtchn_do_upcall(void)
1705{
1706	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
1707	int ret = vcpu_info->evtchn_upcall_pending ? IRQ_HANDLED : IRQ_NONE;
1708	int cpu = smp_processor_id();
1709	struct evtchn_loop_ctrl ctrl = { 0 };
1710
1711	/*
1712	 * When closing an event channel the associated IRQ must not be freed
1713	 * until all cpus have left the event handling loop. This is ensured
1714	 * by taking the rcu_read_lock() while handling events, as freeing of
1715	 * the IRQ is handled via queue_rcu_work() _after_ closing the event
1716	 * channel.
1717	 */
1718	rcu_read_lock();
1719
1720	do {
1721		vcpu_info->evtchn_upcall_pending = 0;
1722
1723		xen_evtchn_handle_events(cpu, &ctrl);
1724
1725		BUG_ON(!irqs_disabled());
1726
1727		virt_rmb(); /* Hypervisor can set upcall pending. */
1728
1729	} while (vcpu_info->evtchn_upcall_pending);
1730
1731	rcu_read_unlock();
1732
1733	/*
1734	 * Increment irq_epoch only now to defer EOIs only for
1735	 * xen_irq_lateeoi() invocations occurring from inside the loop
1736	 * above.
1737	 */
1738	__this_cpu_inc(irq_epoch);
1739
1740	return ret;
1741}
1742EXPORT_SYMBOL_GPL(xen_evtchn_do_upcall);
1743
1744/* Rebind a new event channel to an existing irq. */
1745void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
1746{
1747	struct irq_info *info = info_for_irq(irq);
1748
1749	if (WARN_ON(!info))
1750		return;
1751
	/*
	 * Make sure the irq is masked, since the new event channel
	 * will also be masked.
	 */
1754	disable_irq(irq);
1755
1756	mutex_lock(&irq_mapping_update_lock);
1757
1758	/* After resume the irq<->evtchn mappings are all cleared out */
1759	BUG_ON(evtchn_to_info(evtchn));
	/*
	 * Expect irq to have been bound before, so there should be a
	 * proper type.
	 */
1762	BUG_ON(info->type == IRQT_UNBOUND);
1763
1764	info->irq = irq;
1765	(void)xen_irq_info_evtchn_setup(info, evtchn, NULL);
1766
1767	mutex_unlock(&irq_mapping_update_lock);
1768
1769	bind_evtchn_to_cpu(info, info->cpu, false);
1770
1771	/* Unmask the event channel. */
1772	enable_irq(irq);
1773}
1774
1775/* Rebind an evtchn so that it gets delivered to a specific cpu */
1776static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu)
1777{
1778	struct evtchn_bind_vcpu bind_vcpu;
1779	evtchn_port_t evtchn = info ? info->evtchn : 0;
1780
1781	if (!VALID_EVTCHN(evtchn))
1782		return -1;
1783
1784	if (!xen_support_evtchn_rebind())
1785		return -1;
1786
	/* Send future instances of this interrupt to the target vcpu. */
1788	bind_vcpu.port = evtchn;
1789	bind_vcpu.vcpu = xen_vcpu_nr(tcpu);
1790
1791	/*
1792	 * Mask the event while changing the VCPU binding to prevent
1793	 * it being delivered on an unexpected VCPU.
1794	 */
1795	do_mask(info, EVT_MASK_REASON_TEMPORARY);
1796
1797	/*
1798	 * If this fails, it usually just indicates that we're dealing with a
1799	 * virq or IPI channel, which don't actually need to be rebound. Ignore
1800	 * it, but don't do the xenlinux-level rebind in that case.
1801	 */
1802	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
1803		bind_evtchn_to_cpu(info, tcpu, false);
1804
1805	do_unmask(info, EVT_MASK_REASON_TEMPORARY);
1806
1807	return 0;
1808}
1809
1810/*
1811 * Find the CPU within @dest mask which has the least number of channels
1812 * assigned. This is not precise as the per cpu counts can be modified
1813 * concurrently.
1814 */
1815static unsigned int select_target_cpu(const struct cpumask *dest)
1816{
1817	unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX;
1818
1819	for_each_cpu_and(cpu, dest, cpu_online_mask) {
1820		unsigned int curch = atomic_read(&channels_on_cpu[cpu]);
1821
1822		if (curch < minch) {
1823			minch = curch;
1824			best_cpu = cpu;
1825		}
1826	}
1827
	/*
	 * Catch the unlikely case that dest contains no online CPUs. The
	 * recursion is bounded as cpu_online_mask always contains at least
	 * one online CPU, so the nested call will find a target.
	 */
1832	if (best_cpu == UINT_MAX)
1833		return select_target_cpu(cpu_online_mask);
1834
1835	return best_cpu;
1836}
1837
1838static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
1839			    bool force)
1840{
1841	unsigned int tcpu = select_target_cpu(dest);
1842	int ret;
1843
1844	ret = xen_rebind_evtchn_to_cpu(info_for_irq(data->irq), tcpu);
1845	if (!ret)
1846		irq_data_update_effective_affinity(data, cpumask_of(tcpu));
1847
1848	return ret;
1849}
1850
1851static void enable_dynirq(struct irq_data *data)
1852{
1853	struct irq_info *info = info_for_irq(data->irq);
1854	evtchn_port_t evtchn = info ? info->evtchn : 0;
1855
1856	if (VALID_EVTCHN(evtchn))
1857		do_unmask(info, EVT_MASK_REASON_EXPLICIT);
1858}
1859
1860static void disable_dynirq(struct irq_data *data)
1861{
1862	struct irq_info *info = info_for_irq(data->irq);
1863	evtchn_port_t evtchn = info ? info->evtchn : 0;
1864
1865	if (VALID_EVTCHN(evtchn))
1866		do_mask(info, EVT_MASK_REASON_EXPLICIT);
1867}
1868
1869static void ack_dynirq(struct irq_data *data)
1870{
1871	struct irq_info *info = info_for_irq(data->irq);
1872	evtchn_port_t evtchn = info ? info->evtchn : 0;
1873
1874	if (VALID_EVTCHN(evtchn))
1875		event_handler_exit(info);
1876}
1877
1878static void mask_ack_dynirq(struct irq_data *data)
1879{
1880	disable_dynirq(data);
1881	ack_dynirq(data);
1882}
1883
1884static void lateeoi_ack_dynirq(struct irq_data *data)
1885{
1886	struct irq_info *info = info_for_irq(data->irq);
1887	evtchn_port_t evtchn = info ? info->evtchn : 0;
1888
1889	if (VALID_EVTCHN(evtchn)) {
1890		do_mask(info, EVT_MASK_REASON_EOI_PENDING);
1891		/*
1892		 * Don't call event_handler_exit().
1893		 * Need to keep is_active non-zero in order to ignore re-raised
1894		 * events after cpu affinity changes while a lateeoi is pending.
1895		 */
1896		clear_evtchn(evtchn);
1897	}
1898}
1899
1900static void lateeoi_mask_ack_dynirq(struct irq_data *data)
1901{
1902	struct irq_info *info = info_for_irq(data->irq);
1903	evtchn_port_t evtchn = info ? info->evtchn : 0;
1904
1905	if (VALID_EVTCHN(evtchn)) {
1906		do_mask(info, EVT_MASK_REASON_EXPLICIT);
1907		event_handler_exit(info);
1908	}
1909}
1910
1911static int retrigger_dynirq(struct irq_data *data)
1912{
1913	struct irq_info *info = info_for_irq(data->irq);
1914	evtchn_port_t evtchn = info ? info->evtchn : 0;
1915
1916	if (!VALID_EVTCHN(evtchn))
1917		return 0;
1918
1919	do_mask(info, EVT_MASK_REASON_TEMPORARY);
1920	set_evtchn(evtchn);
1921	do_unmask(info, EVT_MASK_REASON_TEMPORARY);
1922
1923	return 1;
1924}
1925
1926static void restore_pirqs(void)
1927{
1928	int pirq, rc, irq, gsi;
1929	struct physdev_map_pirq map_irq;
1930	struct irq_info *info;
1931
1932	list_for_each_entry(info, &xen_irq_list_head, list) {
1933		if (info->type != IRQT_PIRQ)
1934			continue;
1935
1936		pirq = info->u.pirq.pirq;
1937		gsi = info->u.pirq.gsi;
1938		irq = info->irq;
1939
1940		/* save/restore of PT devices doesn't work, so at this point the
1941		 * only devices present are GSI based emulated devices */
1942		if (!gsi)
1943			continue;
1944
1945		map_irq.domid = DOMID_SELF;
1946		map_irq.type = MAP_PIRQ_TYPE_GSI;
1947		map_irq.index = gsi;
1948		map_irq.pirq = pirq;
1949
1950		rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
1951		if (rc) {
1952			pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
1953				gsi, irq, pirq, rc);
1954			xen_free_irq(info);
1955			continue;
1956		}
1957
1958		printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
1959
1960		__startup_pirq(irq);
1961	}
1962}
1963
1964static void restore_cpu_virqs(unsigned int cpu)
1965{
1966	struct evtchn_bind_virq bind_virq;
1967	evtchn_port_t evtchn;
1968	struct irq_info *info;
1969	int virq, irq;
1970
1971	for (virq = 0; virq < NR_VIRQS; virq++) {
1972		if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
1973			continue;
1974		info = info_for_irq(irq);
1975
1976		BUG_ON(virq_from_irq(info) != virq);
1977
1978		/* Get a new binding from Xen. */
1979		bind_virq.virq = virq;
1980		bind_virq.vcpu = xen_vcpu_nr(cpu);
1981		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
1982						&bind_virq) != 0)
1983			BUG();
1984		evtchn = bind_virq.port;
1985
1986		/* Record the new mapping. */
1987		xen_irq_info_virq_setup(info, cpu, evtchn, virq);
1988		/* The affinity mask is still valid */
1989		bind_evtchn_to_cpu(info, cpu, false);
1990	}
1991}
1992
1993static void restore_cpu_ipis(unsigned int cpu)
1994{
1995	struct evtchn_bind_ipi bind_ipi;
1996	evtchn_port_t evtchn;
1997	struct irq_info *info;
1998	int ipi, irq;
1999
2000	for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
2001		if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
2002			continue;
2003		info = info_for_irq(irq);
2004
2005		BUG_ON(ipi_from_irq(info) != ipi);
2006
2007		/* Get a new binding from Xen. */
2008		bind_ipi.vcpu = xen_vcpu_nr(cpu);
2009		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
2010						&bind_ipi) != 0)
2011			BUG();
2012		evtchn = bind_ipi.port;
2013
2014		/* Record the new mapping. */
2015		xen_irq_info_ipi_setup(info, cpu, evtchn, ipi);
2016		/* The affinity mask is still valid */
2017		bind_evtchn_to_cpu(info, cpu, false);
2018	}
2019}
2020
2021/* Clear an irq's pending state, in preparation for polling on it */
2022void xen_clear_irq_pending(int irq)
2023{
2024	struct irq_info *info = info_for_irq(irq);
2025	evtchn_port_t evtchn = info ? info->evtchn : 0;
2026
2027	if (VALID_EVTCHN(evtchn))
2028		event_handler_exit(info);
2029}
2030EXPORT_SYMBOL(xen_clear_irq_pending);
2031void xen_set_irq_pending(int irq)
2032{
2033	evtchn_port_t evtchn = evtchn_from_irq(irq);
2034
2035	if (VALID_EVTCHN(evtchn))
2036		set_evtchn(evtchn);
2037}
2038
2039bool xen_test_irq_pending(int irq)
2040{
2041	evtchn_port_t evtchn = evtchn_from_irq(irq);
2042	bool ret = false;
2043
2044	if (VALID_EVTCHN(evtchn))
2045		ret = test_evtchn(evtchn);
2046
2047	return ret;
2048}
2049
2050/* Poll waiting for an irq to become pending with timeout.  In the usual case,
2051 * the irq will be disabled so it won't deliver an interrupt. */
2052void xen_poll_irq_timeout(int irq, u64 timeout)
2053{
2054	evtchn_port_t evtchn = evtchn_from_irq(irq);
2055
2056	if (VALID_EVTCHN(evtchn)) {
2057		struct sched_poll poll;
2058
2059		poll.nr_ports = 1;
2060		poll.timeout = timeout;
2061		set_xen_guest_handle(poll.ports, &evtchn);
2062
2063		if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
2064			BUG();
2065	}
2066}
2067EXPORT_SYMBOL(xen_poll_irq_timeout);
2068/* Poll waiting for an irq to become pending.  In the usual case, the
2069 * irq will be disabled so it won't deliver an interrupt. */
2070void xen_poll_irq(int irq)
2071{
2072	xen_poll_irq_timeout(irq, 0 /* no timeout */);
2073}
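
/*
 * Canonical caller pattern (a sketch of what the PV spinlock slow path
 * does): clear the pending state first, then re-check the wakeup
 * condition, and only then block in the hypervisor, so a notification
 * arriving in between is not lost:
 *
 *	xen_clear_irq_pending(irq);
 *	if (!woken_up(lock))		// hypothetical wakeup check
 *		xen_poll_irq(irq);
 */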
2074
2075/* Check whether the IRQ line is shared with other guests. */
2076int xen_test_irq_shared(int irq)
2077{
2078	struct irq_info *info = info_for_irq(irq);
2079	struct physdev_irq_status_query irq_status;
2080
2081	if (WARN_ON(!info))
2082		return -ENOENT;
2083
2084	irq_status.irq = info->u.pirq.pirq;
2085
2086	if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
2087		return 0;
2088	return !(irq_status.flags & XENIRQSTAT_shared);
2089}
2090EXPORT_SYMBOL_GPL(xen_test_irq_shared);
2091
2092void xen_irq_resume(void)
2093{
2094	unsigned int cpu;
2095	struct irq_info *info;
2096
2097	/* New event-channel space is not 'live' yet. */
2098	xen_evtchn_resume();
2099
2100	/* No IRQ <-> event-channel mappings. */
2101	list_for_each_entry(info, &xen_irq_list_head, list) {
2102		/* Zap event-channel binding */
2103		info->evtchn = 0;
2104		/* Adjust accounting */
2105		channels_on_cpu_dec(info);
2106	}
2107
2108	clear_evtchn_to_irq_all();
2109
2110	for_each_possible_cpu(cpu) {
2111		restore_cpu_virqs(cpu);
2112		restore_cpu_ipis(cpu);
2113	}
2114
2115	restore_pirqs();
2116}
2117
2118static struct irq_chip xen_dynamic_chip __read_mostly = {
2119	.name			= "xen-dyn",
2120
2121	.irq_disable		= disable_dynirq,
2122	.irq_mask		= disable_dynirq,
2123	.irq_unmask		= enable_dynirq,
2124
2125	.irq_ack		= ack_dynirq,
2126	.irq_mask_ack		= mask_ack_dynirq,
2127
2128	.irq_set_affinity	= set_affinity_irq,
2129	.irq_retrigger		= retrigger_dynirq,
2130};
2131
2132static struct irq_chip xen_lateeoi_chip __read_mostly = {
2133	/* The chip name needs to contain "xen-dyn" for irqbalance to work. */
2134	.name			= "xen-dyn-lateeoi",
2135
2136	.irq_disable		= disable_dynirq,
2137	.irq_mask		= disable_dynirq,
2138	.irq_unmask		= enable_dynirq,
2139
2140	.irq_ack		= lateeoi_ack_dynirq,
2141	.irq_mask_ack		= lateeoi_mask_ack_dynirq,
2142
2143	.irq_set_affinity	= set_affinity_irq,
2144	.irq_retrigger		= retrigger_dynirq,
2145};
2146
2147static struct irq_chip xen_pirq_chip __read_mostly = {
2148	.name			= "xen-pirq",
2149
2150	.irq_startup		= startup_pirq,
2151	.irq_shutdown		= shutdown_pirq,
2152	.irq_enable		= enable_pirq,
2153	.irq_disable		= disable_pirq,
2154
2155	.irq_mask		= disable_dynirq,
2156	.irq_unmask		= enable_dynirq,
2157
2158	.irq_ack		= eoi_pirq,
2159	.irq_eoi		= eoi_pirq,
2160	.irq_mask_ack		= mask_ack_pirq,
2161
2162	.irq_set_affinity	= set_affinity_irq,
2163
2164	.irq_retrigger		= retrigger_dynirq,
2165};
2166
2167static struct irq_chip xen_percpu_chip __read_mostly = {
2168	.name			= "xen-percpu",
2169
2170	.irq_disable		= disable_dynirq,
2171	.irq_mask		= disable_dynirq,
2172	.irq_unmask		= enable_dynirq,
2173
2174	.irq_ack		= ack_dynirq,
2175};
2176
2177#ifdef CONFIG_X86
2178#ifdef CONFIG_XEN_PVHVM
2179/* Vector callbacks are better than PCI interrupts to receive event
2180 * channel notifications because we can receive vector callbacks on any
2181 * vcpu and we don't need PCI support or APIC interactions. */
2182void xen_setup_callback_vector(void)
2183{
2184	uint64_t callback_via;
2185
2186	if (xen_have_vector_callback) {
2187		callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR);
2188		if (xen_set_callback_via(callback_via)) {
2189			pr_err("Request for Xen HVM callback vector failed\n");
2190			xen_have_vector_callback = false;
2191		}
2192	}
2193}
2194
2195/*
2196 * Setup per-vCPU vector-type callbacks. If this setup is unavailable,
2197 * fallback to the global vector-type callback.
2198 */
2199static __init void xen_init_setup_upcall_vector(void)
2200{
2201	if (!xen_have_vector_callback)
2202		return;
2203
2204	if ((cpuid_eax(xen_cpuid_base() + 4) & XEN_HVM_CPUID_UPCALL_VECTOR) &&
2205	    !xen_set_upcall_vector(0))
2206		xen_percpu_upcall = true;
2207	else if (xen_feature(XENFEAT_hvm_callback_vector))
2208		xen_setup_callback_vector();
2209	else
2210		xen_have_vector_callback = false;
2211}
2212
2213int xen_set_upcall_vector(unsigned int cpu)
2214{
2215	int rc;
2216	xen_hvm_evtchn_upcall_vector_t op = {
2217		.vector = HYPERVISOR_CALLBACK_VECTOR,
2218		.vcpu = per_cpu(xen_vcpu_id, cpu),
2219	};
2220
2221	rc = HYPERVISOR_hvm_op(HVMOP_set_evtchn_upcall_vector, &op);
2222	if (rc)
2223		return rc;
2224
2225	/* Trick toolstack to think we are enlightened. */
2226	if (!cpu)
2227		rc = xen_set_callback_via(1);
2228
2229	return rc;
2230}
2231
2232static __init void xen_alloc_callback_vector(void)
2233{
2234	if (!xen_have_vector_callback)
2235		return;
2236
2237	pr_info("Xen HVM callback vector for event delivery is enabled\n");
2238	alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_xen_hvm_callback);
2239}
2240#else
2241void xen_setup_callback_vector(void) {}
2242static inline void xen_init_setup_upcall_vector(void) {}
int xen_set_upcall_vector(unsigned int cpu) { return 0; }
2244static inline void xen_alloc_callback_vector(void) {}
2245#endif /* CONFIG_XEN_PVHVM */
2246#endif /* CONFIG_X86 */
2247
2248bool xen_fifo_events = true;
2249module_param_named(fifo_events, xen_fifo_events, bool, 0);
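
/*
 * Booting with "xen.fifo_events=0" on the kernel command line forces the
 * 2-level event channel ABI even if the hypervisor offers FIFO support
 * (see xen_init_IRQ() below).
 */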
2250
2251static int xen_evtchn_cpu_prepare(unsigned int cpu)
2252{
2253	int ret = 0;
2254
2255	xen_cpu_init_eoi(cpu);
2256
2257	if (evtchn_ops->percpu_init)
2258		ret = evtchn_ops->percpu_init(cpu);
2259
2260	return ret;
2261}
2262
2263static int xen_evtchn_cpu_dead(unsigned int cpu)
2264{
2265	int ret = 0;
2266
2267	if (evtchn_ops->percpu_deinit)
2268		ret = evtchn_ops->percpu_deinit(cpu);
2269
2270	return ret;
2271}
2272
2273void __init xen_init_IRQ(void)
2274{
2275	int ret = -EINVAL;
2276	evtchn_port_t evtchn;
2277
2278	if (xen_fifo_events)
2279		ret = xen_evtchn_fifo_init();
2280	if (ret < 0) {
2281		xen_evtchn_2l_init();
2282		xen_fifo_events = false;
2283	}
2284
2285	xen_cpu_init_eoi(smp_processor_id());
2286
2287	cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
2288				  "xen/evtchn:prepare",
2289				  xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
2290
2291	evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
2292				sizeof(*evtchn_to_irq), GFP_KERNEL);
2293	BUG_ON(!evtchn_to_irq);
2294
2295	/* No event channels are 'live' right now. */
2296	for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++)
2297		mask_evtchn(evtchn);
2298
2299	pirq_needs_eoi = pirq_needs_eoi_flag;
2300
2301#ifdef CONFIG_X86
2302	if (xen_pv_domain()) {
2303		if (xen_initial_domain())
2304			pci_xen_initial_domain();
2305	}
2306	xen_init_setup_upcall_vector();
2307	xen_alloc_callback_vector();
2308
2309
2310	if (xen_hvm_domain()) {
2311		native_init_IRQ();
2312		/* pci_xen_hvm_init must be called after native_init_IRQ so that
2313		 * __acpi_register_gsi can point at the right function */
2314		pci_xen_hvm_init();
2315	} else {
2316		int rc;
2317		struct physdev_pirq_eoi_gmfn eoi_gmfn;
2318
2319		pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
2320		eoi_gmfn.gmfn = virt_to_gfn(pirq_eoi_map);
2321		rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
2322		if (rc != 0) {
2323			free_page((unsigned long) pirq_eoi_map);
2324			pirq_eoi_map = NULL;
2325		} else
2326			pirq_needs_eoi = pirq_check_eoi_map;
2327	}
2328#endif
2329}
2330