1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 */
5
6/**
7 * DOC: Nitro Enclaves (NE) PCI device driver.
8 */
9
10#include <linux/delay.h>
11#include <linux/device.h>
12#include <linux/list.h>
13#include <linux/module.h>
14#include <linux/mutex.h>
15#include <linux/nitro_enclaves.h>
16#include <linux/pci.h>
17#include <linux/types.h>
18#include <linux/wait.h>
19
20#include "ne_misc_dev.h"
21#include "ne_pci_dev.h"
22
23/**
24 * NE_DEFAULT_TIMEOUT_MSECS - Default timeout to wait for a reply from
25 *			      the NE PCI device.
26 */
27#define NE_DEFAULT_TIMEOUT_MSECS	(120000) /* 120 sec */
28
/**
 * ne_pci_ids - PCI device IDs matched by this driver: the Amazon Nitro
 *		Enclaves PCI device.
 */
static const struct pci_device_id ne_pci_ids[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_NE) },
	{ 0, }
};

MODULE_DEVICE_TABLE(pci, ne_pci_ids);
35
36/**
37 * ne_submit_request() - Submit command request to the PCI device based on the
38 *			 command type.
39 * @pdev:		PCI device to send the command to.
40 * @cmd_type:		Command type of the request sent to the PCI device.
41 * @cmd_request:	Command request payload.
42 * @cmd_request_size:	Size of the command request payload.
43 *
44 * Context: Process context. This function is called with the ne_pci_dev mutex held.
45 */
46static void ne_submit_request(struct pci_dev *pdev, enum ne_pci_dev_cmd_type cmd_type,
47			      void *cmd_request, size_t cmd_request_size)
48{
49	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
50
51	memcpy_toio(ne_pci_dev->iomem_base + NE_SEND_DATA, cmd_request, cmd_request_size);
52
53	iowrite32(cmd_type, ne_pci_dev->iomem_base + NE_COMMAND);
54}
55
56/**
57 * ne_retrieve_reply() - Retrieve reply from the PCI device.
58 * @pdev:		PCI device to receive the reply from.
59 * @cmd_reply:		Command reply payload.
60 * @cmd_reply_size:	Size of the command reply payload.
61 *
62 * Context: Process context. This function is called with the ne_pci_dev mutex held.
63 */
64static void ne_retrieve_reply(struct pci_dev *pdev, struct ne_pci_dev_cmd_reply *cmd_reply,
65			      size_t cmd_reply_size)
66{
67	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
68
69	memcpy_fromio(cmd_reply, ne_pci_dev->iomem_base + NE_RECV_DATA, cmd_reply_size);
70}
71
72/**
73 * ne_wait_for_reply() - Wait for a reply of a PCI device command.
74 * @pdev:	PCI device for which a reply is waited.
75 *
76 * Context: Process context. This function is called with the ne_pci_dev mutex held.
77 * Return:
78 * * 0 on success.
79 * * Negative return value on failure.
80 */
81static int ne_wait_for_reply(struct pci_dev *pdev)
82{
83	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
84	int rc = -EINVAL;
85
86	/*
87	 * TODO: Update to _interruptible and handle interrupted wait event
88	 * e.g. -ERESTARTSYS, incoming signals + update timeout, if needed.
89	 */
90	rc = wait_event_timeout(ne_pci_dev->cmd_reply_wait_q,
91				atomic_read(&ne_pci_dev->cmd_reply_avail) != 0,
92				msecs_to_jiffies(NE_DEFAULT_TIMEOUT_MSECS));
93	if (!rc)
94		return -ETIMEDOUT;
95
96	return 0;
97}
98
/**
 * ne_do_request() - Submit a command request to the NE PCI device and wait for
 *		     the corresponding reply, serializing access to the device.
 * @pdev:		PCI device to send the command request to.
 * @cmd_type:		Command type of the request sent to the PCI device.
 * @cmd_request:	Command request payload.
 * @cmd_request_size:	Size of the command request payload.
 * @cmd_reply:		Command reply payload.
 * @cmd_reply_size:	Size of the command reply payload.
 *
 * Context: Process context. Takes and releases the ne_pci_dev mutex so the
 *	    device handles one command at a time.
 * Return:
 * * 0 on success.
 * * Negative return value on failure (invalid args, timeout waiting for the
 *   reply, or the device-reported error code from cmd_reply->rc).
 */
int ne_do_request(struct pci_dev *pdev, enum ne_pci_dev_cmd_type cmd_type,
		  void *cmd_request, size_t cmd_request_size,
		  struct ne_pci_dev_cmd_reply *cmd_reply, size_t cmd_reply_size)
{
	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
	int rc = -EINVAL;

	if (cmd_type <= INVALID_CMD || cmd_type >= MAX_CMD) {
		dev_err_ratelimited(&pdev->dev, "Invalid cmd type=%u\n", cmd_type);

		return -EINVAL;
	}

	if (!cmd_request) {
		dev_err_ratelimited(&pdev->dev, "Null cmd request for cmd type=%u\n",
				    cmd_type);

		return -EINVAL;
	}

	/* The request must fit in the device MMIO send buffer. */
	if (cmd_request_size > NE_SEND_DATA_SIZE) {
		dev_err_ratelimited(&pdev->dev, "Invalid req size=%zu for cmd type=%u\n",
				    cmd_request_size, cmd_type);

		return -EINVAL;
	}

	if (!cmd_reply) {
		dev_err_ratelimited(&pdev->dev, "Null cmd reply for cmd type=%u\n",
				    cmd_type);

		return -EINVAL;
	}

	/* The reply must fit in the device MMIO receive buffer. */
	if (cmd_reply_size > NE_RECV_DATA_SIZE) {
		dev_err_ratelimited(&pdev->dev, "Invalid reply size=%zu for cmd type=%u\n",
				    cmd_reply_size, cmd_type);

		return -EINVAL;
	}

	/*
	 * Use this mutex so that the PCI device handles one command request at
	 * a time.
	 */
	mutex_lock(&ne_pci_dev->pci_dev_mutex);

	/* Clear the reply-available flag before submitting a new request. */
	atomic_set(&ne_pci_dev->cmd_reply_avail, 0);

	ne_submit_request(pdev, cmd_type, cmd_request, cmd_request_size);

	rc = ne_wait_for_reply(pdev);
	if (rc < 0) {
		dev_err_ratelimited(&pdev->dev, "Error in wait for reply for cmd type=%u [rc=%d]\n",
				    cmd_type, rc);

		goto unlock_mutex;
	}

	ne_retrieve_reply(pdev, cmd_reply, cmd_reply_size);

	/* Reply consumed; reset the flag for the next request. */
	atomic_set(&ne_pci_dev->cmd_reply_avail, 0);

	/* The device reports its own error code in the reply payload. */
	if (cmd_reply->rc < 0) {
		rc = cmd_reply->rc;

		dev_err_ratelimited(&pdev->dev, "Error in cmd process logic, cmd type=%u [rc=%d]\n",
				    cmd_type, rc);

		goto unlock_mutex;
	}

	rc = 0;

unlock_mutex:
	mutex_unlock(&ne_pci_dev->pci_dev_mutex);

	return rc;
}
178
179/**
180 * ne_reply_handler() - Interrupt handler for retrieving a reply matching a
181 *			request sent to the PCI device for enclave lifetime
182 *			management.
183 * @irq:	Received interrupt for a reply sent by the PCI device.
184 * @args:	PCI device private data structure.
185 *
186 * Context: Interrupt context.
187 * Return:
188 * * IRQ_HANDLED on handled interrupt.
189 */
190static irqreturn_t ne_reply_handler(int irq, void *args)
191{
192	struct ne_pci_dev *ne_pci_dev = (struct ne_pci_dev *)args;
193
194	atomic_set(&ne_pci_dev->cmd_reply_avail, 1);
195
196	/* TODO: Update to _interruptible. */
197	wake_up(&ne_pci_dev->cmd_reply_wait_q);
198
199	return IRQ_HANDLED;
200}
201
202/**
203 * ne_event_work_handler() - Work queue handler for notifying enclaves on a
204 *			     state change received by the event interrupt
205 *			     handler.
206 * @work:	Item containing the NE PCI device for which an out-of-band event
207 *		was issued.
208 *
209 * An out-of-band event is being issued by the Nitro Hypervisor when at least
210 * one enclave is changing state without client interaction.
211 *
212 * Context: Work queue context.
213 */
214static void ne_event_work_handler(struct work_struct *work)
215{
216	struct ne_pci_dev_cmd_reply cmd_reply = {};
217	struct ne_enclave *ne_enclave = NULL;
218	struct ne_pci_dev *ne_pci_dev =
219		container_of(work, struct ne_pci_dev, notify_work);
220	struct pci_dev *pdev = ne_pci_dev->pdev;
221	int rc = -EINVAL;
222	struct slot_info_req slot_info_req = {};
223
224	mutex_lock(&ne_pci_dev->enclaves_list_mutex);
225
226	/*
227	 * Iterate over all enclaves registered for the Nitro Enclaves
228	 * PCI device and determine for which enclave(s) the out-of-band event
229	 * is corresponding to.
230	 */
231	list_for_each_entry(ne_enclave, &ne_pci_dev->enclaves_list, enclave_list_entry) {
232		mutex_lock(&ne_enclave->enclave_info_mutex);
233
234		/*
235		 * Enclaves that were never started cannot receive out-of-band
236		 * events.
237		 */
238		if (ne_enclave->state != NE_STATE_RUNNING)
239			goto unlock;
240
241		slot_info_req.slot_uid = ne_enclave->slot_uid;
242
243		rc = ne_do_request(pdev, SLOT_INFO,
244				   &slot_info_req, sizeof(slot_info_req),
245				   &cmd_reply, sizeof(cmd_reply));
246		if (rc < 0)
247			dev_err(&pdev->dev, "Error in slot info [rc=%d]\n", rc);
248
249		/* Notify enclave process that the enclave state changed. */
250		if (ne_enclave->state != cmd_reply.state) {
251			ne_enclave->state = cmd_reply.state;
252
253			ne_enclave->has_event = true;
254
255			wake_up_interruptible(&ne_enclave->eventq);
256		}
257
258unlock:
259		 mutex_unlock(&ne_enclave->enclave_info_mutex);
260	}
261
262	mutex_unlock(&ne_pci_dev->enclaves_list_mutex);
263}
264
265/**
266 * ne_event_handler() - Interrupt handler for PCI device out-of-band events.
267 *			This interrupt does not supply any data in the MMIO
268 *			region. It notifies a change in the state of any of
269 *			the launched enclaves.
270 * @irq:	Received interrupt for an out-of-band event.
271 * @args:	PCI device private data structure.
272 *
273 * Context: Interrupt context.
274 * Return:
275 * * IRQ_HANDLED on handled interrupt.
276 */
277static irqreturn_t ne_event_handler(int irq, void *args)
278{
279	struct ne_pci_dev *ne_pci_dev = (struct ne_pci_dev *)args;
280
281	queue_work(ne_pci_dev->event_wq, &ne_pci_dev->notify_work);
282
283	return IRQ_HANDLED;
284}
285
/**
 * ne_setup_msix() - Setup MSI-X vectors for the PCI device.
 * @pdev:	PCI device to setup the MSI-X for.
 *
 * Allocates all available MSI-X vectors, then wires up the reply IRQ
 * (NE_VEC_REPLY), the event work queue, and the event IRQ (NE_VEC_EVENT).
 * On failure, unwinds in reverse order of acquisition.
 *
 * Context: Process context.
 * Return:
 * * 0 on success.
 * * Negative return value on failure.
 */
static int ne_setup_msix(struct pci_dev *pdev)
{
	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
	int nr_vecs = 0;
	int rc = -EINVAL;

	nr_vecs = pci_msix_vec_count(pdev);
	if (nr_vecs < 0) {
		rc = nr_vecs;

		dev_err(&pdev->dev, "Error in getting vec count [rc=%d]\n", rc);

		return rc;
	}

	/* Require exactly nr_vecs vectors (min == max). */
	rc = pci_alloc_irq_vectors(pdev, nr_vecs, nr_vecs, PCI_IRQ_MSIX);
	if (rc < 0) {
		dev_err(&pdev->dev, "Error in alloc MSI-X vecs [rc=%d]\n", rc);

		return rc;
	}

	/*
	 * This IRQ gets triggered every time the PCI device responds to a
	 * command request. The reply is then retrieved, reading from the MMIO
	 * space of the PCI device.
	 */
	rc = request_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_reply_handler,
			 0, "enclave_cmd", ne_pci_dev);
	if (rc < 0) {
		dev_err(&pdev->dev, "Error in request irq reply [rc=%d]\n", rc);

		goto free_irq_vectors;
	}

	/* Ordered, single-threaded queue for out-of-band event work. */
	ne_pci_dev->event_wq = create_singlethread_workqueue("ne_pci_dev_wq");
	if (!ne_pci_dev->event_wq) {
		rc = -ENOMEM;

		dev_err(&pdev->dev, "Cannot get wq for dev events [rc=%d]\n", rc);

		goto free_reply_irq_vec;
	}

	INIT_WORK(&ne_pci_dev->notify_work, ne_event_work_handler);

	/*
	 * This IRQ gets triggered every time any enclave's state changes. Its
	 * handler then scans for the changes and propagates them to the user
	 * space.
	 */
	rc = request_irq(pci_irq_vector(pdev, NE_VEC_EVENT), ne_event_handler,
			 0, "enclave_evt", ne_pci_dev);
	if (rc < 0) {
		dev_err(&pdev->dev, "Error in request irq event [rc=%d]\n", rc);

		goto destroy_wq;
	}

	return 0;

destroy_wq:
	destroy_workqueue(ne_pci_dev->event_wq);
free_reply_irq_vec:
	free_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_pci_dev);
free_irq_vectors:
	pci_free_irq_vectors(pdev);

	return rc;
}
365
366/**
367 * ne_teardown_msix() - Teardown MSI-X vectors for the PCI device.
368 * @pdev:	PCI device to teardown the MSI-X for.
369 *
370 * Context: Process context.
371 */
372static void ne_teardown_msix(struct pci_dev *pdev)
373{
374	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
375
376	free_irq(pci_irq_vector(pdev, NE_VEC_EVENT), ne_pci_dev);
377
378	flush_work(&ne_pci_dev->notify_work);
379	flush_workqueue(ne_pci_dev->event_wq);
380	destroy_workqueue(ne_pci_dev->event_wq);
381
382	free_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_pci_dev);
383
384	pci_free_irq_vectors(pdev);
385}
386
387/**
388 * ne_pci_dev_enable() - Select the PCI device version and enable it.
389 * @pdev:	PCI device to select version for and then enable.
390 *
391 * Context: Process context.
392 * Return:
393 * * 0 on success.
394 * * Negative return value on failure.
395 */
396static int ne_pci_dev_enable(struct pci_dev *pdev)
397{
398	u8 dev_enable_reply = 0;
399	u16 dev_version_reply = 0;
400	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
401
402	iowrite16(NE_VERSION_MAX, ne_pci_dev->iomem_base + NE_VERSION);
403
404	dev_version_reply = ioread16(ne_pci_dev->iomem_base + NE_VERSION);
405	if (dev_version_reply != NE_VERSION_MAX) {
406		dev_err(&pdev->dev, "Error in pci dev version cmd\n");
407
408		return -EIO;
409	}
410
411	iowrite8(NE_ENABLE_ON, ne_pci_dev->iomem_base + NE_ENABLE);
412
413	dev_enable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
414	if (dev_enable_reply != NE_ENABLE_ON) {
415		dev_err(&pdev->dev, "Error in pci dev enable cmd\n");
416
417		return -EIO;
418	}
419
420	return 0;
421}
422
/**
 * ne_pci_dev_disable() - Disable the PCI device.
 * @pdev:	PCI device to disable.
 *
 * Writes NE_ENABLE_OFF to the device MMIO space, then polls the register
 * (every 10 ms, up to NE_DEFAULT_TIMEOUT_MSECS) until the device confirms it
 * is disabled, since it may pass through a transitory disabling state.
 *
 * Context: Process context.
 */
static void ne_pci_dev_disable(struct pci_dev *pdev)
{
	u8 dev_disable_reply = 0;
	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
	const unsigned int sleep_time = 10; /* 10 ms */
	unsigned int sleep_time_count = 0;

	iowrite8(NE_ENABLE_OFF, ne_pci_dev->iomem_base + NE_ENABLE);

	/*
	 * Check for NE_ENABLE_OFF in a loop, to handle cases when the device
	 * state is not immediately set to disabled and going through a
	 * transitory state of disabling.
	 *
	 * NOTE(review): msleep_interruptible() can return early on a pending
	 * signal, which shortens the effective timeout since sleep_time_count
	 * is still advanced by the full sleep_time - confirm this is intended.
	 */
	while (sleep_time_count < NE_DEFAULT_TIMEOUT_MSECS) {
		dev_disable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
		if (dev_disable_reply == NE_ENABLE_OFF)
			return;

		msleep_interruptible(sleep_time);
		sleep_time_count += sleep_time;
	}

	/* Last chance read after the timeout; log if still not disabled. */
	dev_disable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
	if (dev_disable_reply != NE_ENABLE_OFF)
		dev_err(&pdev->dev, "Error in pci dev disable cmd\n");
}
456
/**
 * ne_pci_probe() - Probe function for the NE PCI device.
 * @pdev:	PCI device to match with the NE PCI driver.
 * @id :	PCI device id table associated with the NE PCI driver.
 *
 * Allocates the driver private data, enables the PCI device, maps its MMIO
 * BAR, sets up MSI-X, enables the NE device logic and registers the misc
 * device for user space access. On failure, resources are released in
 * reverse order of acquisition via the goto ladder.
 *
 * Context: Process context.
 * Return:
 * * 0 on success.
 * * Negative return value on failure.
 */
static int ne_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct ne_pci_dev *ne_pci_dev = NULL;
	int rc = -EINVAL;

	ne_pci_dev = kzalloc(sizeof(*ne_pci_dev), GFP_KERNEL);
	if (!ne_pci_dev)
		return -ENOMEM;

	rc = pci_enable_device(pdev);
	if (rc < 0) {
		dev_err(&pdev->dev, "Error in pci dev enable [rc=%d]\n", rc);

		goto free_ne_pci_dev;
	}

	/* Exclusive ownership of the device BARs for this driver. */
	rc = pci_request_regions_exclusive(pdev, "nitro_enclaves");
	if (rc < 0) {
		dev_err(&pdev->dev, "Error in pci request regions [rc=%d]\n", rc);

		goto disable_pci_dev;
	}

	/* Map the whole NE BAR (len 0 maps the full BAR). */
	ne_pci_dev->iomem_base = pci_iomap(pdev, PCI_BAR_NE, 0);
	if (!ne_pci_dev->iomem_base) {
		rc = -ENOMEM;

		dev_err(&pdev->dev, "Error in pci iomap [rc=%d]\n", rc);

		goto release_pci_regions;
	}

	pci_set_drvdata(pdev, ne_pci_dev);

	rc = ne_setup_msix(pdev);
	if (rc < 0) {
		dev_err(&pdev->dev, "Error in pci dev msix setup [rc=%d]\n", rc);

		goto iounmap_pci_bar;
	}

	/* Start from a known disabled state before enabling the device. */
	ne_pci_dev_disable(pdev);

	rc = ne_pci_dev_enable(pdev);
	if (rc < 0) {
		dev_err(&pdev->dev, "Error in ne_pci_dev enable [rc=%d]\n", rc);

		goto teardown_msix;
	}

	atomic_set(&ne_pci_dev->cmd_reply_avail, 0);
	init_waitqueue_head(&ne_pci_dev->cmd_reply_wait_q);
	INIT_LIST_HEAD(&ne_pci_dev->enclaves_list);
	mutex_init(&ne_pci_dev->enclaves_list_mutex);
	mutex_init(&ne_pci_dev->pci_dev_mutex);
	ne_pci_dev->pdev = pdev;

	ne_devs.ne_pci_dev = ne_pci_dev;

	/* Expose the misc device to user space only once fully set up. */
	rc = misc_register(ne_devs.ne_misc_dev);
	if (rc < 0) {
		dev_err(&pdev->dev, "Error in misc dev register [rc=%d]\n", rc);

		goto disable_ne_pci_dev;
	}

	return 0;

disable_ne_pci_dev:
	ne_devs.ne_pci_dev = NULL;
	ne_pci_dev_disable(pdev);
teardown_msix:
	ne_teardown_msix(pdev);
iounmap_pci_bar:
	pci_set_drvdata(pdev, NULL);
	pci_iounmap(pdev, ne_pci_dev->iomem_base);
release_pci_regions:
	pci_release_regions(pdev);
disable_pci_dev:
	pci_disable_device(pdev);
free_ne_pci_dev:
	kfree(ne_pci_dev);

	return rc;
}
552
/**
 * ne_pci_remove() - Remove function for the NE PCI device.
 * @pdev:	PCI device associated with the NE PCI driver.
 *
 * Releases everything acquired in ne_pci_probe(), in reverse order:
 * misc device first (stops new user space access), then device disable,
 * MSI-X teardown, BAR unmap, region release and PCI disable.
 *
 * Context: Process context.
 */
static void ne_pci_remove(struct pci_dev *pdev)
{
	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);

	misc_deregister(ne_devs.ne_misc_dev);

	ne_devs.ne_pci_dev = NULL;

	ne_pci_dev_disable(pdev);

	ne_teardown_msix(pdev);

	pci_set_drvdata(pdev, NULL);

	pci_iounmap(pdev, ne_pci_dev->iomem_base);

	pci_release_regions(pdev);

	pci_disable_device(pdev);

	kfree(ne_pci_dev);
}
581
/**
 * ne_pci_shutdown() - Shutdown function for the NE PCI device.
 * @pdev:	PCI device associated with the NE PCI driver.
 *
 * Same teardown sequence as ne_pci_remove(), guarded against a NULL
 * drvdata in case shutdown runs after (or without) a successful probe.
 *
 * Context: Process context.
 */
static void ne_pci_shutdown(struct pci_dev *pdev)
{
	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);

	/* Nothing to do if the device was never fully probed. */
	if (!ne_pci_dev)
		return;

	misc_deregister(ne_devs.ne_misc_dev);

	ne_devs.ne_pci_dev = NULL;

	ne_pci_dev_disable(pdev);

	ne_teardown_msix(pdev);

	pci_set_drvdata(pdev, NULL);

	pci_iounmap(pdev, ne_pci_dev->iomem_base);

	pci_release_regions(pdev);

	pci_disable_device(pdev);

	kfree(ne_pci_dev);
}
613
/*
 * TODO: Add suspend / resume functions for power management w/ CONFIG_PM, if
 * needed.
 */
/* NE PCI device driver; registered from the misc device side of the driver. */
struct pci_driver ne_pci_driver = {
	.name		= "nitro_enclaves",
	.id_table	= ne_pci_ids,
	.probe		= ne_pci_probe,
	.remove		= ne_pci_remove,
	.shutdown	= ne_pci_shutdown,
};
626