1// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2
3/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
4/*          Kai Shen <kaishen@linux.alibaba.com> */
5/* Copyright (c) 2020-2022, Alibaba Group. */
6
7#include <linux/module.h>
8#include <net/addrconf.h>
9#include <rdma/erdma-abi.h>
10
11#include "erdma.h"
12#include "erdma_cm.h"
13#include "erdma_verbs.h"
14
/* Module metadata: author, one-line description and licence. */
MODULE_AUTHOR("Cheng Xu <chengyou@linux.alibaba.com>");
MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver");
MODULE_LICENSE("Dual BSD/GPL");
18
19static int erdma_netdev_event(struct notifier_block *nb, unsigned long event,
20			      void *arg)
21{
22	struct net_device *netdev = netdev_notifier_info_to_dev(arg);
23	struct erdma_dev *dev = container_of(nb, struct erdma_dev, netdev_nb);
24
25	if (dev->netdev == NULL || dev->netdev != netdev)
26		goto done;
27
28	switch (event) {
29	case NETDEV_UP:
30		dev->state = IB_PORT_ACTIVE;
31		erdma_port_event(dev, IB_EVENT_PORT_ACTIVE);
32		break;
33	case NETDEV_DOWN:
34		dev->state = IB_PORT_DOWN;
35		erdma_port_event(dev, IB_EVENT_PORT_ERR);
36		break;
37	case NETDEV_CHANGEMTU:
38		if (dev->mtu != netdev->mtu) {
39			erdma_set_mtu(dev, netdev->mtu);
40			dev->mtu = netdev->mtu;
41		}
42		break;
43	case NETDEV_REGISTER:
44	case NETDEV_UNREGISTER:
45	case NETDEV_CHANGEADDR:
46	case NETDEV_GOING_DOWN:
47	case NETDEV_CHANGE:
48	default:
49		break;
50	}
51
52done:
53	return NOTIFY_OK;
54}
55
56static int erdma_enum_and_get_netdev(struct erdma_dev *dev)
57{
58	struct net_device *netdev;
59	int ret = -EPROBE_DEFER;
60
61	/* Already binded to a net_device, so we skip. */
62	if (dev->netdev)
63		return 0;
64
65	rtnl_lock();
66	for_each_netdev(&init_net, netdev) {
67		/*
68		 * In erdma, the paired netdev and ibdev should have the same
69		 * MAC address. erdma can get the value from its PCIe bar
70		 * registers. Since erdma can not get the paired netdev
71		 * reference directly, we do a traverse here to get the paired
72		 * netdev.
73		 */
74		if (ether_addr_equal_unaligned(netdev->perm_addr,
75					       dev->attrs.peer_addr)) {
76			ret = ib_device_set_netdev(&dev->ibdev, netdev, 1);
77			if (ret) {
78				rtnl_unlock();
79				ibdev_warn(&dev->ibdev,
80					   "failed (%d) to link netdev", ret);
81				return ret;
82			}
83
84			dev->netdev = netdev;
85			break;
86		}
87	}
88
89	rtnl_unlock();
90
91	return ret;
92}
93
94static int erdma_device_register(struct erdma_dev *dev)
95{
96	struct ib_device *ibdev = &dev->ibdev;
97	int ret;
98
99	ret = erdma_enum_and_get_netdev(dev);
100	if (ret)
101		return ret;
102
103	dev->mtu = dev->netdev->mtu;
104	addrconf_addr_eui48((u8 *)&ibdev->node_guid, dev->netdev->dev_addr);
105
106	ret = ib_register_device(ibdev, "erdma_%d", &dev->pdev->dev);
107	if (ret) {
108		dev_err(&dev->pdev->dev,
109			"ib_register_device failed: ret = %d\n", ret);
110		return ret;
111	}
112
113	dev->netdev_nb.notifier_call = erdma_netdev_event;
114	ret = register_netdevice_notifier(&dev->netdev_nb);
115	if (ret) {
116		ibdev_err(&dev->ibdev, "failed to register notifier.\n");
117		ib_unregister_device(ibdev);
118	}
119
120	return ret;
121}
122
123static irqreturn_t erdma_comm_irq_handler(int irq, void *data)
124{
125	struct erdma_dev *dev = data;
126
127	erdma_cmdq_completion_handler(&dev->cmdq);
128	erdma_aeq_event_handler(dev);
129
130	return IRQ_HANDLED;
131}
132
133static int erdma_request_vectors(struct erdma_dev *dev)
134{
135	int expect_irq_num = min(num_possible_cpus() + 1, ERDMA_NUM_MSIX_VEC);
136	int ret;
137
138	ret = pci_alloc_irq_vectors(dev->pdev, 1, expect_irq_num, PCI_IRQ_MSIX);
139	if (ret < 0) {
140		dev_err(&dev->pdev->dev, "request irq vectors failed(%d)\n",
141			ret);
142		return ret;
143	}
144	dev->attrs.irq_num = ret;
145
146	return 0;
147}
148
149static int erdma_comm_irq_init(struct erdma_dev *dev)
150{
151	snprintf(dev->comm_irq.name, ERDMA_IRQNAME_SIZE, "erdma-common@pci:%s",
152		 pci_name(dev->pdev));
153	dev->comm_irq.msix_vector =
154		pci_irq_vector(dev->pdev, ERDMA_MSIX_VECTOR_CMDQ);
155
156	cpumask_set_cpu(cpumask_first(cpumask_of_pcibus(dev->pdev->bus)),
157			&dev->comm_irq.affinity_hint_mask);
158	irq_set_affinity_hint(dev->comm_irq.msix_vector,
159			      &dev->comm_irq.affinity_hint_mask);
160
161	return request_irq(dev->comm_irq.msix_vector, erdma_comm_irq_handler, 0,
162			   dev->comm_irq.name, dev);
163}
164
165static void erdma_comm_irq_uninit(struct erdma_dev *dev)
166{
167	irq_set_affinity_hint(dev->comm_irq.msix_vector, NULL);
168	free_irq(dev->comm_irq.msix_vector, dev);
169}
170
171static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev)
172{
173	int ret;
174
175	ret = dma_set_mask_and_coherent(&pdev->dev,
176					DMA_BIT_MASK(ERDMA_PCI_WIDTH));
177	if (ret)
178		return ret;
179
180	dma_set_max_seg_size(&pdev->dev, UINT_MAX);
181
182	return 0;
183}
184
185static void erdma_hw_reset(struct erdma_dev *dev)
186{
187	u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1);
188
189	erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);
190}
191
192static int erdma_wait_hw_init_done(struct erdma_dev *dev)
193{
194	int i;
195
196	erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG,
197			  FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1));
198
199	for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) {
200		if (erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG,
201					   ERDMA_REG_DEV_ST_INIT_DONE_MASK))
202			break;
203
204		msleep(ERDMA_REG_ACCESS_WAIT_MS);
205	}
206
207	if (i == ERDMA_WAIT_DEV_DONE_CNT) {
208		dev_err(&dev->pdev->dev, "wait init done failed.\n");
209		return -ETIMEDOUT;
210	}
211
212	return 0;
213}
214
/*
 * PCI IDs handled by this driver: Alibaba vendor ID, device ID 0x107f.
 * The empty entry terminates the table.
 */
static const struct pci_device_id erdma_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_ALIBABA, 0x107f) },
	{}
};
219
220static int erdma_probe_dev(struct pci_dev *pdev)
221{
222	struct erdma_dev *dev;
223	int bars, err;
224	u32 version;
225
226	err = pci_enable_device(pdev);
227	if (err) {
228		dev_err(&pdev->dev, "pci_enable_device failed(%d)\n", err);
229		return err;
230	}
231
232	pci_set_master(pdev);
233
234	dev = ib_alloc_device(erdma_dev, ibdev);
235	if (!dev) {
236		dev_err(&pdev->dev, "ib_alloc_device failed\n");
237		err = -ENOMEM;
238		goto err_disable_device;
239	}
240
241	pci_set_drvdata(pdev, dev);
242	dev->pdev = pdev;
243	dev->attrs.numa_node = dev_to_node(&pdev->dev);
244
245	bars = pci_select_bars(pdev, IORESOURCE_MEM);
246	err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
247	if (bars != ERDMA_BAR_MASK || err) {
248		err = err ? err : -EINVAL;
249		goto err_ib_device_release;
250	}
251
252	dev->func_bar_addr = pci_resource_start(pdev, ERDMA_FUNC_BAR);
253	dev->func_bar_len = pci_resource_len(pdev, ERDMA_FUNC_BAR);
254
255	dev->func_bar =
256		devm_ioremap(&pdev->dev, dev->func_bar_addr, dev->func_bar_len);
257	if (!dev->func_bar) {
258		dev_err(&pdev->dev, "devm_ioremap failed.\n");
259		err = -EFAULT;
260		goto err_release_bars;
261	}
262
263	version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG);
264	if (version == 0) {
265		/* we knows that it is a non-functional function. */
266		err = -ENODEV;
267		goto err_iounmap_func_bar;
268	}
269
270	err = erdma_device_init(dev, pdev);
271	if (err)
272		goto err_iounmap_func_bar;
273
274	err = erdma_request_vectors(dev);
275	if (err)
276		goto err_iounmap_func_bar;
277
278	err = erdma_comm_irq_init(dev);
279	if (err)
280		goto err_free_vectors;
281
282	err = erdma_aeq_init(dev);
283	if (err)
284		goto err_uninit_comm_irq;
285
286	err = erdma_cmdq_init(dev);
287	if (err)
288		goto err_uninit_aeq;
289
290	err = erdma_wait_hw_init_done(dev);
291	if (err)
292		goto err_uninit_cmdq;
293
294	err = erdma_ceqs_init(dev);
295	if (err)
296		goto err_reset_hw;
297
298	erdma_finish_cmdq_init(dev);
299
300	return 0;
301
302err_reset_hw:
303	erdma_hw_reset(dev);
304
305err_uninit_cmdq:
306	erdma_cmdq_destroy(dev);
307
308err_uninit_aeq:
309	erdma_aeq_destroy(dev);
310
311err_uninit_comm_irq:
312	erdma_comm_irq_uninit(dev);
313
314err_free_vectors:
315	pci_free_irq_vectors(dev->pdev);
316
317err_iounmap_func_bar:
318	devm_iounmap(&pdev->dev, dev->func_bar);
319
320err_release_bars:
321	pci_release_selected_regions(pdev, bars);
322
323err_ib_device_release:
324	ib_dealloc_device(&dev->ibdev);
325
326err_disable_device:
327	pci_disable_device(pdev);
328
329	return err;
330}
331
332static void erdma_remove_dev(struct pci_dev *pdev)
333{
334	struct erdma_dev *dev = pci_get_drvdata(pdev);
335
336	erdma_ceqs_uninit(dev);
337	erdma_hw_reset(dev);
338	erdma_cmdq_destroy(dev);
339	erdma_aeq_destroy(dev);
340	erdma_comm_irq_uninit(dev);
341	pci_free_irq_vectors(dev->pdev);
342
343	devm_iounmap(&pdev->dev, dev->func_bar);
344	pci_release_selected_regions(pdev, ERDMA_BAR_MASK);
345
346	ib_dealloc_device(&dev->ibdev);
347
348	pci_disable_device(pdev);
349}
350
/*
 * Extract capability field @name from the 64-bit word @cap, using the
 * field mask ERDMA_CMD_DEV_CAP_<name>_MASK.
 */
#define ERDMA_GET_CAP(name, cap) FIELD_GET(ERDMA_CMD_DEV_CAP_##name##_MASK, cap)
352
353static int erdma_dev_attrs_init(struct erdma_dev *dev)
354{
355	int err;
356	u64 req_hdr, cap0, cap1;
357
358	erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_RDMA,
359				CMDQ_OPCODE_QUERY_DEVICE);
360
361	err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
362				  &cap1);
363	if (err)
364		return err;
365
366	dev->attrs.max_cqe = 1 << ERDMA_GET_CAP(MAX_CQE, cap0);
367	dev->attrs.max_mr_size = 1ULL << ERDMA_GET_CAP(MAX_MR_SIZE, cap0);
368	dev->attrs.max_mw = 1 << ERDMA_GET_CAP(MAX_MW, cap1);
369	dev->attrs.max_recv_wr = 1 << ERDMA_GET_CAP(MAX_RECV_WR, cap0);
370	dev->attrs.local_dma_key = ERDMA_GET_CAP(DMA_LOCAL_KEY, cap1);
371	dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1);
372	dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1);
373	dev->attrs.max_mr = dev->attrs.max_qp << 1;
374	dev->attrs.max_cq = dev->attrs.max_qp << 1;
375	dev->attrs.cap_flags = ERDMA_GET_CAP(FLAGS, cap0);
376
377	dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR;
378	dev->attrs.max_ord = ERDMA_MAX_ORD;
379	dev->attrs.max_ird = ERDMA_MAX_IRD;
380	dev->attrs.max_send_sge = ERDMA_MAX_SEND_SGE;
381	dev->attrs.max_recv_sge = ERDMA_MAX_RECV_SGE;
382	dev->attrs.max_sge_rd = ERDMA_MAX_SGE_RD;
383	dev->attrs.max_pd = ERDMA_MAX_PD;
384
385	dev->res_cb[ERDMA_RES_TYPE_PD].max_cap = ERDMA_MAX_PD;
386	dev->res_cb[ERDMA_RES_TYPE_STAG_IDX].max_cap = dev->attrs.max_mr;
387
388	erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_COMMON,
389				CMDQ_OPCODE_QUERY_FW_INFO);
390
391	err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
392				  &cap1);
393	if (!err)
394		dev->attrs.fw_version =
395			FIELD_GET(ERDMA_CMD_INFO0_FW_VER_MASK, cap0);
396
397	return err;
398}
399
400static int erdma_device_config(struct erdma_dev *dev)
401{
402	struct erdma_cmdq_config_device_req req = {};
403
404	if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_EXTEND_DB))
405		return 0;
406
407	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
408				CMDQ_OPCODE_CONF_DEVICE);
409
410	req.cfg = FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PGSHIFT_MASK, PAGE_SHIFT) |
411		  FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PS_EN_MASK, 1);
412
413	return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
414}
415
416static int erdma_res_cb_init(struct erdma_dev *dev)
417{
418	int i, j;
419
420	for (i = 0; i < ERDMA_RES_CNT; i++) {
421		dev->res_cb[i].next_alloc_idx = 1;
422		spin_lock_init(&dev->res_cb[i].lock);
423		dev->res_cb[i].bitmap =
424			bitmap_zalloc(dev->res_cb[i].max_cap, GFP_KERNEL);
425		if (!dev->res_cb[i].bitmap)
426			goto err;
427	}
428
429	return 0;
430
431err:
432	for (j = 0; j < i; j++)
433		bitmap_free(dev->res_cb[j].bitmap);
434
435	return -ENOMEM;
436}
437
438static void erdma_res_cb_free(struct erdma_dev *dev)
439{
440	int i;
441
442	for (i = 0; i < ERDMA_RES_CNT; i++)
443		bitmap_free(dev->res_cb[i].bitmap);
444}
445
/*
 * Operations table installed on the ib_device by erdma_ib_device_add()
 * through ib_set_device_ops(). Besides the callbacks it carries the owner
 * module, the driver id and the uverbs ABI version; the INIT_RDMA_OBJ_SIZE
 * entries name, for each core object type, the erdma structure and the
 * member used for it.
 */
static const struct ib_device_ops erdma_device_ops = {
	.owner = THIS_MODULE,
	.driver_id = RDMA_DRIVER_ERDMA,
	.uverbs_abi_ver = ERDMA_ABI_VERSION,

	.alloc_mr = erdma_ib_alloc_mr,
	.alloc_pd = erdma_alloc_pd,
	.alloc_ucontext = erdma_alloc_ucontext,
	.create_cq = erdma_create_cq,
	.create_qp = erdma_create_qp,
	.dealloc_pd = erdma_dealloc_pd,
	.dealloc_ucontext = erdma_dealloc_ucontext,
	.dereg_mr = erdma_dereg_mr,
	.destroy_cq = erdma_destroy_cq,
	.destroy_qp = erdma_destroy_qp,
	.get_dma_mr = erdma_get_dma_mr,
	.get_port_immutable = erdma_get_port_immutable,
	.iw_accept = erdma_accept,
	.iw_add_ref = erdma_qp_get_ref,
	.iw_connect = erdma_connect,
	.iw_create_listen = erdma_create_listen,
	.iw_destroy_listen = erdma_destroy_listen,
	.iw_get_qp = erdma_get_ibqp,
	.iw_reject = erdma_reject,
	.iw_rem_ref = erdma_qp_put_ref,
	.map_mr_sg = erdma_map_mr_sg,
	.mmap = erdma_mmap,
	.mmap_free = erdma_mmap_free,
	.modify_qp = erdma_modify_qp,
	.post_recv = erdma_post_recv,
	.post_send = erdma_post_send,
	.poll_cq = erdma_poll_cq,
	.query_device = erdma_query_device,
	.query_gid = erdma_query_gid,
	.query_port = erdma_query_port,
	.query_qp = erdma_query_qp,
	.req_notify_cq = erdma_req_notify_cq,
	.reg_user_mr = erdma_reg_user_mr,

	INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq),
	INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd),
	INIT_RDMA_OBJ_SIZE(ib_ucontext, erdma_ucontext, ibucontext),
	INIT_RDMA_OBJ_SIZE(ib_qp, erdma_qp, ibqp),
};
490
491static int erdma_ib_device_add(struct pci_dev *pdev)
492{
493	struct erdma_dev *dev = pci_get_drvdata(pdev);
494	struct ib_device *ibdev = &dev->ibdev;
495	u64 mac;
496	int ret;
497
498	ret = erdma_dev_attrs_init(dev);
499	if (ret)
500		return ret;
501
502	ret = erdma_device_config(dev);
503	if (ret)
504		return ret;
505
506	ibdev->node_type = RDMA_NODE_RNIC;
507	memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC));
508
509	/*
510	 * Current model (one-to-one device association):
511	 * One ERDMA device per net_device or, equivalently,
512	 * per physical port.
513	 */
514	ibdev->phys_port_cnt = 1;
515	ibdev->num_comp_vectors = dev->attrs.irq_num - 1;
516
517	ib_set_device_ops(ibdev, &erdma_device_ops);
518
519	INIT_LIST_HEAD(&dev->cep_list);
520
521	spin_lock_init(&dev->lock);
522	xa_init_flags(&dev->qp_xa, XA_FLAGS_ALLOC1);
523	xa_init_flags(&dev->cq_xa, XA_FLAGS_ALLOC1);
524	dev->next_alloc_cqn = 1;
525	dev->next_alloc_qpn = 1;
526
527	ret = erdma_res_cb_init(dev);
528	if (ret)
529		return ret;
530
531	atomic_set(&dev->num_ctx, 0);
532
533	mac = erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_L_REG);
534	mac |= (u64)erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_H_REG) << 32;
535
536	u64_to_ether_addr(mac, dev->attrs.peer_addr);
537
538	dev->reflush_wq = alloc_workqueue("erdma-reflush-wq", WQ_UNBOUND,
539					  WQ_UNBOUND_MAX_ACTIVE);
540	if (!dev->reflush_wq) {
541		ret = -ENOMEM;
542		goto err_alloc_workqueue;
543	}
544
545	ret = erdma_device_register(dev);
546	if (ret)
547		goto err_register;
548
549	return 0;
550
551err_register:
552	destroy_workqueue(dev->reflush_wq);
553err_alloc_workqueue:
554	xa_destroy(&dev->qp_xa);
555	xa_destroy(&dev->cq_xa);
556
557	erdma_res_cb_free(dev);
558
559	return ret;
560}
561
562static void erdma_ib_device_remove(struct pci_dev *pdev)
563{
564	struct erdma_dev *dev = pci_get_drvdata(pdev);
565
566	unregister_netdevice_notifier(&dev->netdev_nb);
567	ib_unregister_device(&dev->ibdev);
568
569	destroy_workqueue(dev->reflush_wq);
570	erdma_res_cb_free(dev);
571	xa_destroy(&dev->qp_xa);
572	xa_destroy(&dev->cq_xa);
573}
574
/*
 * erdma_probe - PCI probe callback.
 * @pdev: PCI device being probed
 * @ent:  matching entry of the ID table (unused)
 *
 * Brings up the PCI/hardware part first and the RDMA part second. If the
 * second stage fails, the first one is torn down again.
 *
 * Return: 0 on success, negative errno on failure.
 */
static int erdma_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int err = erdma_probe_dev(pdev);

	if (err)
		return err;

	err = erdma_ib_device_add(pdev);
	if (err)
		erdma_remove_dev(pdev);

	return err;
}
591
/*
 * erdma_remove - PCI remove callback.
 * @pdev: PCI device being removed
 *
 * Tears down the RDMA part first and the PCI/hardware part second, i.e. in
 * the reverse order of erdma_probe().
 */
static void erdma_remove(struct pci_dev *pdev)
{
	erdma_ib_device_remove(pdev);
	erdma_remove_dev(pdev);
}
597
/* PCI driver description: name, supported IDs and probe/remove callbacks. */
static struct pci_driver erdma_pci_driver = {
	.name = DRV_MODULE_NAME,
	.id_table = erdma_pci_tbl,
	.probe = erdma_probe,
	.remove = erdma_remove
};

/* Export the PCI ID table as the module's device table. */
MODULE_DEVICE_TABLE(pci, erdma_pci_tbl);
606
607static __init int erdma_init_module(void)
608{
609	int ret;
610
611	ret = erdma_cm_init();
612	if (ret)
613		return ret;
614
615	ret = pci_register_driver(&erdma_pci_driver);
616	if (ret)
617		erdma_cm_exit();
618
619	return ret;
620}
621
/*
 * erdma_exit_module - module exit point.
 *
 * Unregisters the PCI driver and then calls erdma_cm_exit(), the reverse of
 * the order used in erdma_init_module().
 */
static void __exit erdma_exit_module(void)
{
	pci_unregister_driver(&erdma_pci_driver);

	erdma_cm_exit();
}
628
/* Register the module entry and exit points. */
module_init(erdma_init_module);
module_exit(erdma_exit_module);
631