// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include "datapath.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
#include "meter.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"

unsigned int ovs_net_id __read_mostly;

static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

static const struct nla_policy flow_policy[];

static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
	.name = OVS_FLOW_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
	.name = OVS_DATAPATH_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
	.name = OVS_VPORT_MCGROUP,
};

/* Check if we need to build a reply message.
 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
			    unsigned int group)
{
	return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
	       genl_has_listeners(family, genl_info_net(info), group);
}

static void ovs_notify(struct genl_family *family,
		       struct sk_buff *skb, struct genl_info *info)
{
	genl_notify(family, skb, info, 0, GFP_KERNEL);
}

/**
 * DOC: Locking:
 *
 * All writes to device state (add/remove datapath or port, set operations
 * on vports, etc.) and writes to other state (flow table modifications,
 * setting miscellaneous datapath parameters, etc.) are protected by
 * ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */

static DEFINE_MUTEX(ovs_mutex);

void ovs_lock(void)
{
	mutex_lock(&ovs_mutex);
}

void ovs_unlock(void)
{
	mutex_unlock(&ovs_mutex);
}

#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void)
{
	if (debug_locks)
		return lockdep_is_held(&ovs_mutex);
	else
		return 1;
}
#endif

static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
			     const struct sw_flow_key *,
			     const struct dp_upcall_info *,
			     uint32_t cutlen);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
				  const struct sw_flow_key *,
				  const struct dp_upcall_info *,
				  uint32_t cutlen);

static void ovs_dp_masks_rebalance(struct work_struct *work);

/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
	struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);

	return ovs_vport_name(vport);
}

static int get_dpifindex(const struct datapath *dp)
{
	struct vport *local;
	int ifindex;

	rcu_read_lock();

	local = ovs_vport_rcu(dp, OVSP_LOCAL);
	if (local)
		ifindex = local->dev->ifindex;
	else
		ifindex = 0;

	rcu_read_unlock();

	return ifindex;
}

static void destroy_dp_rcu(struct rcu_head *rcu)
{
	struct datapath *dp = container_of(rcu, struct datapath, rcu);

	ovs_flow_tbl_destroy(&dp->table);
	free_percpu(dp->stats_percpu);
	kfree(dp->ports);
	ovs_meters_exit(dp);
	kfree(dp);
}

static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
					    u16 port_no)
{
	return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}

/* Called with ovs_mutex or RCU read lock. */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
	struct vport *vport;
	struct hlist_head *head;

	head = vport_hash_bucket(dp, port_no);
	hlist_for_each_entry_rcu(vport, head, dp_hash_node,
				 lockdep_ovsl_is_held()) {
		if (vport->port_no == port_no)
			return vport;
	}
	return NULL;
}

/* Called with ovs_mutex. */
static struct vport *new_vport(const struct vport_parms *parms)
{
	struct vport *vport;

	vport = ovs_vport_add(parms);
	if (!IS_ERR(vport)) {
		struct datapath *dp = parms->dp;
		struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

		hlist_add_head_rcu(&vport->dp_hash_node, head);
	}
	return vport;
}

void ovs_dp_detach_port(struct vport *p)
{
	ASSERT_OVSL();

	/* First drop references to device. */
	hlist_del_rcu(&p->dp_hash_node);

	/* Then destroy it. */
	ovs_vport_del(p);
}

/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
	const struct vport *p = OVS_CB(skb)->input_vport;
	struct datapath *dp = p->dp;
	struct sw_flow *flow;
	struct sw_flow_actions *sf_acts;
	struct dp_stats_percpu *stats;
	u64 *stats_counter;
	u32 n_mask_hit;
	u32 n_cache_hit;
	int error;

	stats = this_cpu_ptr(dp->stats_percpu);

	/* Look up flow. */
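	/* The lookup also reports how many flow masks were tried (n_mask_hit)
	 * and whether the mask cache was hit (n_cache_hit); both counters are
	 * folded into the per-CPU datapath statistics below.
	 */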
	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
					 &n_mask_hit, &n_cache_hit);
	if (unlikely(!flow)) {
		struct dp_upcall_info upcall;

		memset(&upcall, 0, sizeof(upcall));
		upcall.cmd = OVS_PACKET_CMD_MISS;
		upcall.portid = ovs_vport_find_upcall_portid(p, skb);
		upcall.mru = OVS_CB(skb)->mru;
		error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
		switch (error) {
		case 0:
		case -EAGAIN:
		case -ERESTARTSYS:
		case -EINTR:
			consume_skb(skb);
			break;
		default:
			kfree_skb(skb);
			break;
		}
		stats_counter = &stats->n_missed;
		goto out;
	}

	ovs_flow_stats_update(flow, key->tp.flags, skb);
	sf_acts = rcu_dereference(flow->sf_acts);
	error = ovs_execute_actions(dp, skb, sf_acts, key);
	if (unlikely(error))
		net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
				    ovs_dp_name(dp), error);

	stats_counter = &stats->n_hit;

out:
	/* Update datapath statistics. */
	u64_stats_update_begin(&stats->syncp);
	(*stats_counter)++;
	stats->n_mask_hit += n_mask_hit;
	stats->n_cache_hit += n_cache_hit;
	u64_stats_update_end(&stats->syncp);
}

int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
		  const struct sw_flow_key *key,
		  const struct dp_upcall_info *upcall_info,
		  uint32_t cutlen)
{
	struct dp_stats_percpu *stats;
	int err;

	if (upcall_info->portid == 0) {
		err = -ENOTCONN;
		goto err;
	}

	if (!skb_is_gso(skb))
		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
	else
		err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
	if (err)
		goto err;

	return 0;

err:
	stats = this_cpu_ptr(dp->stats_percpu);

	u64_stats_update_begin(&stats->syncp);
	stats->n_lost++;
	u64_stats_update_end(&stats->syncp);

	return err;
}

static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
			     const struct sw_flow_key *key,
			     const struct dp_upcall_info *upcall_info,
			     uint32_t cutlen)
{
	unsigned int gso_type = skb_shinfo(skb)->gso_type;
	struct sw_flow_key later_key;
	struct sk_buff *segs, *nskb;
	int err;

	BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET);
	segs = __skb_gso_segment(skb, NETIF_F_SG, false);
	if (IS_ERR(segs))
		return PTR_ERR(segs);
	if (segs == NULL)
		return -EINVAL;

	if (gso_type & SKB_GSO_UDP) {
		/* The initial flow key extracted by ovs_flow_key_extract()
		 * in this case is for a first fragment, so we need to
		 * properly mark later fragments.
		 */
		later_key = *key;
		later_key.ip.frag = OVS_FRAG_TYPE_LATER;
	}

	/* Queue all of the segments. */
	skb_list_walk_safe(segs, skb, nskb) {
		if (gso_type & SKB_GSO_UDP && skb != segs)
			key = &later_key;

		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
		if (err)
			break;

	}

	/* Free all of the segments. */
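	/* Every segment is released here: kfree_skb() on the error path so
	 * drop monitors see them, consume_skb() otherwise. 'err' still holds
	 * the result of the first failed upcall and is returned below.
	 */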
	skb_list_walk_safe(segs, skb, nskb) {
		if (err)
			kfree_skb(skb);
		else
			consume_skb(skb);
	}
	return err;
}

static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
			      unsigned int hdrlen, int actions_attrlen)
{
	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
		+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
		+ nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
		+ nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
		+ nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */

	/* OVS_PACKET_ATTR_USERDATA */
	if (upcall_info->userdata)
		size += NLA_ALIGN(upcall_info->userdata->nla_len);

	/* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
	if (upcall_info->egress_tun_info)
		size += nla_total_size(ovs_tun_key_attr_size());

	/* OVS_PACKET_ATTR_ACTIONS */
	if (upcall_info->actions_len)
		size += nla_total_size(actions_attrlen);

	/* OVS_PACKET_ATTR_MRU */
	if (upcall_info->mru)
		size += nla_total_size(sizeof(upcall_info->mru));

	return size;
}

static void pad_packet(struct datapath *dp, struct sk_buff *skb)
{
	if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
		size_t plen = NLA_ALIGN(skb->len) - skb->len;

		if (plen > 0)
			skb_put_zero(skb, plen);
	}
}

static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
				  const struct sw_flow_key *key,
				  const struct dp_upcall_info *upcall_info,
				  uint32_t cutlen)
{
	struct ovs_header *upcall;
	struct sk_buff *nskb = NULL;
	struct sk_buff *user_skb = NULL; /* to be queued to userspace */
	struct nlattr *nla;
	size_t len;
	unsigned int hlen;
	int err, dp_ifindex;
	u64 hash;

	dp_ifindex = get_dpifindex(dp);
	if (!dp_ifindex)
		return -ENODEV;

	if (skb_vlan_tag_present(skb)) {
		nskb = skb_clone(skb, GFP_ATOMIC);
		if (!nskb)
			return -ENOMEM;

		nskb = __vlan_hwaccel_push_inside(nskb);
		if (!nskb)
			return -ENOMEM;

		skb = nskb;
	}

	if (nla_attr_size(skb->len) > USHRT_MAX) {
		err = -EFBIG;
		goto out;
	}

	/* Complete checksum if needed */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_csum_hwoffload_help(skb, 0)))
		goto out;

	/* Older versions of OVS user space enforce alignment of the last
	 * Netlink attribute to NLA_ALIGNTO which would require extensive
	 * padding logic. Only perform zerocopy if padding is not required.
	 */
	if (dp->user_features & OVS_DP_F_UNALIGNED)
		hlen = skb_zerocopy_headlen(skb);
	else
		hlen = skb->len;

	len = upcall_msg_size(upcall_info, hlen - cutlen,
			      OVS_CB(skb)->acts_origlen);
	user_skb = genlmsg_new(len, GFP_ATOMIC);
	if (!user_skb) {
		err = -ENOMEM;
		goto out;
	}

	upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
			     0, upcall_info->cmd);
	if (!upcall) {
		err = -EINVAL;
		goto out;
	}
	upcall->dp_ifindex = dp_ifindex;

	err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
	if (err)
		goto out;

	if (upcall_info->userdata)
		__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
			  nla_len(upcall_info->userdata),
			  nla_data(upcall_info->userdata));

	if (upcall_info->egress_tun_info) {
		nla = nla_nest_start_noflag(user_skb,
					    OVS_PACKET_ATTR_EGRESS_TUN_KEY);
		if (!nla) {
			err = -EMSGSIZE;
			goto out;
		}
		err = ovs_nla_put_tunnel_info(user_skb,
					      upcall_info->egress_tun_info);
		if (err)
			goto out;

		nla_nest_end(user_skb, nla);
	}

	if (upcall_info->actions_len) {
		nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS);
		if (!nla) {
			err = -EMSGSIZE;
			goto out;
		}
		err = ovs_nla_put_actions(upcall_info->actions,
					  upcall_info->actions_len,
					  user_skb);
		if (!err)
			nla_nest_end(user_skb, nla);
		else
			nla_nest_cancel(user_skb, nla);
	}

	/* Add OVS_PACKET_ATTR_MRU */
	if (upcall_info->mru &&
	    nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
		err = -ENOBUFS;
		goto out;
	}

	/* Add OVS_PACKET_ATTR_LEN when packet is truncated */
	if (cutlen > 0 &&
	    nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
		err = -ENOBUFS;
		goto out;
	}

	/* Add OVS_PACKET_ATTR_HASH */
	hash = skb_get_hash_raw(skb);
	if (skb->sw_hash)
		hash |= OVS_PACKET_HASH_SW_BIT;

	if (skb->l4_hash)
		hash |= OVS_PACKET_HASH_L4_BIT;

	if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof(u64), &hash)) {
		err = -ENOBUFS;
		goto out;
	}

	/* Only reserve room for attribute header, packet data is added
	 * in skb_zerocopy() */
	if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
		err = -ENOBUFS;
		goto out;
	}
	nla->nla_len = nla_attr_size(skb->len - cutlen);

	err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
	if (err)
		goto out;

	/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
	pad_packet(dp, user_skb);

	((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

	err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
	user_skb = NULL;
out:
	if (err)
		skb_tx_error(skb);
	consume_skb(user_skb);
	consume_skb(nskb);

	return err;
}

static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
	struct ovs_header *ovs_header = info->userhdr;
	struct net *net = sock_net(skb->sk);
	struct nlattr **a = info->attrs;
	struct sw_flow_actions *acts;
	struct sk_buff *packet;
	struct sw_flow *flow;
	struct sw_flow_actions *sf_acts;
	struct datapath *dp;
	struct vport *input_vport;
	u16 mru = 0;
	u64 hash;
	int len;
	int err;
	bool log = !a[OVS_PACKET_ATTR_PROBE];

	err = -EINVAL;
	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
	    !a[OVS_PACKET_ATTR_ACTIONS])
		goto err;

	len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
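	/* Build a fresh skb and copy the packet payload supplied by
	 * userspace into it before the actions are executed on it.
	 */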
	packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
	err = -ENOMEM;
	if (!packet)
		goto err;
	skb_reserve(packet, NET_IP_ALIGN);

	nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

	/* Set packet's mru */
	if (a[OVS_PACKET_ATTR_MRU]) {
		mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
		packet->ignore_df = 1;
	}
	OVS_CB(packet)->mru = mru;

	if (a[OVS_PACKET_ATTR_HASH]) {
		hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);

		__skb_set_hash(packet, hash & 0xFFFFFFFFULL,
			       !!(hash & OVS_PACKET_HASH_SW_BIT),
			       !!(hash & OVS_PACKET_HASH_L4_BIT));
	}

	/* Build an sw_flow for sending this packet. */
	flow = ovs_flow_alloc();
	err = PTR_ERR(flow);
	if (IS_ERR(flow))
		goto err_kfree_skb;

	err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
					     packet, &flow->key, log);
	if (err)
		goto err_flow_free;

	err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
				   &flow->key, &acts, log);
	if (err)
		goto err_flow_free;

	rcu_assign_pointer(flow->sf_acts, acts);
	packet->priority = flow->key.phy.priority;
	packet->mark = flow->key.phy.skb_mark;

	rcu_read_lock();
	dp = get_dp_rcu(net, ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto err_unlock;

	input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
	if (!input_vport)
		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);

	if (!input_vport)
		goto err_unlock;

	packet->dev = input_vport->dev;
	OVS_CB(packet)->input_vport = input_vport;
	sf_acts = rcu_dereference(flow->sf_acts);

	local_bh_disable();
	err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
	local_bh_enable();
	rcu_read_unlock();

	ovs_flow_free(flow, false);
	return err;

err_unlock:
	rcu_read_unlock();
err_flow_free:
	ovs_flow_free(flow, false);
err_kfree_skb:
	kfree_skb(packet);
err:
	return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
	[OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
	[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
	[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
	[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
	[OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
};

static const struct genl_small_ops dp_packet_genl_ops[] = {
	{ .cmd = OVS_PACKET_CMD_EXECUTE,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_packet_cmd_execute
	}
};

static struct genl_family dp_packet_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_PACKET_FAMILY,
	.version = OVS_PACKET_VERSION,
	.maxattr = OVS_PACKET_ATTR_MAX,
	.policy = packet_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_packet_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_packet_genl_ops),
	.module = THIS_MODULE,
};

static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
			 struct ovs_dp_megaflow_stats *mega_stats)
{
	int i;

	memset(mega_stats, 0, sizeof(*mega_stats));

	stats->n_flows = ovs_flow_tbl_count(&dp->table);
	mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);

	stats->n_hit = stats->n_missed = stats->n_lost = 0;

	for_each_possible_cpu(i) {
		const struct dp_stats_percpu *percpu_stats;
		struct dp_stats_percpu local_stats;
		unsigned int start;

		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

		do {
			start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
			local_stats = *percpu_stats;
		} while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));

		stats->n_hit += local_stats.n_hit;
		stats->n_missed += local_stats.n_missed;
		stats->n_lost += local_stats.n_lost;
		mega_stats->n_mask_hit += local_stats.n_mask_hit;
		mega_stats->n_cache_hit += local_stats.n_cache_hit;
	}
}

static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
{
	return ovs_identifier_is_ufid(sfid) &&
	       !(ufid_flags & OVS_UFID_F_OMIT_KEY);
}

static bool should_fill_mask(uint32_t ufid_flags)
{
	return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
}

static bool should_fill_actions(uint32_t ufid_flags)
{
	return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
}

static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
				    const struct sw_flow_id *sfid,
				    uint32_t ufid_flags)
{
	size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));

	/* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback
	 * see ovs_nla_put_identifier()
	 */
	if (sfid && ovs_identifier_is_ufid(sfid))
		len += nla_total_size(sfid->ufid_len);
	else
		len += nla_total_size(ovs_key_attr_size());

	/* OVS_FLOW_ATTR_KEY */
	if (!sfid || should_fill_key(sfid, ufid_flags))
		len += nla_total_size(ovs_key_attr_size());

	/* OVS_FLOW_ATTR_MASK */
	if (should_fill_mask(ufid_flags))
		len += nla_total_size(ovs_key_attr_size());

	/* OVS_FLOW_ATTR_ACTIONS */
	if (should_fill_actions(ufid_flags))
		len += nla_total_size(acts->orig_len);

	return len
		+ nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
		+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
		+ nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
}

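/* The stats attributes (OVS_FLOW_ATTR_USED, _STATS and _TCP_FLAGS) are only
 * emitted below when they are non-zero, which keeps dump messages small.
 */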
/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
				   struct sk_buff *skb)
{
	struct ovs_flow_stats stats;
	__be16 tcp_flags;
	unsigned long used;

	ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);

	if (used &&
	    nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
			      OVS_FLOW_ATTR_PAD))
		return -EMSGSIZE;

	if (stats.n_packets &&
	    nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
			  sizeof(struct ovs_flow_stats), &stats,
			  OVS_FLOW_ATTR_PAD))
		return -EMSGSIZE;

	if ((u8)ntohs(tcp_flags) &&
	    nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
		return -EMSGSIZE;

	return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
				     struct sk_buff *skb, int skb_orig_len)
{
	struct nlattr *start;
	int err;

	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
	 * this is the first flow to be dumped into 'skb'. This is unusual for
	 * Netlink but individual action lists can be longer than
	 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
	 * The userspace caller can always fetch the actions separately if it
	 * really wants them. (Most userspace callers in fact don't care.)
	 *
	 * This can only fail for dump operations because the skb is always
	 * properly sized for single flows.
	 */
	start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
	if (start) {
		const struct sw_flow_actions *sf_acts;

		sf_acts = rcu_dereference_ovsl(flow->sf_acts);
		err = ovs_nla_put_actions(sf_acts->actions,
					  sf_acts->actions_len, skb);

		if (!err)
			nla_nest_end(skb, start);
		else {
			if (skb_orig_len)
				return err;

			nla_nest_cancel(skb, start);
		}
	} else if (skb_orig_len) {
		return -EMSGSIZE;
	}

	return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
				  struct sk_buff *skb, u32 portid,
				  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
{
	const int skb_orig_len = skb->len;
	struct ovs_header *ovs_header;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
				 flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = dp_ifindex;

	err = ovs_nla_put_identifier(flow, skb);
	if (err)
		goto error;

	if (should_fill_key(&flow->id, ufid_flags)) {
		err = ovs_nla_put_masked_key(flow, skb);
		if (err)
			goto error;
	}

	if (should_fill_mask(ufid_flags)) {
		err = ovs_nla_put_mask(flow, skb);
		if (err)
			goto error;
	}

	err = ovs_flow_cmd_fill_stats(flow, skb);
	if (err)
		goto error;

	if (should_fill_actions(ufid_flags)) {
		err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
		if (err)
			goto error;
	}

	genlmsg_end(skb, ovs_header);
	return 0;

error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}

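/* The reply skb for flow commands is allocated with GFP_KERNEL and may
 * therefore sleep, which is why the helper below must not run under the
 * RCU read lock.
 */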
/* May not be called with RCU read lock. */
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
					       const struct sw_flow_id *sfid,
					       struct genl_info *info,
					       bool always,
					       uint32_t ufid_flags)
{
	struct sk_buff *skb;
	size_t len;

	if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
		return NULL;

	len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
	skb = genlmsg_new(len, GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	return skb;
}

/* Called with ovs_mutex. */
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
					       int dp_ifindex,
					       struct genl_info *info, u8 cmd,
					       bool always, u32 ufid_flags)
{
	struct sk_buff *skb;
	int retval;

	skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
				      &flow->id, info, always, ufid_flags);
	if (IS_ERR_OR_NULL(skb))
		return skb;

	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
					info->snd_portid, info->snd_seq, 0,
					cmd, ufid_flags);
	if (WARN_ON_ONCE(retval < 0)) {
		kfree_skb(skb);
		skb = ERR_PTR(retval);
	}
	return skb;
}

static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow *flow = NULL, *new_flow;
	struct sw_flow_mask mask;
	struct sk_buff *reply;
	struct datapath *dp;
	struct sw_flow_key *key;
	struct sw_flow_actions *acts;
	struct sw_flow_match match;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int error;
	bool log = !a[OVS_FLOW_ATTR_PROBE];

	/* Must have key and actions. */
	error = -EINVAL;
	if (!a[OVS_FLOW_ATTR_KEY]) {
		OVS_NLERR(log, "Flow key attr not present in new flow.");
		goto error;
	}
	if (!a[OVS_FLOW_ATTR_ACTIONS]) {
		OVS_NLERR(log, "Flow actions attr not present in new flow.");
		goto error;
	}

	/* Most of the time we need to allocate a new flow, so do it before
	 * taking the lock.
	 */
	new_flow = ovs_flow_alloc();
	if (IS_ERR(new_flow)) {
		error = PTR_ERR(new_flow);
		goto error;
	}

	/* Extract key. */
	key = kzalloc(sizeof(*key), GFP_KERNEL);
	if (!key) {
		error = -ENOMEM;
		goto err_kfree_flow;
	}

	ovs_match_init(&match, key, false, &mask);
	error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
				  a[OVS_FLOW_ATTR_MASK], log);
	if (error)
		goto err_kfree_key;

	ovs_flow_mask_key(&new_flow->key, key, true, &mask);

	/* Extract flow identifier. */
	error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
				       key, log);
	if (error)
		goto err_kfree_key;

	/* Validate actions. */
	error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
				     &new_flow->key, &acts, log);
	if (error) {
		OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
		goto err_kfree_key;
	}

	reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
					ufid_flags);
	if (IS_ERR(reply)) {
		error = PTR_ERR(reply);
		goto err_kfree_acts;
	}

	ovs_lock();
	dp = get_dp(net, ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		error = -ENODEV;
		goto err_unlock_ovs;
	}

	/* Check if this is a duplicate flow */
	if (ovs_identifier_is_ufid(&new_flow->id))
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
	if (!flow)
		flow = ovs_flow_tbl_lookup(&dp->table, key);
	if (likely(!flow)) {
		rcu_assign_pointer(new_flow->sf_acts, acts);

		/* Put flow in bucket. */
		error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
		if (unlikely(error)) {
			acts = NULL;
			goto err_unlock_ovs;
		}

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(new_flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_NEW,
						       ufid_flags);
			BUG_ON(error < 0);
		}
		ovs_unlock();
	} else {
		struct sw_flow_actions *old_acts;

		/* Bail out if we're not allowed to modify an existing flow.
		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
		 * because Generic Netlink treats the latter as a dump
		 * request. We also accept NLM_F_EXCL in case that bug ever
		 * gets fixed.
		 */
		if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
							 | NLM_F_EXCL))) {
			error = -EEXIST;
			goto err_unlock_ovs;
		}
		/* The flow identifier has to be the same for flow updates.
		 * Look for any overlapping flow.
		 */
		if (unlikely(!ovs_flow_cmp(flow, &match))) {
			if (ovs_identifier_is_key(&flow->id))
				flow = ovs_flow_tbl_lookup_exact(&dp->table,
								 &match);
			else /* UFID matches but key is different */
				flow = NULL;
			if (!flow) {
				error = -ENOENT;
				goto err_unlock_ovs;
			}
		}
		/* Update actions. */
		old_acts = ovsl_dereference(flow->sf_acts);
		rcu_assign_pointer(flow->sf_acts, acts);

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_NEW,
						       ufid_flags);
			BUG_ON(error < 0);
		}
		ovs_unlock();

		ovs_nla_free_flow_actions_rcu(old_acts);
		ovs_flow_free(new_flow, false);
	}

	if (reply)
		ovs_notify(&dp_flow_genl_family, reply, info);

	kfree(key);
	return 0;

err_unlock_ovs:
	ovs_unlock();
	kfree_skb(reply);
err_kfree_acts:
	ovs_nla_free_flow_actions(acts);
err_kfree_key:
	kfree(key);
err_kfree_flow:
	ovs_flow_free(new_flow, false);
error:
	return error;
}

/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
static noinline_for_stack
struct sw_flow_actions *get_flow_actions(struct net *net,
					 const struct nlattr *a,
					 const struct sw_flow_key *key,
					 const struct sw_flow_mask *mask,
					 bool log)
{
	struct sw_flow_actions *acts;
	struct sw_flow_key masked_key;
	int error;

	ovs_flow_mask_key(&masked_key, key, true, mask);
	error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
	if (error) {
		OVS_NLERR(log,
			  "Actions may not be safe on all matching packets");
		return ERR_PTR(error);
	}

	return acts;
}

/* Factor out match-init and action-copy to avoid
 * "Wframe-larger-than=1024" warning. Because the mask is only used to get
 * the actions, we use a separate function to save stack space.
 *
 * If neither the key nor the actions attribute is present, we return 0
 * directly; in that case the caller will not use the match either. If the
 * actions attribute is present, we try to copy the actions and store them
 * in *acts. Before returning we reset match->mask, so that we never hand
 * back a match object with a dangling reference to the on-stack mask.
 */
static noinline_for_stack int
ovs_nla_init_match_and_action(struct net *net,
			      struct sw_flow_match *match,
			      struct sw_flow_key *key,
			      struct nlattr **a,
			      struct sw_flow_actions **acts,
			      bool log)
{
	struct sw_flow_mask mask;
	int error = 0;

	if (a[OVS_FLOW_ATTR_KEY]) {
		ovs_match_init(match, key, true, &mask);
		error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
					  a[OVS_FLOW_ATTR_MASK], log);
		if (error)
			goto error;
	}

	if (a[OVS_FLOW_ATTR_ACTIONS]) {
		if (!a[OVS_FLOW_ATTR_KEY]) {
			OVS_NLERR(log,
				  "Flow key attribute not present in set flow.");
			error = -EINVAL;
			goto error;
		}

		*acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
					 &mask, log);
		if (IS_ERR(*acts)) {
			error = PTR_ERR(*acts);
			goto error;
		}
	}

	/* On success, error is 0. */
error:
	match->mask = NULL;
	return error;
}

static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow_key key;
	struct sw_flow *flow;
	struct sk_buff *reply = NULL;
	struct datapath *dp;
	struct sw_flow_actions *old_acts = NULL, *acts = NULL;
	struct sw_flow_match match;
	struct sw_flow_id sfid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int error = 0;
	bool log = !a[OVS_FLOW_ATTR_PROBE];
	bool ufid_present;

	ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
	if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
		OVS_NLERR(log,
			  "Flow set message rejected, Key attribute missing.");
		return -EINVAL;
	}

	error = ovs_nla_init_match_and_action(net, &match, &key, a,
					      &acts, log);
	if (error)
		goto error;

	if (acts) {
		/* We can allocate the reply before taking the lock if we
		 * already have the actions.
		 */
		reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
						ufid_flags);
		if (IS_ERR(reply)) {
			error = PTR_ERR(reply);
			goto err_kfree_acts;
		}
	}

	ovs_lock();
	dp = get_dp(net, ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		error = -ENODEV;
		goto err_unlock_ovs;
	}
	/* Check that the flow exists. */
	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (unlikely(!flow)) {
		error = -ENOENT;
		goto err_unlock_ovs;
	}

	/* Update actions, if present. */
	if (likely(acts)) {
		old_acts = ovsl_dereference(flow->sf_acts);
		rcu_assign_pointer(flow->sf_acts, acts);

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_SET,
						       ufid_flags);
			BUG_ON(error < 0);
		}
	} else {
		/* Could not alloc without acts before locking. */
		reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
						info, OVS_FLOW_CMD_SET, false,
						ufid_flags);

		if (IS_ERR(reply)) {
			error = PTR_ERR(reply);
			goto err_unlock_ovs;
		}
	}

	/* Clear stats. */
	if (a[OVS_FLOW_ATTR_CLEAR])
		ovs_flow_stats_clear(flow);
	ovs_unlock();

	if (reply)
		ovs_notify(&dp_flow_genl_family, reply, info);
	if (old_acts)
		ovs_nla_free_flow_actions_rcu(old_acts);

	return 0;

err_unlock_ovs:
	ovs_unlock();
	kfree_skb(reply);
err_kfree_acts:
	ovs_nla_free_flow_actions(acts);
error:
	return error;
}

static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct net *net = sock_net(skb->sk);
	struct sw_flow_key key;
	struct sk_buff *reply;
	struct sw_flow *flow;
	struct datapath *dp;
	struct sw_flow_match match;
	struct sw_flow_id ufid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int err = 0;
	bool log = !a[OVS_FLOW_ATTR_PROBE];
	bool ufid_present;

	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
	if (a[OVS_FLOW_ATTR_KEY]) {
		ovs_match_init(&match, &key, true, NULL);
		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
					log);
	} else if (!ufid_present) {
		OVS_NLERR(log,
			  "Flow get message rejected, Key attribute missing.");
		err = -EINVAL;
	}
	if (err)
		return err;

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		err = -ENODEV;
		goto unlock;
	}

	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (!flow) {
		err = -ENOENT;
		goto unlock;
	}

	reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
					OVS_FLOW_CMD_GET, true, ufid_flags);
	if (IS_ERR(reply)) {
		err = PTR_ERR(reply);
		goto unlock;
	}

	ovs_unlock();
	return genlmsg_reply(reply, info);
unlock:
	ovs_unlock();
	return err;
}

static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct net *net = sock_net(skb->sk);
	struct sw_flow_key key;
	struct sk_buff *reply;
	struct sw_flow *flow = NULL;
	struct datapath *dp;
	struct sw_flow_match match;
	struct sw_flow_id ufid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int err;
	bool log = !a[OVS_FLOW_ATTR_PROBE];
	bool ufid_present;

	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
	if (a[OVS_FLOW_ATTR_KEY]) {
		ovs_match_init(&match, &key, true, NULL);
		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
					NULL, log);
		if (unlikely(err))
			return err;
	}

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		err = -ENODEV;
		goto unlock;
	}

	if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
		err = ovs_flow_tbl_flush(&dp->table);
		goto unlock;
	}

	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (unlikely(!flow)) {
		err = -ENOENT;
		goto unlock;
	}

	ovs_flow_tbl_remove(&dp->table, flow);
	ovs_unlock();

	reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
					&flow->id, info, false, ufid_flags);
	if (likely(reply)) {
		if (!IS_ERR(reply)) {
			rcu_read_lock();	/* To keep RCU checker happy. */
			err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
						     reply, info->snd_portid,
						     info->snd_seq, 0,
						     OVS_FLOW_CMD_DEL,
						     ufid_flags);
			rcu_read_unlock();
			if (WARN_ON_ONCE(err < 0)) {
				kfree_skb(reply);
				goto out_free;
			}

			ovs_notify(&dp_flow_genl_family, reply, info);
		} else {
			netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0,
					PTR_ERR(reply));
		}
	}

out_free:
	ovs_flow_free(flow, true);
	return 0;
unlock:
	ovs_unlock();
	return err;
}

static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct nlattr *a[__OVS_FLOW_ATTR_MAX];
	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct table_instance *ti;
	struct datapath *dp;
	u32 ufid_flags;
	int err;

	err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
				       OVS_FLOW_ATTR_MAX, flow_policy, NULL);
	if (err)
		return err;
	ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);

	rcu_read_lock();
	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		rcu_read_unlock();
		return -ENODEV;
	}

	ti = rcu_dereference(dp->table.ti);
	for (;;) {
		struct sw_flow *flow;
		u32 bucket, obj;

		bucket = cb->args[0];
		obj = cb->args[1];
		flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
		if (!flow)
			break;

		if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
					   NETLINK_CB(cb->skb).portid,
					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
					   OVS_FLOW_CMD_GET, ufid_flags) < 0)
			break;

		cb->args[0] = bucket;
		cb->args[1] = obj;
	}
	rcu_read_unlock();
	return skb->len;
}

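/* Attribute policy for flow commands. The forward declaration near the top
 * of this file lets ovs_flow_cmd_dump() reference it before it is defined.
 */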
static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
	[OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
	[OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
	[OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
};

static const struct genl_small_ops dp_flow_genl_ops[] = {
	{ .cmd = OVS_FLOW_CMD_NEW,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_flow_cmd_new
	},
	{ .cmd = OVS_FLOW_CMD_DEL,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_flow_cmd_del
	},
	{ .cmd = OVS_FLOW_CMD_GET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = 0, /* OK for unprivileged users. */
	  .doit = ovs_flow_cmd_get,
	  .dumpit = ovs_flow_cmd_dump
	},
	{ .cmd = OVS_FLOW_CMD_SET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_flow_cmd_set,
	},
};

static struct genl_family dp_flow_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_FLOW_FAMILY,
	.version = OVS_FLOW_VERSION,
	.maxattr = OVS_FLOW_ATTR_MAX,
	.policy = flow_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_flow_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_flow_genl_ops),
	.mcgrps = &ovs_dp_flow_multicast_group,
	.n_mcgrps = 1,
	.module = THIS_MODULE,
};

static size_t ovs_dp_cmd_msg_size(void)
{
	size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));

	msgsize += nla_total_size(IFNAMSIZ);
	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */

	return msgsize;
}

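/* ovs_dp_cmd_msg_size() above must account for every attribute that
 * ovs_dp_cmd_fill_info() below can emit, otherwise the reply skb would be
 * undersized.
 */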
/* Called with ovs_mutex. */
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
				u32 portid, u32 seq, u32 flags, u8 cmd)
{
	struct ovs_header *ovs_header;
	struct ovs_dp_stats dp_stats;
	struct ovs_dp_megaflow_stats dp_megaflow_stats;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
				 flags, cmd);
	if (!ovs_header)
		goto error;

	ovs_header->dp_ifindex = get_dpifindex(dp);

	err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
	if (err)
		goto nla_put_failure;

	get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
	if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
			  &dp_stats, OVS_DP_ATTR_PAD))
		goto nla_put_failure;

	if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
			  sizeof(struct ovs_dp_megaflow_stats),
			  &dp_megaflow_stats, OVS_DP_ATTR_PAD))
		goto nla_put_failure;

	if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
		goto nla_put_failure;

	if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
			ovs_flow_tbl_masks_cache_size(&dp->table)))
		goto nla_put_failure;

	genlmsg_end(skb, ovs_header);
	return 0;

nla_put_failure:
	genlmsg_cancel(skb, ovs_header);
error:
	return -EMSGSIZE;
}

static struct sk_buff *ovs_dp_cmd_alloc_info(void)
{
	return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
}

/* Called with rcu_read_lock or ovs_mutex. */
static struct datapath *lookup_datapath(struct net *net,
					const struct ovs_header *ovs_header,
					struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
	struct datapath *dp;

	if (!a[OVS_DP_ATTR_NAME])
		dp = get_dp(net, ovs_header->dp_ifindex);
	else {
		struct vport *vport;

		vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
		dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
	}
	return dp ? dp : ERR_PTR(-ENODEV);
}

static void ovs_dp_reset_user_features(struct sk_buff *skb,
				       struct genl_info *info)
{
	struct datapath *dp;

	dp = lookup_datapath(sock_net(skb->sk), info->userhdr,
			     info->attrs);
	if (IS_ERR(dp))
		return;

	pr_warn("%s: Dropping previously announced user features\n",
		ovs_dp_name(dp));
	dp->user_features = 0;
}

DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);

static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
{
	u32 user_features = 0;

	if (a[OVS_DP_ATTR_USER_FEATURES]) {
		user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);

		if (user_features & ~(OVS_DP_F_VPORT_PIDS |
				      OVS_DP_F_UNALIGNED |
				      OVS_DP_F_TC_RECIRC_SHARING))
			return -EOPNOTSUPP;

#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
		if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
			return -EOPNOTSUPP;
#endif
	}

	if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
		int err;
		u32 cache_size;

		cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
		err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
		if (err)
			return err;
	}

	dp->user_features = user_features;

	if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
		static_branch_enable(&tc_recirc_sharing_support);
	else
		static_branch_disable(&tc_recirc_sharing_support);

	return 0;
}

static int ovs_dp_stats_init(struct datapath *dp)
{
	dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
	if (!dp->stats_percpu)
		return -ENOMEM;

	return 0;
}

static int ovs_dp_vport_init(struct datapath *dp)
{
	int i;

	dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
				  sizeof(struct hlist_head),
				  GFP_KERNEL);
	if (!dp->ports)
		return -ENOMEM;

	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
		INIT_HLIST_HEAD(&dp->ports[i]);

	return 0;
}

static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct datapath *dp;
	struct vport *vport;
	struct ovs_net *ovs_net;
	int err;

	err = -EINVAL;
	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
		goto err;

	reply = ovs_dp_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	err = -ENOMEM;
	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
	if (dp == NULL)
		goto err_destroy_reply;

	ovs_dp_set_net(dp, sock_net(skb->sk));

	/* Allocate table. */
	err = ovs_flow_tbl_init(&dp->table);
	if (err)
		goto err_destroy_dp;

	err = ovs_dp_stats_init(dp);
	if (err)
		goto err_destroy_table;

	err = ovs_dp_vport_init(dp);
	if (err)
		goto err_destroy_stats;

	err = ovs_meters_init(dp);
	if (err)
		goto err_destroy_ports;

	/* Set up our datapath device. */
	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
	parms.type = OVS_VPORT_TYPE_INTERNAL;
	parms.options = NULL;
	parms.dp = dp;
	parms.port_no = OVSP_LOCAL;
	parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];

	/* So far only local changes have been made, now need the lock. */
	ovs_lock();

	err = ovs_dp_change(dp, a);
	if (err)
		goto err_unlock_and_destroy_meters;

	vport = new_vport(&parms);
	if (IS_ERR(vport)) {
		err = PTR_ERR(vport);
		if (err == -EBUSY)
			err = -EEXIST;

		if (err == -EEXIST) {
			/* An outdated user space instance that does not understand
			 * the concept of user_features has attempted to create a new
			 * datapath and is likely to reuse it. Drop all user features.
			 */
			if (info->genlhdr->version < OVS_DP_VER_FEATURES)
				ovs_dp_reset_user_features(skb, info);
		}

		goto err_unlock_and_destroy_meters;
	}

	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_NEW);
	BUG_ON(err < 0);

	ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
	list_add_tail_rcu(&dp->list_node, &ovs_net->dps);

	ovs_unlock();

	ovs_notify(&dp_datapath_genl_family, reply, info);
	return 0;

err_unlock_and_destroy_meters:
	ovs_unlock();
	ovs_meters_exit(dp);
err_destroy_ports:
	kfree(dp->ports);
err_destroy_stats:
	free_percpu(dp->stats_percpu);
err_destroy_table:
	ovs_flow_tbl_destroy(&dp->table);
err_destroy_dp:
	kfree(dp);
err_destroy_reply:
	kfree_skb(reply);
err:
	return err;
}

/* Called with ovs_mutex. */
static void __dp_destroy(struct datapath *dp)
{
	struct flow_table *table = &dp->table;
	int i;

	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
		struct vport *vport;
		struct hlist_node *n;

		hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
			if (vport->port_no != OVSP_LOCAL)
				ovs_dp_detach_port(vport);
	}

	list_del_rcu(&dp->list_node);

	/* OVSP_LOCAL is the datapath's internal port. We need to make sure
	 * that all ports in the datapath are destroyed before the datapath
	 * itself is freed.
	 */
	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));

	/* Flush the sw_flows in the tables here; the RCU callback only
	 * releases resources such as the dp, the ports and the tables
	 * themselves, which avoids RCU usage warnings during the flush.
	 */
	table_instance_flow_flush(table, ovsl_dereference(table->ti),
				  ovsl_dereference(table->ufid_ti));

	/* RCU destroy the ports, meters and flow tables. */
	call_rcu(&dp->rcu, destroy_dp_rcu);
}

static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	reply = ovs_dp_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
	err = PTR_ERR(dp);
	if (IS_ERR(dp))
		goto err_unlock_free;

	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_DEL);
	BUG_ON(err < 0);

	__dp_destroy(dp);
	ovs_unlock();

	ovs_notify(&dp_datapath_genl_family, reply, info);

	return 0;

err_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}

static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	reply = ovs_dp_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
	err = PTR_ERR(dp);
	if (IS_ERR(dp))
		goto err_unlock_free;

	err = ovs_dp_change(dp, info->attrs);
	if (err)
		goto err_unlock_free;

	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_SET);
	BUG_ON(err < 0);

	ovs_unlock();
	ovs_notify(&dp_datapath_genl_family, reply, info);

	return 0;

err_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}

static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	reply = ovs_dp_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
	if (IS_ERR(dp)) {
		err = PTR_ERR(dp);
		goto err_unlock_free;
	}
	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_GET);
	BUG_ON(err < 0);
	ovs_unlock();

	return genlmsg_reply(reply, info);

err_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}

static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
	struct datapath *dp;
	int skip = cb->args[0];
	int i = 0;

	ovs_lock();
	list_for_each_entry(dp, &ovs_net->dps, list_node) {
		if (i >= skip &&
		    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
					 OVS_DP_CMD_GET) < 0)
			break;
		i++;
	}
	ovs_unlock();

	cb->args[0] = i;

	return skb->len;
}

static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
	[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
	[OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0,
		PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
};

static const struct genl_small_ops dp_datapath_genl_ops[] = {
	{ .cmd = OVS_DP_CMD_NEW,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_dp_cmd_new
	},
	{ .cmd = OVS_DP_CMD_DEL,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_dp_cmd_del
	},
	{ .cmd = OVS_DP_CMD_GET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = 0, /* OK for unprivileged users. */
	  .doit = ovs_dp_cmd_get,
	  .dumpit = ovs_dp_cmd_dump
	},
	{ .cmd = OVS_DP_CMD_SET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_dp_cmd_set,
	},
};

static struct genl_family dp_datapath_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_DATAPATH_FAMILY,
	.version = OVS_DATAPATH_VERSION,
	.maxattr = OVS_DP_ATTR_MAX,
	.policy = datapath_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_datapath_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops),
	.mcgrps = &ovs_dp_datapath_multicast_group,
	.n_mcgrps = 1,
	.module = THIS_MODULE,
};

/* Called with ovs_mutex or RCU read lock. */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
				   struct net *net, u32 portid, u32 seq,
				   u32 flags, u8 cmd, gfp_t gfp)
{
	struct ovs_header *ovs_header;
	struct ovs_vport_stats vport_stats;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
				 flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = get_dpifindex(vport->dp);

	if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
	    nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
	    nla_put_string(skb, OVS_VPORT_ATTR_NAME,
			   ovs_vport_name(vport)) ||
	    nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
		goto nla_put_failure;

	if (!net_eq(net, dev_net(vport->dev))) {
		int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);

		if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
			goto nla_put_failure;
	}

	ovs_vport_get_stats(vport, &vport_stats);
	if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
			  sizeof(struct ovs_vport_stats), &vport_stats,
			  OVS_VPORT_ATTR_PAD))
		goto nla_put_failure;

	if (ovs_vport_get_upcall_portids(vport, skb))
		goto nla_put_failure;

	err = ovs_vport_get_options(vport, skb);
	if (err == -EMSGSIZE)
		goto error;

	genlmsg_end(skb, ovs_header);
	return 0;

nla_put_failure:
	err = -EMSGSIZE;
error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}

static struct sk_buff *ovs_vport_cmd_alloc_info(void)
{
	return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
}

/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
					 u32 portid, u32 seq, u8 cmd)
{
	struct sk_buff *skb;
	int retval;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
					 GFP_KERNEL);
	BUG_ON(retval < 0);

	return skb;
}

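/* A vport can be identified either by name or by the pair of datapath
 * ifindex and port number; lookup_vport() below handles both forms.
 */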
*/ 2053static struct vport *lookup_vport(struct net *net, 2054 const struct ovs_header *ovs_header, 2055 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1]) 2056{ 2057 struct datapath *dp; 2058 struct vport *vport; 2059 2060 if (a[OVS_VPORT_ATTR_IFINDEX]) 2061 return ERR_PTR(-EOPNOTSUPP); 2062 if (a[OVS_VPORT_ATTR_NAME]) { 2063 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME])); 2064 if (!vport) 2065 return ERR_PTR(-ENODEV); 2066 if (ovs_header->dp_ifindex && 2067 ovs_header->dp_ifindex != get_dpifindex(vport->dp)) 2068 return ERR_PTR(-ENODEV); 2069 return vport; 2070 } else if (a[OVS_VPORT_ATTR_PORT_NO]) { 2071 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]); 2072 2073 if (port_no >= DP_MAX_PORTS) 2074 return ERR_PTR(-EFBIG); 2075 2076 dp = get_dp(net, ovs_header->dp_ifindex); 2077 if (!dp) 2078 return ERR_PTR(-ENODEV); 2079 2080 vport = ovs_vport_ovsl_rcu(dp, port_no); 2081 if (!vport) 2082 return ERR_PTR(-ENODEV); 2083 return vport; 2084 } else 2085 return ERR_PTR(-EINVAL); 2086 2087} 2088 2089static unsigned int ovs_get_max_headroom(struct datapath *dp) 2090{ 2091 unsigned int dev_headroom, max_headroom = 0; 2092 struct net_device *dev; 2093 struct vport *vport; 2094 int i; 2095 2096 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { 2097 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node, 2098 lockdep_ovsl_is_held()) { 2099 dev = vport->dev; 2100 dev_headroom = netdev_get_fwd_headroom(dev); 2101 if (dev_headroom > max_headroom) 2102 max_headroom = dev_headroom; 2103 } 2104 } 2105 2106 return max_headroom; 2107} 2108 2109/* Called with ovs_mutex */ 2110static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom) 2111{ 2112 struct vport *vport; 2113 int i; 2114 2115 dp->max_headroom = new_headroom; 2116 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { 2117 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node, 2118 lockdep_ovsl_is_held()) 2119 netdev_set_rx_headroom(vport->dev, new_headroom); 2120 } 2121} 2122 2123static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) 2124{ 2125 struct nlattr **a = info->attrs; 2126 struct ovs_header *ovs_header = info->userhdr; 2127 struct vport_parms parms; 2128 struct sk_buff *reply; 2129 struct vport *vport; 2130 struct datapath *dp; 2131 unsigned int new_headroom; 2132 u32 port_no; 2133 int err; 2134 2135 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] || 2136 !a[OVS_VPORT_ATTR_UPCALL_PID]) 2137 return -EINVAL; 2138 if (a[OVS_VPORT_ATTR_IFINDEX]) 2139 return -EOPNOTSUPP; 2140 2141 port_no = a[OVS_VPORT_ATTR_PORT_NO] 2142 ? 
nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0; 2143 if (port_no >= DP_MAX_PORTS) 2144 return -EFBIG; 2145 2146 reply = ovs_vport_cmd_alloc_info(); 2147 if (!reply) 2148 return -ENOMEM; 2149 2150 ovs_lock(); 2151restart: 2152 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 2153 err = -ENODEV; 2154 if (!dp) 2155 goto exit_unlock_free; 2156 2157 if (port_no) { 2158 vport = ovs_vport_ovsl(dp, port_no); 2159 err = -EBUSY; 2160 if (vport) 2161 goto exit_unlock_free; 2162 } else { 2163 for (port_no = 1; ; port_no++) { 2164 if (port_no >= DP_MAX_PORTS) { 2165 err = -EFBIG; 2166 goto exit_unlock_free; 2167 } 2168 vport = ovs_vport_ovsl(dp, port_no); 2169 if (!vport) 2170 break; 2171 } 2172 } 2173 2174 parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]); 2175 parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]); 2176 parms.options = a[OVS_VPORT_ATTR_OPTIONS]; 2177 parms.dp = dp; 2178 parms.port_no = port_no; 2179 parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID]; 2180 2181 vport = new_vport(&parms); 2182 err = PTR_ERR(vport); 2183 if (IS_ERR(vport)) { 2184 if (err == -EAGAIN) 2185 goto restart; 2186 goto exit_unlock_free; 2187 } 2188 2189 err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), 2190 info->snd_portid, info->snd_seq, 0, 2191 OVS_VPORT_CMD_NEW, GFP_KERNEL); 2192 2193 new_headroom = netdev_get_fwd_headroom(vport->dev); 2194 2195 if (new_headroom > dp->max_headroom) 2196 ovs_update_headroom(dp, new_headroom); 2197 else 2198 netdev_set_rx_headroom(vport->dev, dp->max_headroom); 2199 2200 BUG_ON(err < 0); 2201 ovs_unlock(); 2202 2203 ovs_notify(&dp_vport_genl_family, reply, info); 2204 return 0; 2205 2206exit_unlock_free: 2207 ovs_unlock(); 2208 kfree_skb(reply); 2209 return err; 2210} 2211 2212static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) 2213{ 2214 struct nlattr **a = info->attrs; 2215 struct sk_buff *reply; 2216 struct vport *vport; 2217 int err; 2218 2219 reply = ovs_vport_cmd_alloc_info(); 2220 if (!reply) 2221 return -ENOMEM; 2222 2223 ovs_lock(); 2224 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); 2225 err = PTR_ERR(vport); 2226 if (IS_ERR(vport)) 2227 goto exit_unlock_free; 2228 2229 if (a[OVS_VPORT_ATTR_TYPE] && 2230 nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) { 2231 err = -EINVAL; 2232 goto exit_unlock_free; 2233 } 2234 2235 if (a[OVS_VPORT_ATTR_OPTIONS]) { 2236 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); 2237 if (err) 2238 goto exit_unlock_free; 2239 } 2240 2241 2242 if (a[OVS_VPORT_ATTR_UPCALL_PID]) { 2243 struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID]; 2244 2245 err = ovs_vport_set_upcall_portids(vport, ids); 2246 if (err) 2247 goto exit_unlock_free; 2248 } 2249 2250 err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), 2251 info->snd_portid, info->snd_seq, 0, 2252 OVS_VPORT_CMD_SET, GFP_KERNEL); 2253 BUG_ON(err < 0); 2254 2255 ovs_unlock(); 2256 ovs_notify(&dp_vport_genl_family, reply, info); 2257 return 0; 2258 2259exit_unlock_free: 2260 ovs_unlock(); 2261 kfree_skb(reply); 2262 return err; 2263} 2264 2265static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) 2266{ 2267 bool update_headroom = false; 2268 struct nlattr **a = info->attrs; 2269 struct sk_buff *reply; 2270 struct datapath *dp; 2271 struct vport *vport; 2272 unsigned int new_headroom; 2273 int err; 2274 2275 reply = ovs_vport_cmd_alloc_info(); 2276 if (!reply) 2277 return -ENOMEM; 2278 2279 ovs_lock(); 2280 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); 2281 err = PTR_ERR(vport); 2282 if 
(IS_ERR(vport)) 2283 goto exit_unlock_free; 2284 2285 if (vport->port_no == OVSP_LOCAL) { 2286 err = -EINVAL; 2287 goto exit_unlock_free; 2288 } 2289 2290 err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), 2291 info->snd_portid, info->snd_seq, 0, 2292 OVS_VPORT_CMD_DEL, GFP_KERNEL); 2293 BUG_ON(err < 0); 2294 2295 /* the vport deletion may trigger dp headroom update */ 2296 dp = vport->dp; 2297 if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom) 2298 update_headroom = true; 2299 2300 netdev_reset_rx_headroom(vport->dev); 2301 ovs_dp_detach_port(vport); 2302 2303 if (update_headroom) { 2304 new_headroom = ovs_get_max_headroom(dp); 2305 2306 if (new_headroom < dp->max_headroom) 2307 ovs_update_headroom(dp, new_headroom); 2308 } 2309 ovs_unlock(); 2310 2311 ovs_notify(&dp_vport_genl_family, reply, info); 2312 return 0; 2313 2314exit_unlock_free: 2315 ovs_unlock(); 2316 kfree_skb(reply); 2317 return err; 2318} 2319 2320static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) 2321{ 2322 struct nlattr **a = info->attrs; 2323 struct ovs_header *ovs_header = info->userhdr; 2324 struct sk_buff *reply; 2325 struct vport *vport; 2326 int err; 2327 2328 reply = ovs_vport_cmd_alloc_info(); 2329 if (!reply) 2330 return -ENOMEM; 2331 2332 rcu_read_lock(); 2333 vport = lookup_vport(sock_net(skb->sk), ovs_header, a); 2334 err = PTR_ERR(vport); 2335 if (IS_ERR(vport)) 2336 goto exit_unlock_free; 2337 err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), 2338 info->snd_portid, info->snd_seq, 0, 2339 OVS_VPORT_CMD_GET, GFP_ATOMIC); 2340 BUG_ON(err < 0); 2341 rcu_read_unlock(); 2342 2343 return genlmsg_reply(reply, info); 2344 2345exit_unlock_free: 2346 rcu_read_unlock(); 2347 kfree_skb(reply); 2348 return err; 2349} 2350 2351static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 2352{ 2353 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); 2354 struct datapath *dp; 2355 int bucket = cb->args[0], skip = cb->args[1]; 2356 int i, j = 0; 2357 2358 rcu_read_lock(); 2359 dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex); 2360 if (!dp) { 2361 rcu_read_unlock(); 2362 return -ENODEV; 2363 } 2364 for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) { 2365 struct vport *vport; 2366 2367 j = 0; 2368 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) { 2369 if (j >= skip && 2370 ovs_vport_cmd_fill_info(vport, skb, 2371 sock_net(skb->sk), 2372 NETLINK_CB(cb->skb).portid, 2373 cb->nlh->nlmsg_seq, 2374 NLM_F_MULTI, 2375 OVS_VPORT_CMD_GET, 2376 GFP_ATOMIC) < 0) 2377 goto out; 2378 2379 j++; 2380 } 2381 skip = 0; 2382 } 2383out: 2384 rcu_read_unlock(); 2385 2386 cb->args[0] = i; 2387 cb->args[1] = j; 2388 2389 return skb->len; 2390} 2391 2392static void ovs_dp_masks_rebalance(struct work_struct *work) 2393{ 2394 struct ovs_net *ovs_net = container_of(work, struct ovs_net, 2395 masks_rebalance.work); 2396 struct datapath *dp; 2397 2398 ovs_lock(); 2399 2400 list_for_each_entry(dp, &ovs_net->dps, list_node) 2401 ovs_flow_masks_rebalance(&dp->table); 2402 2403 ovs_unlock(); 2404 2405 schedule_delayed_work(&ovs_net->masks_rebalance, 2406 msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL)); 2407} 2408 2409static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { 2410 [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, 2411 [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, 2412 [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, 2413 [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, 2414 
[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC }, 2415 [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, 2416 [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 }, 2417 [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 }, 2418}; 2419 2420static const struct genl_small_ops dp_vport_genl_ops[] = { 2421 { .cmd = OVS_VPORT_CMD_NEW, 2422 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 2423 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 2424 .doit = ovs_vport_cmd_new 2425 }, 2426 { .cmd = OVS_VPORT_CMD_DEL, 2427 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 2428 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 2429 .doit = ovs_vport_cmd_del 2430 }, 2431 { .cmd = OVS_VPORT_CMD_GET, 2432 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 2433 .flags = 0, /* OK for unprivileged users. */ 2434 .doit = ovs_vport_cmd_get, 2435 .dumpit = ovs_vport_cmd_dump 2436 }, 2437 { .cmd = OVS_VPORT_CMD_SET, 2438 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 2439 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 2440 .doit = ovs_vport_cmd_set, 2441 }, 2442}; 2443 2444struct genl_family dp_vport_genl_family __ro_after_init = { 2445 .hdrsize = sizeof(struct ovs_header), 2446 .name = OVS_VPORT_FAMILY, 2447 .version = OVS_VPORT_VERSION, 2448 .maxattr = OVS_VPORT_ATTR_MAX, 2449 .policy = vport_policy, 2450 .netnsok = true, 2451 .parallel_ops = true, 2452 .small_ops = dp_vport_genl_ops, 2453 .n_small_ops = ARRAY_SIZE(dp_vport_genl_ops), 2454 .mcgrps = &ovs_dp_vport_multicast_group, 2455 .n_mcgrps = 1, 2456 .module = THIS_MODULE, 2457}; 2458 2459static struct genl_family * const dp_genl_families[] = { 2460 &dp_datapath_genl_family, 2461 &dp_vport_genl_family, 2462 &dp_flow_genl_family, 2463 &dp_packet_genl_family, 2464 &dp_meter_genl_family, 2465#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) 2466 &dp_ct_limit_genl_family, 2467#endif 2468}; 2469 2470static void dp_unregister_genl(int n_families) 2471{ 2472 int i; 2473 2474 for (i = 0; i < n_families; i++) 2475 genl_unregister_family(dp_genl_families[i]); 2476} 2477 2478static int __init dp_register_genl(void) 2479{ 2480 int err; 2481 int i; 2482 2483 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) { 2484 2485 err = genl_register_family(dp_genl_families[i]); 2486 if (err) 2487 goto error; 2488 } 2489 2490 return 0; 2491 2492error: 2493 dp_unregister_genl(i); 2494 return err; 2495} 2496 2497static int __net_init ovs_init_net(struct net *net) 2498{ 2499 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 2500 int err; 2501 2502 INIT_LIST_HEAD(&ovs_net->dps); 2503 INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq); 2504 INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance); 2505 2506 err = ovs_ct_init(net); 2507 if (err) 2508 return err; 2509 2510 schedule_delayed_work(&ovs_net->masks_rebalance, 2511 msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL)); 2512 return 0; 2513} 2514 2515static void __net_exit list_vports_from_net(struct net *net, struct net *dnet, 2516 struct list_head *head) 2517{ 2518 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 2519 struct datapath *dp; 2520 2521 list_for_each_entry(dp, &ovs_net->dps, list_node) { 2522 int i; 2523 2524 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { 2525 struct vport *vport; 2526 2527 hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) { 2528 if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL) 2529 continue; 2530 2531 if (dev_net(vport->dev) == dnet) 2532 
list_add(&vport->detach_list, head); 2533 } 2534 } 2535 } 2536} 2537 2538static void __net_exit ovs_exit_net(struct net *dnet) 2539{ 2540 struct datapath *dp, *dp_next; 2541 struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id); 2542 struct vport *vport, *vport_next; 2543 struct net *net; 2544 LIST_HEAD(head); 2545 2546 ovs_lock(); 2547 2548 ovs_ct_exit(dnet); 2549 2550 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) 2551 __dp_destroy(dp); 2552 2553 down_read(&net_rwsem); 2554 for_each_net(net) 2555 list_vports_from_net(net, dnet, &head); 2556 up_read(&net_rwsem); 2557 2558 /* Detach all vports from given namespace. */ 2559 list_for_each_entry_safe(vport, vport_next, &head, detach_list) { 2560 list_del(&vport->detach_list); 2561 ovs_dp_detach_port(vport); 2562 } 2563 2564 ovs_unlock(); 2565 2566 cancel_delayed_work_sync(&ovs_net->masks_rebalance); 2567 cancel_work_sync(&ovs_net->dp_notify_work); 2568} 2569 2570static struct pernet_operations ovs_net_ops = { 2571 .init = ovs_init_net, 2572 .exit = ovs_exit_net, 2573 .id = &ovs_net_id, 2574 .size = sizeof(struct ovs_net), 2575}; 2576 2577static int __init dp_init(void) 2578{ 2579 int err; 2580 2581 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > 2582 sizeof_field(struct sk_buff, cb)); 2583 2584 pr_info("Open vSwitch switching datapath\n"); 2585 2586 err = action_fifos_init(); 2587 if (err) 2588 goto error; 2589 2590 err = ovs_internal_dev_rtnl_link_register(); 2591 if (err) 2592 goto error_action_fifos_exit; 2593 2594 err = ovs_flow_init(); 2595 if (err) 2596 goto error_unreg_rtnl_link; 2597 2598 err = ovs_vport_init(); 2599 if (err) 2600 goto error_flow_exit; 2601 2602 err = register_pernet_device(&ovs_net_ops); 2603 if (err) 2604 goto error_vport_exit; 2605 2606 err = register_netdevice_notifier(&ovs_dp_device_notifier); 2607 if (err) 2608 goto error_netns_exit; 2609 2610 err = ovs_netdev_init(); 2611 if (err) 2612 goto error_unreg_notifier; 2613 2614 err = dp_register_genl(); 2615 if (err < 0) 2616 goto error_unreg_netdev; 2617 2618 return 0; 2619 2620error_unreg_netdev: 2621 ovs_netdev_exit(); 2622error_unreg_notifier: 2623 unregister_netdevice_notifier(&ovs_dp_device_notifier); 2624error_netns_exit: 2625 unregister_pernet_device(&ovs_net_ops); 2626error_vport_exit: 2627 ovs_vport_exit(); 2628error_flow_exit: 2629 ovs_flow_exit(); 2630error_unreg_rtnl_link: 2631 ovs_internal_dev_rtnl_link_unregister(); 2632error_action_fifos_exit: 2633 action_fifos_exit(); 2634error: 2635 return err; 2636} 2637 2638static void dp_cleanup(void) 2639{ 2640 dp_unregister_genl(ARRAY_SIZE(dp_genl_families)); 2641 ovs_netdev_exit(); 2642 unregister_netdevice_notifier(&ovs_dp_device_notifier); 2643 unregister_pernet_device(&ovs_net_ops); 2644 rcu_barrier(); 2645 ovs_vport_exit(); 2646 ovs_flow_exit(); 2647 ovs_internal_dev_rtnl_link_unregister(); 2648 action_fifos_exit(); 2649} 2650 2651module_init(dp_init); 2652module_exit(dp_cleanup); 2653 2654MODULE_DESCRIPTION("Open vSwitch switching datapath"); 2655MODULE_LICENSE("GPL"); 2656MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY); 2657MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY); 2658MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY); 2659MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY); 2660MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY); 2661MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY); 2662
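/*
 * Usage note (illustrative only, not part of the module): the generic
 * netlink families registered by dp_register_genl() above form the kernel
 * side of the OVS control interface; ovs-vswitchd is the usual client.
 * As a rough sketch of how a userspace program could talk to
 * dp_datapath_genl_family using libnl-3 (the helper name and the datapath
 * name "demo-dp" below are hypothetical, and error handling is trimmed
 * for brevity):
 *
 *	#include <errno.h>
 *	#include <netlink/netlink.h>
 *	#include <netlink/genl/genl.h>
 *	#include <netlink/genl/ctrl.h>
 *	#include <linux/openvswitch.h>
 *
 *	static int create_demo_datapath(void)
 *	{
 *		struct nl_sock *sk = nl_socket_alloc();
 *		struct ovs_header *hdr;
 *		struct nl_msg *msg;
 *		int family, err;
 *
 *		if (!sk)
 *			return -ENOMEM;
 *
 *		genl_connect(sk);
 *		// Resolve the "ovs_datapath" family id registered above.
 *		family = genl_ctrl_resolve(sk, OVS_DATAPATH_FAMILY);
 *
 *		msg = nlmsg_alloc();
 *		hdr = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family,
 *				  sizeof(*hdr), NLM_F_REQUEST | NLM_F_ACK,
 *				  OVS_DP_CMD_NEW, OVS_DATAPATH_VERSION);
 *		hdr->dp_ifindex = 0;
 *
 *		// Datapath name plus an upcall portid; both attributes are
 *		// accepted by datapath_policy[] and are what a new-datapath
 *		// request is expected to carry.
 *		nla_put_string(msg, OVS_DP_ATTR_NAME, "demo-dp");
 *		nla_put_u32(msg, OVS_DP_ATTR_UPCALL_PID,
 *			    nl_socket_get_local_port(sk));
 *
 *		err = nl_send_auto(sk, msg);
 *		if (err >= 0)
 *			err = nl_wait_for_ack(sk);
 *
 *		nlmsg_free(msg);
 *		nl_socket_free(sk);
 *		return err;
 *	}
 *
 * OVS_DP_CMD_NEW/SET/DEL are marked GENL_UNS_ADMIN_PERM in
 * dp_datapath_genl_ops[], so the request above needs CAP_NET_ADMIN in the
 * owning network namespace; OVS_DP_CMD_GET and the dump path are open to
 * unprivileged users.  Replies and broadcasts are delivered through
 * genlmsg_reply()/ovs_notify() as shown in the command handlers above.
 */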