1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * lwtunnel Infrastructure for light weight tunnels like mpls 4 * 5 * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com> 6 */ 7 8#include <linux/capability.h> 9#include <linux/module.h> 10#include <linux/types.h> 11#include <linux/kernel.h> 12#include <linux/slab.h> 13#include <linux/uaccess.h> 14#include <linux/skbuff.h> 15#include <linux/netdevice.h> 16#include <linux/lwtunnel.h> 17#include <linux/in.h> 18#include <linux/init.h> 19#include <linux/err.h> 20 21#include <net/lwtunnel.h> 22#include <net/rtnetlink.h> 23#include <net/ip6_fib.h> 24#include <net/rtnh.h> 25 26#ifdef CONFIG_MODULES 27 28static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) 29{ 30 /* Only lwt encaps implemented without using an interface for 31 * the encap need to return a string here. 32 */ 33 switch (encap_type) { 34 case LWTUNNEL_ENCAP_MPLS: 35 return "MPLS"; 36 case LWTUNNEL_ENCAP_ILA: 37 return "ILA"; 38 case LWTUNNEL_ENCAP_SEG6: 39 return "SEG6"; 40 case LWTUNNEL_ENCAP_BPF: 41 return "BPF"; 42 case LWTUNNEL_ENCAP_SEG6_LOCAL: 43 return "SEG6LOCAL"; 44 case LWTUNNEL_ENCAP_RPL: 45 return "RPL"; 46 case LWTUNNEL_ENCAP_IP6: 47 case LWTUNNEL_ENCAP_IP: 48 case LWTUNNEL_ENCAP_NONE: 49 case __LWTUNNEL_ENCAP_MAX: 50 /* should not have got here */ 51 WARN_ON(1); 52 break; 53 } 54 return NULL; 55} 56 57#endif /* CONFIG_MODULES */ 58 59struct lwtunnel_state *lwtunnel_state_alloc(int encap_len) 60{ 61 struct lwtunnel_state *lws; 62 63 lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC); 64 65 return lws; 66} 67EXPORT_SYMBOL_GPL(lwtunnel_state_alloc); 68 69static const struct lwtunnel_encap_ops __rcu * 70 lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly; 71 72int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops, 73 unsigned int num) 74{ 75 if (num > LWTUNNEL_ENCAP_MAX) 76 return -ERANGE; 77 78 return !cmpxchg((const struct lwtunnel_encap_ops **) 79 &lwtun_encaps[num], 80 NULL, ops) ? 0 : -1; 81} 82EXPORT_SYMBOL_GPL(lwtunnel_encap_add_ops); 83 84int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops, 85 unsigned int encap_type) 86{ 87 int ret; 88 89 if (encap_type == LWTUNNEL_ENCAP_NONE || 90 encap_type > LWTUNNEL_ENCAP_MAX) 91 return -ERANGE; 92 93 ret = (cmpxchg((const struct lwtunnel_encap_ops **) 94 &lwtun_encaps[encap_type], 95 ops, NULL) == ops) ? 0 : -1; 96 97 synchronize_net(); 98 99 return ret; 100} 101EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops); 102 103int lwtunnel_build_state(struct net *net, u16 encap_type, 104 struct nlattr *encap, unsigned int family, 105 const void *cfg, struct lwtunnel_state **lws, 106 struct netlink_ext_ack *extack) 107{ 108 const struct lwtunnel_encap_ops *ops; 109 bool found = false; 110 int ret = -EINVAL; 111 112 if (encap_type == LWTUNNEL_ENCAP_NONE || 113 encap_type > LWTUNNEL_ENCAP_MAX) { 114 NL_SET_ERR_MSG_ATTR(extack, encap, 115 "Unknown LWT encapsulation type"); 116 return ret; 117 } 118 119 ret = -EOPNOTSUPP; 120 rcu_read_lock(); 121 ops = rcu_dereference(lwtun_encaps[encap_type]); 122 if (likely(ops && ops->build_state && try_module_get(ops->owner))) 123 found = true; 124 rcu_read_unlock(); 125 126 if (found) { 127 ret = ops->build_state(net, encap, family, cfg, lws, extack); 128 if (ret) 129 module_put(ops->owner); 130 } else { 131 /* don't rely on -EOPNOTSUPP to detect match as build_state 132 * handlers could return it 133 */ 134 NL_SET_ERR_MSG_ATTR(extack, encap, 135 "LWT encapsulation type not supported"); 136 } 137 138 return ret; 139} 140EXPORT_SYMBOL_GPL(lwtunnel_build_state); 141 142int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack) 143{ 144 const struct lwtunnel_encap_ops *ops; 145 int ret = -EINVAL; 146 147 if (encap_type == LWTUNNEL_ENCAP_NONE || 148 encap_type > LWTUNNEL_ENCAP_MAX) { 149 NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type"); 150 return ret; 151 } 152 153 rcu_read_lock(); 154 ops = rcu_dereference(lwtun_encaps[encap_type]); 155 rcu_read_unlock(); 156#ifdef CONFIG_MODULES 157 if (!ops) { 158 const char *encap_type_str = lwtunnel_encap_str(encap_type); 159 160 if (encap_type_str) { 161 __rtnl_unlock(); 162 request_module("rtnl-lwt-%s", encap_type_str); 163 rtnl_lock(); 164 165 rcu_read_lock(); 166 ops = rcu_dereference(lwtun_encaps[encap_type]); 167 rcu_read_unlock(); 168 } 169 } 170#endif 171 ret = ops ? 0 : -EOPNOTSUPP; 172 if (ret < 0) 173 NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported"); 174 175 return ret; 176} 177EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type); 178 179int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, 180 struct netlink_ext_ack *extack) 181{ 182 struct rtnexthop *rtnh = (struct rtnexthop *)attr; 183 struct nlattr *nla_entype; 184 struct nlattr *attrs; 185 u16 encap_type; 186 int attrlen; 187 188 while (rtnh_ok(rtnh, remaining)) { 189 attrlen = rtnh_attrlen(rtnh); 190 if (attrlen > 0) { 191 attrs = rtnh_attrs(rtnh); 192 nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); 193 194 if (nla_entype) { 195 if (nla_len(nla_entype) < sizeof(u16)) { 196 NL_SET_ERR_MSG(extack, "Invalid RTA_ENCAP_TYPE"); 197 return -EINVAL; 198 } 199 encap_type = nla_get_u16(nla_entype); 200 201 if (lwtunnel_valid_encap_type(encap_type, 202 extack) != 0) 203 return -EOPNOTSUPP; 204 } 205 } 206 rtnh = rtnh_next(rtnh, &remaining); 207 } 208 209 return 0; 210} 211EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type_attr); 212 213void lwtstate_free(struct lwtunnel_state *lws) 214{ 215 const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type]; 216 217 if (ops->destroy_state) { 218 ops->destroy_state(lws); 219 kfree_rcu(lws, rcu); 220 } else { 221 kfree(lws); 222 } 223 module_put(ops->owner); 224} 225EXPORT_SYMBOL_GPL(lwtstate_free); 226 227int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate, 228 int encap_attr, int encap_type_attr) 229{ 230 const struct lwtunnel_encap_ops *ops; 231 struct nlattr *nest; 232 int ret; 233 234 if (!lwtstate) 235 return 0; 236 237 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 238 lwtstate->type > LWTUNNEL_ENCAP_MAX) 239 return 0; 240 241 nest = nla_nest_start_noflag(skb, encap_attr); 242 if (!nest) 243 return -EMSGSIZE; 244 245 ret = -EOPNOTSUPP; 246 rcu_read_lock(); 247 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 248 if (likely(ops && ops->fill_encap)) 249 ret = ops->fill_encap(skb, lwtstate); 250 rcu_read_unlock(); 251 252 if (ret) 253 goto nla_put_failure; 254 nla_nest_end(skb, nest); 255 ret = nla_put_u16(skb, encap_type_attr, lwtstate->type); 256 if (ret) 257 goto nla_put_failure; 258 259 return 0; 260 261nla_put_failure: 262 nla_nest_cancel(skb, nest); 263 264 return (ret == -EOPNOTSUPP ? 0 : ret); 265} 266EXPORT_SYMBOL_GPL(lwtunnel_fill_encap); 267 268int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) 269{ 270 const struct lwtunnel_encap_ops *ops; 271 int ret = 0; 272 273 if (!lwtstate) 274 return 0; 275 276 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 277 lwtstate->type > LWTUNNEL_ENCAP_MAX) 278 return 0; 279 280 rcu_read_lock(); 281 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 282 if (likely(ops && ops->get_encap_size)) 283 ret = nla_total_size(ops->get_encap_size(lwtstate)); 284 rcu_read_unlock(); 285 286 return ret; 287} 288EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size); 289 290int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) 291{ 292 const struct lwtunnel_encap_ops *ops; 293 int ret = 0; 294 295 if (!a && !b) 296 return 0; 297 298 if (!a || !b) 299 return 1; 300 301 if (a->type != b->type) 302 return 1; 303 304 if (a->type == LWTUNNEL_ENCAP_NONE || 305 a->type > LWTUNNEL_ENCAP_MAX) 306 return 0; 307 308 rcu_read_lock(); 309 ops = rcu_dereference(lwtun_encaps[a->type]); 310 if (likely(ops && ops->cmp_encap)) 311 ret = ops->cmp_encap(a, b); 312 rcu_read_unlock(); 313 314 return ret; 315} 316EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap); 317 318int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) 319{ 320 struct dst_entry *dst = skb_dst(skb); 321 const struct lwtunnel_encap_ops *ops; 322 struct lwtunnel_state *lwtstate; 323 int ret = -EINVAL; 324 325 if (!dst) 326 goto drop; 327 lwtstate = dst->lwtstate; 328 329 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 330 lwtstate->type > LWTUNNEL_ENCAP_MAX) 331 return 0; 332 333 ret = -EOPNOTSUPP; 334 rcu_read_lock(); 335 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 336 if (likely(ops && ops->output)) 337 ret = ops->output(net, sk, skb); 338 rcu_read_unlock(); 339 340 if (ret == -EOPNOTSUPP) 341 goto drop; 342 343 return ret; 344 345drop: 346 kfree_skb(skb); 347 348 return ret; 349} 350EXPORT_SYMBOL_GPL(lwtunnel_output); 351 352int lwtunnel_xmit(struct sk_buff *skb) 353{ 354 struct dst_entry *dst = skb_dst(skb); 355 const struct lwtunnel_encap_ops *ops; 356 struct lwtunnel_state *lwtstate; 357 int ret = -EINVAL; 358 359 if (!dst) 360 goto drop; 361 362 lwtstate = dst->lwtstate; 363 364 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 365 lwtstate->type > LWTUNNEL_ENCAP_MAX) 366 return 0; 367 368 ret = -EOPNOTSUPP; 369 rcu_read_lock(); 370 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 371 if (likely(ops && ops->xmit)) 372 ret = ops->xmit(skb); 373 rcu_read_unlock(); 374 375 if (ret == -EOPNOTSUPP) 376 goto drop; 377 378 return ret; 379 380drop: 381 kfree_skb(skb); 382 383 return ret; 384} 385EXPORT_SYMBOL_GPL(lwtunnel_xmit); 386 387int lwtunnel_input(struct sk_buff *skb) 388{ 389 struct dst_entry *dst = skb_dst(skb); 390 const struct lwtunnel_encap_ops *ops; 391 struct lwtunnel_state *lwtstate; 392 int ret = -EINVAL; 393 394 if (!dst) 395 goto drop; 396 lwtstate = dst->lwtstate; 397 398 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 399 lwtstate->type > LWTUNNEL_ENCAP_MAX) 400 return 0; 401 402 ret = -EOPNOTSUPP; 403 rcu_read_lock(); 404 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 405 if (likely(ops && ops->input)) 406 ret = ops->input(skb); 407 rcu_read_unlock(); 408 409 if (ret == -EOPNOTSUPP) 410 goto drop; 411 412 return ret; 413 414drop: 415 kfree_skb(skb); 416 417 return ret; 418} 419EXPORT_SYMBOL_GPL(lwtunnel_input); 420