1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (c) 2007-2017 Nicira, Inc.
4 */
5
6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7
8#include "flow.h"
9#include "datapath.h"
10#include <linux/uaccess.h>
11#include <linux/netdevice.h>
12#include <linux/etherdevice.h>
13#include <linux/if_ether.h>
14#include <linux/if_vlan.h>
15#include <net/llc_pdu.h>
16#include <linux/kernel.h>
17#include <linux/jhash.h>
18#include <linux/jiffies.h>
19#include <linux/llc.h>
20#include <linux/module.h>
21#include <linux/in.h>
22#include <linux/rcupdate.h>
23#include <linux/if_arp.h>
24#include <linux/ip.h>
25#include <linux/ipv6.h>
26#include <linux/sctp.h>
27#include <linux/tcp.h>
28#include <linux/udp.h>
29#include <linux/icmp.h>
30#include <linux/icmpv6.h>
31#include <linux/rculist.h>
32#include <net/geneve.h>
33#include <net/ip.h>
34#include <net/ipv6.h>
35#include <net/ndisc.h>
36#include <net/mpls.h>
37#include <net/vxlan.h>
38#include <net/tun_proto.h>
39#include <net/erspan.h>
40
41#include "flow_netlink.h"
42
/* Expected payload length for one netlink attribute type.
 * 'next' points at the table for the nested attribute space when
 * .len == OVS_ATTR_NESTED; it is unused otherwise.
 */
struct ovs_len_tbl {
	int len;
	const struct ovs_len_tbl *next;
};

/* Sentinel .len values: attribute carries nested attributes, or a
 * variable-length payload that is validated by its own parser.
 */
#define OVS_ATTR_NESTED -1
#define OVS_ATTR_VARIABLE -2
/* Maximum nesting depth accepted when copying action lists. */
#define OVS_COPY_ACTIONS_MAX_DEPTH 16
51
52static bool actions_may_change_flow(const struct nlattr *actions)
53{
54	struct nlattr *nla;
55	int rem;
56
57	nla_for_each_nested(nla, actions, rem) {
58		u16 action = nla_type(nla);
59
60		switch (action) {
61		case OVS_ACTION_ATTR_OUTPUT:
62		case OVS_ACTION_ATTR_RECIRC:
63		case OVS_ACTION_ATTR_TRUNC:
64		case OVS_ACTION_ATTR_USERSPACE:
65			break;
66
67		case OVS_ACTION_ATTR_CT:
68		case OVS_ACTION_ATTR_CT_CLEAR:
69		case OVS_ACTION_ATTR_HASH:
70		case OVS_ACTION_ATTR_POP_ETH:
71		case OVS_ACTION_ATTR_POP_MPLS:
72		case OVS_ACTION_ATTR_POP_NSH:
73		case OVS_ACTION_ATTR_POP_VLAN:
74		case OVS_ACTION_ATTR_PUSH_ETH:
75		case OVS_ACTION_ATTR_PUSH_MPLS:
76		case OVS_ACTION_ATTR_PUSH_NSH:
77		case OVS_ACTION_ATTR_PUSH_VLAN:
78		case OVS_ACTION_ATTR_SAMPLE:
79		case OVS_ACTION_ATTR_SET:
80		case OVS_ACTION_ATTR_SET_MASKED:
81		case OVS_ACTION_ATTR_METER:
82		case OVS_ACTION_ATTR_CHECK_PKT_LEN:
83		case OVS_ACTION_ATTR_ADD_MPLS:
84		case OVS_ACTION_ATTR_DEC_TTL:
85		default:
86			return true;
87		}
88	}
89	return false;
90}
91
92static void update_range(struct sw_flow_match *match,
93			 size_t offset, size_t size, bool is_mask)
94{
95	struct sw_flow_key_range *range;
96	size_t start = rounddown(offset, sizeof(long));
97	size_t end = roundup(offset + size, sizeof(long));
98
99	if (!is_mask)
100		range = &match->range;
101	else
102		range = &match->mask->range;
103
104	if (range->start == range->end) {
105		range->start = start;
106		range->end = end;
107		return;
108	}
109
110	if (range->start > start)
111		range->start = start;
112
113	if (range->end < end)
114		range->end = end;
115}
116
/* Store a scalar 'value' into the named flow-key field (key, or mask when
 * 'is_mask') and widen the match range to cover that field.
 */
#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
	do { \
		update_range(match, offsetof(struct sw_flow_key, field),    \
			     sizeof((match)->key->field), is_mask);	    \
		if (is_mask)						    \
			(match)->mask->key.field = value;		    \
		else							    \
			(match)->key->field = value;		            \
	} while (0)

/* memcpy 'len' bytes from 'value_p' into the key (or mask) at a raw byte
 * 'offset', widening the match range accordingly.  Used for variable-length
 * data such as tunnel options where no named field exists.
 */
#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask)	    \
	do {								    \
		update_range(match, offset, len, is_mask);		    \
		if (is_mask)						    \
			memcpy((u8 *)&(match)->mask->key + offset, value_p, \
			       len);					   \
		else							    \
			memcpy((u8 *)(match)->key + offset, value_p, len);  \
	} while (0)

/* Named-field convenience wrapper around SW_FLOW_KEY_MEMCPY_OFFSET. */
#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask)		      \
	SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
				  value_p, len, is_mask)

/* memset the named flow-key field (key, or mask when 'is_mask') to 'value'
 * and widen the match range to cover it.
 */
#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask)		    \
	do {								    \
		update_range(match, offsetof(struct sw_flow_key, field),    \
			     sizeof((match)->key->field), is_mask);	    \
		if (is_mask)						    \
			memset((u8 *)&(match)->mask->key.field, value,      \
			       sizeof((match)->mask->key.field));	    \
		else							    \
			memset((u8 *)&(match)->key->field, value,           \
			       sizeof((match)->key->field));                \
	} while (0)
152
153static bool match_validate(const struct sw_flow_match *match,
154			   u64 key_attrs, u64 mask_attrs, bool log)
155{
156	u64 key_expected = 0;
157	u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
158
159	/* The following mask attributes allowed only if they
160	 * pass the validation tests. */
161	mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
162			| (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)
163			| (1 << OVS_KEY_ATTR_IPV6)
164			| (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)
165			| (1 << OVS_KEY_ATTR_TCP)
166			| (1 << OVS_KEY_ATTR_TCP_FLAGS)
167			| (1 << OVS_KEY_ATTR_UDP)
168			| (1 << OVS_KEY_ATTR_SCTP)
169			| (1 << OVS_KEY_ATTR_ICMP)
170			| (1 << OVS_KEY_ATTR_ICMPV6)
171			| (1 << OVS_KEY_ATTR_ARP)
172			| (1 << OVS_KEY_ATTR_ND)
173			| (1 << OVS_KEY_ATTR_MPLS)
174			| (1 << OVS_KEY_ATTR_NSH));
175
176	/* Always allowed mask fields. */
177	mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
178		       | (1 << OVS_KEY_ATTR_IN_PORT)
179		       | (1 << OVS_KEY_ATTR_ETHERTYPE));
180
181	/* Check key attributes. */
182	if (match->key->eth.type == htons(ETH_P_ARP)
183			|| match->key->eth.type == htons(ETH_P_RARP)) {
184		key_expected |= 1 << OVS_KEY_ATTR_ARP;
185		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
186			mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
187	}
188
189	if (eth_p_mpls(match->key->eth.type)) {
190		key_expected |= 1 << OVS_KEY_ATTR_MPLS;
191		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
192			mask_allowed |= 1 << OVS_KEY_ATTR_MPLS;
193	}
194
195	if (match->key->eth.type == htons(ETH_P_IP)) {
196		key_expected |= 1 << OVS_KEY_ATTR_IPV4;
197		if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
198			mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
199			mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4;
200		}
201
202		if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
203			if (match->key->ip.proto == IPPROTO_UDP) {
204				key_expected |= 1 << OVS_KEY_ATTR_UDP;
205				if (match->mask && (match->mask->key.ip.proto == 0xff))
206					mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
207			}
208
209			if (match->key->ip.proto == IPPROTO_SCTP) {
210				key_expected |= 1 << OVS_KEY_ATTR_SCTP;
211				if (match->mask && (match->mask->key.ip.proto == 0xff))
212					mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
213			}
214
215			if (match->key->ip.proto == IPPROTO_TCP) {
216				key_expected |= 1 << OVS_KEY_ATTR_TCP;
217				key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
218				if (match->mask && (match->mask->key.ip.proto == 0xff)) {
219					mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
220					mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
221				}
222			}
223
224			if (match->key->ip.proto == IPPROTO_ICMP) {
225				key_expected |= 1 << OVS_KEY_ATTR_ICMP;
226				if (match->mask && (match->mask->key.ip.proto == 0xff))
227					mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
228			}
229		}
230	}
231
232	if (match->key->eth.type == htons(ETH_P_IPV6)) {
233		key_expected |= 1 << OVS_KEY_ATTR_IPV6;
234		if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
235			mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
236			mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6;
237		}
238
239		if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
240			if (match->key->ip.proto == IPPROTO_UDP) {
241				key_expected |= 1 << OVS_KEY_ATTR_UDP;
242				if (match->mask && (match->mask->key.ip.proto == 0xff))
243					mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
244			}
245
246			if (match->key->ip.proto == IPPROTO_SCTP) {
247				key_expected |= 1 << OVS_KEY_ATTR_SCTP;
248				if (match->mask && (match->mask->key.ip.proto == 0xff))
249					mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
250			}
251
252			if (match->key->ip.proto == IPPROTO_TCP) {
253				key_expected |= 1 << OVS_KEY_ATTR_TCP;
254				key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
255				if (match->mask && (match->mask->key.ip.proto == 0xff)) {
256					mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
257					mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
258				}
259			}
260
261			if (match->key->ip.proto == IPPROTO_ICMPV6) {
262				key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
263				if (match->mask && (match->mask->key.ip.proto == 0xff))
264					mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
265
266				if (match->key->tp.src ==
267						htons(NDISC_NEIGHBOUR_SOLICITATION) ||
268				    match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
269					key_expected |= 1 << OVS_KEY_ATTR_ND;
270					/* Original direction conntrack tuple
271					 * uses the same space as the ND fields
272					 * in the key, so both are not allowed
273					 * at the same time.
274					 */
275					mask_allowed &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
276					if (match->mask && (match->mask->key.tp.src == htons(0xff)))
277						mask_allowed |= 1 << OVS_KEY_ATTR_ND;
278				}
279			}
280		}
281	}
282
283	if (match->key->eth.type == htons(ETH_P_NSH)) {
284		key_expected |= 1 << OVS_KEY_ATTR_NSH;
285		if (match->mask &&
286		    match->mask->key.eth.type == htons(0xffff)) {
287			mask_allowed |= 1 << OVS_KEY_ATTR_NSH;
288		}
289	}
290
291	if ((key_attrs & key_expected) != key_expected) {
292		/* Key attributes check failed. */
293		OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
294			  (unsigned long long)key_attrs,
295			  (unsigned long long)key_expected);
296		return false;
297	}
298
299	if ((mask_attrs & mask_allowed) != mask_attrs) {
300		/* Mask attributes check failed. */
301		OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)",
302			  (unsigned long long)mask_attrs,
303			  (unsigned long long)mask_allowed);
304		return false;
305	}
306
307	return true;
308}
309
/* Upper bound on the netlink space needed to dump one tunnel key
 * (the OVS_TUNNEL_KEY_ATTR_* attributes), used for skb size estimation.
 */
size_t ovs_tun_key_attr_size(void)
{
	/* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider
	 * updating this function.
	 */
	return    nla_total_size_64bit(8) /* OVS_TUNNEL_KEY_ATTR_ID */
		+ nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */
		+ nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */
		+ nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TOS */
		+ nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TTL */
		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_CSUM */
		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_OAM */
		+ nla_total_size(256)  /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
		/* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS and
		 * OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS is mutually exclusive with
		 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
		 */
		+ nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
		+ nla_total_size(2);   /* OVS_TUNNEL_KEY_ATTR_TP_DST */
}
331
/* Upper bound on the netlink space needed to dump one NSH key
 * (the OVS_NSH_KEY_ATTR_* attributes).
 */
static size_t ovs_nsh_key_attr_size(void)
{
	/* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider
	 * updating this function.
	 */
	return  nla_total_size(NSH_BASE_HDR_LEN) /* OVS_NSH_KEY_ATTR_BASE */
		/* OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are
		 * mutually exclusive, so the bigger one can cover
		 * the small one.
		 */
		+ nla_total_size(NSH_CTX_HDRS_MAX_LEN);
}
344
/* Upper bound on the netlink space needed to dump one complete flow key.
 * The BUILD_BUG_ON pins the attribute numbering this estimate was written
 * against: it must be revisited whenever a new OVS_KEY_ATTR_* is added.
 */
size_t ovs_key_attr_size(void)
{
	/* Whenever adding new OVS_KEY_ FIELDS, we should consider
	 * updating this function.
	 */
	BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29);

	return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
		+ nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
		  + ovs_tun_key_attr_size()
		+ nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_DP_HASH */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_RECIRC_ID */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_CT_STATE */
		+ nla_total_size(2)   /* OVS_KEY_ATTR_CT_ZONE */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_CT_MARK */
		+ nla_total_size(16)  /* OVS_KEY_ATTR_CT_LABELS */
		+ nla_total_size(40)  /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
		+ nla_total_size(0)   /* OVS_KEY_ATTR_NSH */
		  + ovs_nsh_key_attr_size()
		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_VLAN */
		+ nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
		+ nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
		+ nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
		+ nla_total_size(28); /* OVS_KEY_ATTR_ND */
}
375
/* Expected payload sizes for nested OVS_VXLAN_EXT_* attributes. */
static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = {
	[OVS_VXLAN_EXT_GBP]	    = { .len = sizeof(u32) },
};
379
/* Expected payload sizes for nested OVS_TUNNEL_KEY_ATTR_* attributes.
 * OVS_ATTR_VARIABLE entries are length-checked by their own parser;
 * OVS_ATTR_NESTED entries chain to a sub-table via .next.
 */
static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
	[OVS_TUNNEL_KEY_ATTR_ID]	    = { .len = sizeof(u64) },
	[OVS_TUNNEL_KEY_ATTR_IPV4_SRC]	    = { .len = sizeof(u32) },
	[OVS_TUNNEL_KEY_ATTR_IPV4_DST]	    = { .len = sizeof(u32) },
	[OVS_TUNNEL_KEY_ATTR_TOS]	    = { .len = 1 },
	[OVS_TUNNEL_KEY_ATTR_TTL]	    = { .len = 1 },
	[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 },
	[OVS_TUNNEL_KEY_ATTR_CSUM]	    = { .len = 0 },
	[OVS_TUNNEL_KEY_ATTR_TP_SRC]	    = { .len = sizeof(u16) },
	[OVS_TUNNEL_KEY_ATTR_TP_DST]	    = { .len = sizeof(u16) },
	[OVS_TUNNEL_KEY_ATTR_OAM]	    = { .len = 0 },
	[OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS]   = { .len = OVS_ATTR_VARIABLE },
	[OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS]    = { .len = OVS_ATTR_NESTED,
						.next = ovs_vxlan_ext_key_lens },
	[OVS_TUNNEL_KEY_ATTR_IPV6_SRC]      = { .len = sizeof(struct in6_addr) },
	[OVS_TUNNEL_KEY_ATTR_IPV6_DST]      = { .len = sizeof(struct in6_addr) },
	[OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS]   = { .len = OVS_ATTR_VARIABLE },
	[OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE]   = { .len = 0 },
};
399
/* Expected payload sizes for nested OVS_NSH_KEY_ATTR_* attributes. */
static const struct ovs_len_tbl
ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = {
	[OVS_NSH_KEY_ATTR_BASE] = { .len = sizeof(struct ovs_nsh_key_base) },
	[OVS_NSH_KEY_ATTR_MD1]  = { .len = sizeof(struct ovs_nsh_key_md1) },
	[OVS_NSH_KEY_ATTR_MD2]  = { .len = OVS_ATTR_VARIABLE },
};
406
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.
 * Consulted by __parse_flow_nlattrs() via check_attr_len(); nested entries
 * chain to their sub-tables via .next.
 */
static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
	[OVS_KEY_ATTR_ENCAP]	 = { .len = OVS_ATTR_NESTED },
	[OVS_KEY_ATTR_PRIORITY]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_IN_PORT]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_SKB_MARK]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_ETHERNET]	 = { .len = sizeof(struct ovs_key_ethernet) },
	[OVS_KEY_ATTR_VLAN]	 = { .len = sizeof(__be16) },
	[OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) },
	[OVS_KEY_ATTR_IPV4]	 = { .len = sizeof(struct ovs_key_ipv4) },
	[OVS_KEY_ATTR_IPV6]	 = { .len = sizeof(struct ovs_key_ipv6) },
	[OVS_KEY_ATTR_TCP]	 = { .len = sizeof(struct ovs_key_tcp) },
	[OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) },
	[OVS_KEY_ATTR_UDP]	 = { .len = sizeof(struct ovs_key_udp) },
	[OVS_KEY_ATTR_SCTP]	 = { .len = sizeof(struct ovs_key_sctp) },
	[OVS_KEY_ATTR_ICMP]	 = { .len = sizeof(struct ovs_key_icmp) },
	[OVS_KEY_ATTR_ICMPV6]	 = { .len = sizeof(struct ovs_key_icmpv6) },
	[OVS_KEY_ATTR_ARP]	 = { .len = sizeof(struct ovs_key_arp) },
	[OVS_KEY_ATTR_ND]	 = { .len = sizeof(struct ovs_key_nd) },
	[OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_DP_HASH]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_TUNNEL]	 = { .len = OVS_ATTR_NESTED,
				     .next = ovs_tunnel_key_lens, },
	[OVS_KEY_ATTR_MPLS]	 = { .len = OVS_ATTR_VARIABLE },
	[OVS_KEY_ATTR_CT_STATE]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_CT_ZONE]	 = { .len = sizeof(u16) },
	[OVS_KEY_ATTR_CT_MARK]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
	[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = {
		.len = sizeof(struct ovs_key_ct_tuple_ipv4) },
	[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
		.len = sizeof(struct ovs_key_ct_tuple_ipv6) },
	[OVS_KEY_ATTR_NSH]       = { .len = OVS_ATTR_NESTED,
				     .next = ovs_nsh_key_attr_lens, },
};
442
443static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
444{
445	return expected_len == attr_len ||
446	       expected_len == OVS_ATTR_NESTED ||
447	       expected_len == OVS_ATTR_VARIABLE;
448}
449
450static bool is_all_zero(const u8 *fp, size_t size)
451{
452	int i;
453
454	if (!fp)
455		return false;
456
457	for (i = 0; i < size; i++)
458		if (fp[i])
459			return false;
460
461	return true;
462}
463
464static int __parse_flow_nlattrs(const struct nlattr *attr,
465				const struct nlattr *a[],
466				u64 *attrsp, bool log, bool nz)
467{
468	const struct nlattr *nla;
469	u64 attrs;
470	int rem;
471
472	attrs = *attrsp;
473	nla_for_each_nested(nla, attr, rem) {
474		u16 type = nla_type(nla);
475		int expected_len;
476
477		if (type > OVS_KEY_ATTR_MAX) {
478			OVS_NLERR(log, "Key type %d is out of range max %d",
479				  type, OVS_KEY_ATTR_MAX);
480			return -EINVAL;
481		}
482
483		if (attrs & (1 << type)) {
484			OVS_NLERR(log, "Duplicate key (type %d).", type);
485			return -EINVAL;
486		}
487
488		expected_len = ovs_key_lens[type].len;
489		if (!check_attr_len(nla_len(nla), expected_len)) {
490			OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
491				  type, nla_len(nla), expected_len);
492			return -EINVAL;
493		}
494
495		if (!nz || !is_all_zero(nla_data(nla), nla_len(nla))) {
496			attrs |= 1 << type;
497			a[type] = nla;
498		}
499	}
500	if (rem) {
501		OVS_NLERR(log, "Message has %d unknown bytes.", rem);
502		return -EINVAL;
503	}
504
505	*attrsp = attrs;
506	return 0;
507}
508
/* Parse mask attributes: all-zero payloads are ignored (nz == true),
 * since a zero mask matches nothing and carries no information.
 */
static int parse_flow_mask_nlattrs(const struct nlattr *attr,
				   const struct nlattr *a[], u64 *attrsp,
				   bool log)
{
	return __parse_flow_nlattrs(attr, a, attrsp, log, true);
}
515
/* Parse key attributes: every well-formed attribute is recorded,
 * including all-zero payloads (nz == false).
 */
int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[],
		       u64 *attrsp, bool log)
{
	return __parse_flow_nlattrs(attr, a, attrsp, log, false);
}
521
/* Copy a Geneve options attribute into the flow key's (or mask's) tunnel
 * option area.  The raw option bytes are copied verbatim; tun_opts_len
 * records their length in the key and is fully masked (0xff) in the mask.
 * Returns 0 on success or -EINVAL (logged if 'log').
 */
static int genev_tun_opt_from_nlattr(const struct nlattr *a,
				     struct sw_flow_match *match, bool is_mask,
				     bool log)
{
	unsigned long opt_key_offset;

	/* Options must fit in the fixed tun_opts area of the key. */
	if (nla_len(a) > sizeof(match->key->tun_opts)) {
		OVS_NLERR(log, "Geneve option length err (len %d, max %zu).",
			  nla_len(a), sizeof(match->key->tun_opts));
		return -EINVAL;
	}

	/* Geneve options are a sequence of 4-byte-multiple TLVs. */
	if (nla_len(a) % 4 != 0) {
		OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.",
			  nla_len(a));
		return -EINVAL;
	}

	/* We need to record the length of the options passed
	 * down, otherwise packets with the same format but
	 * additional options will be silently matched.
	 */
	if (!is_mask) {
		SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
				false);
	} else {
		/* This is somewhat unusual because it looks at
		 * both the key and mask while parsing the
		 * attributes (and by extension assumes the key
		 * is parsed first). Normally, we would verify
		 * that each is the correct length and that the
		 * attributes line up in the validate function.
		 * However, that is difficult because this is
		 * variable length and we won't have the
		 * information later.
		 */
		if (match->key->tun_opts_len != nla_len(a)) {
			OVS_NLERR(log, "Geneve option len %d != mask len %d",
				  match->key->tun_opts_len, nla_len(a));
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
	}

	/* Options live at the tail of the key; compute their byte offset. */
	opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
				  nla_len(a), is_mask);
	return 0;
}
572
/* Parse the nested OVS_VXLAN_EXT_* attributes of 'attr' into a
 * struct vxlan_metadata and store it in the flow key's (or mask's)
 * tunnel option area.  Returns 0 on success or -EINVAL (logged if 'log').
 */
static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
				     struct sw_flow_match *match, bool is_mask,
				     bool log)
{
	struct nlattr *a;
	int rem;
	unsigned long opt_key_offset;
	struct vxlan_metadata opts;

	/* The fixed-size metadata must fit in the key's option area. */
	BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));

	memset(&opts, 0, sizeof(opts));
	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);

		if (type > OVS_VXLAN_EXT_MAX) {
			OVS_NLERR(log, "VXLAN extension %d out of range max %d",
				  type, OVS_VXLAN_EXT_MAX);
			return -EINVAL;
		}

		if (!check_attr_len(nla_len(a),
				    ovs_vxlan_ext_key_lens[type].len)) {
			OVS_NLERR(log, "VXLAN extension %d has unexpected len %d expected %d",
				  type, nla_len(a),
				  ovs_vxlan_ext_key_lens[type].len);
			return -EINVAL;
		}

		switch (type) {
		case OVS_VXLAN_EXT_GBP:
			opts.gbp = nla_get_u32(a);
			break;
		default:
			OVS_NLERR(log, "Unknown VXLAN extension attribute %d",
				  type);
			return -EINVAL;
		}
	}
	/* Trailing bytes that do not form a whole attribute are an error. */
	if (rem) {
		OVS_NLERR(log, "VXLAN extension message has %d unknown bytes.",
			  rem);
		return -EINVAL;
	}

	/* Record the option length in the key; fully mask it in the mask. */
	if (!is_mask)
		SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
	else
		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);

	opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
				  is_mask);
	return 0;
}
628
/* Copy an ERSPAN options attribute into the flow key's (or mask's) tunnel
 * option area.  The key always records sizeof(struct erspan_metadata) as
 * the option length, while the copy itself uses the attribute's length.
 * Returns 0 on success or -EINVAL (logged if 'log').
 */
static int erspan_tun_opt_from_nlattr(const struct nlattr *a,
				      struct sw_flow_match *match, bool is_mask,
				      bool log)
{
	unsigned long opt_key_offset;

	BUILD_BUG_ON(sizeof(struct erspan_metadata) >
		     sizeof(match->key->tun_opts));

	if (nla_len(a) > sizeof(match->key->tun_opts)) {
		OVS_NLERR(log, "ERSPAN option length err (len %d, max %zu).",
			  nla_len(a), sizeof(match->key->tun_opts));
		return -EINVAL;
	}

	/* Record the option length in the key; fully mask it in the mask. */
	if (!is_mask)
		SW_FLOW_KEY_PUT(match, tun_opts_len,
				sizeof(struct erspan_metadata), false);
	else
		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);

	opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
				  nla_len(a), is_mask);
	return 0;
}
655
/* Parse the nested OVS_TUNNEL_KEY_ATTR_* attributes of 'attr' into the
 * tunnel portion of the flow key (or mask).
 *
 * Accumulates TUNNEL_* flags as attributes are seen, tracks whether the
 * addresses were IPv4 or IPv6 (mixing is rejected), and for keys (not
 * masks) enforces that a destination address and TTL are present (except
 * in bridge/info mode, which must carry no other fields).  At most one
 * options block (Geneve, VXLAN or ERSPAN) is accepted.
 *
 * Returns the OVS_TUNNEL_KEY_ATTR_* type of the options block parsed
 * (0 if none) on success, or a negative errno (logged if 'log').
 */
static int ip_tun_from_nlattr(const struct nlattr *attr,
			      struct sw_flow_match *match, bool is_mask,
			      bool log)
{
	bool ttl = false, ipv4 = false, ipv6 = false;
	bool info_bridge_mode = false;
	__be16 tun_flags = 0;
	int opts_type = 0;
	struct nlattr *a;
	int rem;

	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);
		int err;

		if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
			OVS_NLERR(log, "Tunnel attr %d out of range max %d",
				  type, OVS_TUNNEL_KEY_ATTR_MAX);
			return -EINVAL;
		}

		if (!check_attr_len(nla_len(a),
				    ovs_tunnel_key_lens[type].len)) {
			OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
				  type, nla_len(a), ovs_tunnel_key_lens[type].len);
			return -EINVAL;
		}

		switch (type) {
		case OVS_TUNNEL_KEY_ATTR_ID:
			SW_FLOW_KEY_PUT(match, tun_key.tun_id,
					nla_get_be64(a), is_mask);
			tun_flags |= TUNNEL_KEY;
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
			SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
					nla_get_in_addr(a), is_mask);
			ipv4 = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
			SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,
					nla_get_in_addr(a), is_mask);
			ipv4 = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
			SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src,
					nla_get_in6_addr(a), is_mask);
			ipv6 = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
			SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
					nla_get_in6_addr(a), is_mask);
			ipv6 = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_TOS:
			SW_FLOW_KEY_PUT(match, tun_key.tos,
					nla_get_u8(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_TTL:
			SW_FLOW_KEY_PUT(match, tun_key.ttl,
					nla_get_u8(a), is_mask);
			ttl = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
			tun_flags |= TUNNEL_DONT_FRAGMENT;
			break;
		case OVS_TUNNEL_KEY_ATTR_CSUM:
			tun_flags |= TUNNEL_CSUM;
			break;
		case OVS_TUNNEL_KEY_ATTR_TP_SRC:
			SW_FLOW_KEY_PUT(match, tun_key.tp_src,
					nla_get_be16(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_TP_DST:
			SW_FLOW_KEY_PUT(match, tun_key.tp_dst,
					nla_get_be16(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_OAM:
			tun_flags |= TUNNEL_OAM;
			break;
		case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
			/* Geneve, VXLAN and ERSPAN options are mutually
			 * exclusive: only one block may appear.
			 */
			if (opts_type) {
				OVS_NLERR(log, "Multiple metadata blocks provided");
				return -EINVAL;
			}

			err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
			if (err)
				return err;

			tun_flags |= TUNNEL_GENEVE_OPT;
			opts_type = type;
			break;
		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
			if (opts_type) {
				OVS_NLERR(log, "Multiple metadata blocks provided");
				return -EINVAL;
			}

			err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log);
			if (err)
				return err;

			tun_flags |= TUNNEL_VXLAN_OPT;
			opts_type = type;
			break;
		case OVS_TUNNEL_KEY_ATTR_PAD:
			/* Alignment padding only; nothing to parse. */
			break;
		case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
			if (opts_type) {
				OVS_NLERR(log, "Multiple metadata blocks provided");
				return -EINVAL;
			}

			err = erspan_tun_opt_from_nlattr(a, match, is_mask,
							 log);
			if (err)
				return err;

			tun_flags |= TUNNEL_ERSPAN_OPT;
			opts_type = type;
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE:
			info_bridge_mode = true;
			ipv4 = true;
			break;
		default:
			OVS_NLERR(log, "Unknown IP tunnel attribute %d",
				  type);
			return -EINVAL;
		}
	}

	SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
	/* The mask always exact-matches the tunnel address family. */
	if (is_mask)
		SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true);
	else
		SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? AF_INET6 : AF_INET,
				false);

	/* Trailing bytes that do not form a whole attribute are an error. */
	if (rem > 0) {
		OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.",
			  rem);
		return -EINVAL;
	}

	if (ipv4 && ipv6) {
		OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes");
		return -EINVAL;
	}

	/* Key-only (not mask) semantic checks. */
	if (!is_mask) {
		if (!ipv4 && !ipv6) {
			OVS_NLERR(log, "IP tunnel dst address not specified");
			return -EINVAL;
		}
		if (ipv4) {
			if (info_bridge_mode) {
				/* Bridge mode allows only the tunnel ID. */
				if (match->key->tun_key.u.ipv4.src ||
				    match->key->tun_key.u.ipv4.dst ||
				    match->key->tun_key.tp_src ||
				    match->key->tun_key.tp_dst ||
				    match->key->tun_key.ttl ||
				    match->key->tun_key.tos ||
				    tun_flags & ~TUNNEL_KEY) {
					OVS_NLERR(log, "IPv4 tun info is not correct");
					return -EINVAL;
				}
			} else if (!match->key->tun_key.u.ipv4.dst) {
				OVS_NLERR(log, "IPv4 tunnel dst address is zero");
				return -EINVAL;
			}
		}
		if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) {
			OVS_NLERR(log, "IPv6 tunnel dst address is zero");
			return -EINVAL;
		}

		if (!ttl && !info_bridge_mode) {
			OVS_NLERR(log, "IP tunnel TTL not specified.");
			return -EINVAL;
		}
	}

	return opts_type;
}
842
843static int vxlan_opt_to_nlattr(struct sk_buff *skb,
844			       const void *tun_opts, int swkey_tun_opts_len)
845{
846	const struct vxlan_metadata *opts = tun_opts;
847	struct nlattr *nla;
848
849	nla = nla_nest_start_noflag(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
850	if (!nla)
851		return -EMSGSIZE;
852
853	if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0)
854		return -EMSGSIZE;
855
856	nla_nest_end(skb, nla);
857	return 0;
858}
859
/* Emit the OVS_TUNNEL_KEY_ATTR_* attributes describing 'output' into 'skb'
 * (without the surrounding OVS_KEY_ATTR_TUNNEL nest).
 *
 * Bridge mode (IP_TUNNEL_INFO_BRIDGE in 'mode') short-circuits after the
 * tunnel ID and the bridge flag.  Zero-valued address/TOS/port fields are
 * omitted; TTL is always emitted.  Exactly one option-block attribute is
 * written, chosen by the TUNNEL_*_OPT flag.
 *
 * Returns 0 on success or -EMSGSIZE when the skb runs out of room.
 */
static int __ip_tun_to_nlattr(struct sk_buff *skb,
			      const struct ip_tunnel_key *output,
			      const void *tun_opts, int swkey_tun_opts_len,
			      unsigned short tun_proto, u8 mode)
{
	if (output->tun_flags & TUNNEL_KEY &&
	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id,
			 OVS_TUNNEL_KEY_ATTR_PAD))
		return -EMSGSIZE;

	if (mode & IP_TUNNEL_INFO_BRIDGE)
		return nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE)
		       ? -EMSGSIZE : 0;

	/* Address attributes depend on the tunnel's address family. */
	switch (tun_proto) {
	case AF_INET:
		if (output->u.ipv4.src &&
		    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
				    output->u.ipv4.src))
			return -EMSGSIZE;
		if (output->u.ipv4.dst &&
		    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
				    output->u.ipv4.dst))
			return -EMSGSIZE;
		break;
	case AF_INET6:
		if (!ipv6_addr_any(&output->u.ipv6.src) &&
		    nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
				     &output->u.ipv6.src))
			return -EMSGSIZE;
		if (!ipv6_addr_any(&output->u.ipv6.dst) &&
		    nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
				     &output->u.ipv6.dst))
			return -EMSGSIZE;
		break;
	}
	if (output->tos &&
	    nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))
		return -EMSGSIZE;
	if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl))
		return -EMSGSIZE;
	if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
		return -EMSGSIZE;
	if ((output->tun_flags & TUNNEL_CSUM) &&
	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
		return -EMSGSIZE;
	if (output->tp_src &&
	    nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src))
		return -EMSGSIZE;
	if (output->tp_dst &&
	    nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
		return -EMSGSIZE;
	if ((output->tun_flags & TUNNEL_OAM) &&
	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
		return -EMSGSIZE;
	if (swkey_tun_opts_len) {
		/* Option blocks are mutually exclusive (see parser). */
		if (output->tun_flags & TUNNEL_GENEVE_OPT &&
		    nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
			    swkey_tun_opts_len, tun_opts))
			return -EMSGSIZE;
		else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
			 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
			return -EMSGSIZE;
		else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
			 nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
				 swkey_tun_opts_len, tun_opts))
			return -EMSGSIZE;
	}

	return 0;
}
932
933static int ip_tun_to_nlattr(struct sk_buff *skb,
934			    const struct ip_tunnel_key *output,
935			    const void *tun_opts, int swkey_tun_opts_len,
936			    unsigned short tun_proto, u8 mode)
937{
938	struct nlattr *nla;
939	int err;
940
941	nla = nla_nest_start_noflag(skb, OVS_KEY_ATTR_TUNNEL);
942	if (!nla)
943		return -EMSGSIZE;
944
945	err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len,
946				 tun_proto, mode);
947	if (err)
948		return err;
949
950	nla_nest_end(skb, nla);
951	return 0;
952}
953
/* Emit the tunnel attributes of 'tun_info' into 'skb' (no surrounding
 * OVS_KEY_ATTR_TUNNEL nest).  Returns 0 or -EMSGSIZE.
 */
int ovs_nla_put_tunnel_info(struct sk_buff *skb,
			    struct ip_tunnel_info *tun_info)
{
	return __ip_tun_to_nlattr(skb, &tun_info->key,
				  ip_tunnel_info_opts(tun_info),
				  tun_info->options_len,
				  ip_tunnel_info_af(tun_info), tun_info->mode);
}
962
963static int encode_vlan_from_nlattrs(struct sw_flow_match *match,
964				    const struct nlattr *a[],
965				    bool is_mask, bool inner)
966{
967	__be16 tci = 0;
968	__be16 tpid = 0;
969
970	if (a[OVS_KEY_ATTR_VLAN])
971		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
972
973	if (a[OVS_KEY_ATTR_ETHERTYPE])
974		tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
975
976	if (likely(!inner)) {
977		SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask);
978		SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask);
979	} else {
980		SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask);
981		SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask);
982	}
983	return 0;
984}
985
986static int validate_vlan_from_nlattrs(const struct sw_flow_match *match,
987				      u64 key_attrs, bool inner,
988				      const struct nlattr **a, bool log)
989{
990	__be16 tci = 0;
991
992	if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
993	      (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
994	       eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) {
995		/* Not a VLAN. */
996		return 0;
997	}
998
999	if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
1000	      (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
1001		OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN");
1002		return -EINVAL;
1003	}
1004
1005	if (a[OVS_KEY_ATTR_VLAN])
1006		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
1007
1008	if (!(tci & htons(VLAN_CFI_MASK))) {
1009		if (tci) {
1010			OVS_NLERR(log, "%s TCI does not have VLAN_CFI_MASK bit set.",
1011				  (inner) ? "C-VLAN" : "VLAN");
1012			return -EINVAL;
1013		} else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) {
1014			/* Corner case for truncated VLAN header. */
1015			OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.",
1016				  (inner) ? "C-VLAN" : "VLAN");
1017			return -EINVAL;
1018		}
1019	}
1020
1021	return 1;
1022}
1023
1024static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match,
1025					   u64 key_attrs, bool inner,
1026					   const struct nlattr **a, bool log)
1027{
1028	__be16 tci = 0;
1029	__be16 tpid = 0;
1030	bool encap_valid = !!(match->key->eth.vlan.tci &
1031			      htons(VLAN_CFI_MASK));
1032	bool i_encap_valid = !!(match->key->eth.cvlan.tci &
1033				htons(VLAN_CFI_MASK));
1034
1035	if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) {
1036		/* Not a VLAN. */
1037		return 0;
1038	}
1039
1040	if ((!inner && !encap_valid) || (inner && !i_encap_valid)) {
1041		OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.",
1042			  (inner) ? "C-VLAN" : "VLAN");
1043		return -EINVAL;
1044	}
1045
1046	if (a[OVS_KEY_ATTR_VLAN])
1047		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
1048
1049	if (a[OVS_KEY_ATTR_ETHERTYPE])
1050		tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1051
1052	if (tpid != htons(0xffff)) {
1053		OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).",
1054			  (inner) ? "C-VLAN" : "VLAN", ntohs(tpid));
1055		return -EINVAL;
1056	}
1057	if (!(tci & htons(VLAN_CFI_MASK))) {
1058		OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_CFI_MASK bit.",
1059			  (inner) ? "C-VLAN" : "VLAN");
1060		return -EINVAL;
1061	}
1062
1063	return 1;
1064}
1065
1066static int __parse_vlan_from_nlattrs(struct sw_flow_match *match,
1067				     u64 *key_attrs, bool inner,
1068				     const struct nlattr **a, bool is_mask,
1069				     bool log)
1070{
1071	int err;
1072	const struct nlattr *encap;
1073
1074	if (!is_mask)
1075		err = validate_vlan_from_nlattrs(match, *key_attrs, inner,
1076						 a, log);
1077	else
1078		err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner,
1079						      a, log);
1080	if (err <= 0)
1081		return err;
1082
1083	err = encode_vlan_from_nlattrs(match, a, is_mask, inner);
1084	if (err)
1085		return err;
1086
1087	*key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
1088	*key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
1089	*key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1090
1091	encap = a[OVS_KEY_ATTR_ENCAP];
1092
1093	if (!is_mask)
1094		err = parse_flow_nlattrs(encap, a, key_attrs, log);
1095	else
1096		err = parse_flow_mask_nlattrs(encap, a, key_attrs, log);
1097
1098	return err;
1099}
1100
1101static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
1102				   u64 *key_attrs, const struct nlattr **a,
1103				   bool is_mask, bool log)
1104{
1105	int err;
1106	bool encap_valid = false;
1107
1108	err = __parse_vlan_from_nlattrs(match, key_attrs, false, a,
1109					is_mask, log);
1110	if (err)
1111		return err;
1112
1113	encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_CFI_MASK));
1114	if (encap_valid) {
1115		err = __parse_vlan_from_nlattrs(match, key_attrs, true, a,
1116						is_mask, log);
1117		if (err)
1118			return err;
1119	}
1120
1121	return 0;
1122}
1123
1124static int parse_eth_type_from_nlattrs(struct sw_flow_match *match,
1125				       u64 *attrs, const struct nlattr **a,
1126				       bool is_mask, bool log)
1127{
1128	__be16 eth_type;
1129
1130	eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1131	if (is_mask) {
1132		/* Always exact match EtherType. */
1133		eth_type = htons(0xffff);
1134	} else if (!eth_proto_is_802_3(eth_type)) {
1135		OVS_NLERR(log, "EtherType %x is less than min %x",
1136				ntohs(eth_type), ETH_P_802_3_MIN);
1137		return -EINVAL;
1138	}
1139
1140	SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
1141	*attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1142	return 0;
1143}
1144
1145static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
1146				 u64 *attrs, const struct nlattr **a,
1147				 bool is_mask, bool log)
1148{
1149	u8 mac_proto = MAC_PROTO_ETHERNET;
1150
1151	if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
1152		u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
1153
1154		SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask);
1155		*attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH);
1156	}
1157
1158	if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) {
1159		u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);
1160
1161		SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
1162		*attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID);
1163	}
1164
1165	if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
1166		SW_FLOW_KEY_PUT(match, phy.priority,
1167			  nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
1168		*attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
1169	}
1170
1171	if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
1172		u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
1173
1174		if (is_mask) {
1175			in_port = 0xffffffff; /* Always exact match in_port. */
1176		} else if (in_port >= DP_MAX_PORTS) {
1177			OVS_NLERR(log, "Port %d exceeds max allowable %d",
1178				  in_port, DP_MAX_PORTS);
1179			return -EINVAL;
1180		}
1181
1182		SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
1183		*attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
1184	} else if (!is_mask) {
1185		SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
1186	}
1187
1188	if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
1189		uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
1190
1191		SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
1192		*attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
1193	}
1194	if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
1195		if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
1196				       is_mask, log) < 0)
1197			return -EINVAL;
1198		*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
1199	}
1200
1201	if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) &&
1202	    ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) {
1203		u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]);
1204
1205		if (ct_state & ~CT_SUPPORTED_MASK) {
1206			OVS_NLERR(log, "ct_state flags %08x unsupported",
1207				  ct_state);
1208			return -EINVAL;
1209		}
1210
1211		SW_FLOW_KEY_PUT(match, ct_state, ct_state, is_mask);
1212		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE);
1213	}
1214	if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) &&
1215	    ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) {
1216		u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]);
1217
1218		SW_FLOW_KEY_PUT(match, ct_zone, ct_zone, is_mask);
1219		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE);
1220	}
1221	if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) &&
1222	    ovs_ct_verify(net, OVS_KEY_ATTR_CT_MARK)) {
1223		u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]);
1224
1225		SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask);
1226		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK);
1227	}
1228	if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) &&
1229	    ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) {
1230		const struct ovs_key_ct_labels *cl;
1231
1232		cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]);
1233		SW_FLOW_KEY_MEMCPY(match, ct.labels, cl->ct_labels,
1234				   sizeof(*cl), is_mask);
1235		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
1236	}
1237	if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)) {
1238		const struct ovs_key_ct_tuple_ipv4 *ct;
1239
1240		ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4]);
1241
1242		SW_FLOW_KEY_PUT(match, ipv4.ct_orig.src, ct->ipv4_src, is_mask);
1243		SW_FLOW_KEY_PUT(match, ipv4.ct_orig.dst, ct->ipv4_dst, is_mask);
1244		SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
1245		SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
1246		SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv4_proto, is_mask);
1247		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4);
1248	}
1249	if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)) {
1250		const struct ovs_key_ct_tuple_ipv6 *ct;
1251
1252		ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6]);
1253
1254		SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.src, &ct->ipv6_src,
1255				   sizeof(match->key->ipv6.ct_orig.src),
1256				   is_mask);
1257		SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.dst, &ct->ipv6_dst,
1258				   sizeof(match->key->ipv6.ct_orig.dst),
1259				   is_mask);
1260		SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
1261		SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
1262		SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv6_proto, is_mask);
1263		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
1264	}
1265
1266	/* For layer 3 packets the Ethernet type is provided
1267	 * and treated as metadata but no MAC addresses are provided.
1268	 */
1269	if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
1270	    (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)))
1271		mac_proto = MAC_PROTO_NONE;
1272
1273	/* Always exact match mac_proto */
1274	SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 0xff : mac_proto, is_mask);
1275
1276	if (mac_proto == MAC_PROTO_NONE)
1277		return parse_eth_type_from_nlattrs(match, attrs, a, is_mask,
1278						   log);
1279
1280	return 0;
1281}
1282
1283int nsh_hdr_from_nlattr(const struct nlattr *attr,
1284			struct nshhdr *nh, size_t size)
1285{
1286	struct nlattr *a;
1287	int rem;
1288	u8 flags = 0;
1289	u8 ttl = 0;
1290	int mdlen = 0;
1291
1292	/* validate_nsh has check this, so we needn't do duplicate check here
1293	 */
1294	if (size < NSH_BASE_HDR_LEN)
1295		return -ENOBUFS;
1296
1297	nla_for_each_nested(a, attr, rem) {
1298		int type = nla_type(a);
1299
1300		switch (type) {
1301		case OVS_NSH_KEY_ATTR_BASE: {
1302			const struct ovs_nsh_key_base *base = nla_data(a);
1303
1304			flags = base->flags;
1305			ttl = base->ttl;
1306			nh->np = base->np;
1307			nh->mdtype = base->mdtype;
1308			nh->path_hdr = base->path_hdr;
1309			break;
1310		}
1311		case OVS_NSH_KEY_ATTR_MD1:
1312			mdlen = nla_len(a);
1313			if (mdlen > size - NSH_BASE_HDR_LEN)
1314				return -ENOBUFS;
1315			memcpy(&nh->md1, nla_data(a), mdlen);
1316			break;
1317
1318		case OVS_NSH_KEY_ATTR_MD2:
1319			mdlen = nla_len(a);
1320			if (mdlen > size - NSH_BASE_HDR_LEN)
1321				return -ENOBUFS;
1322			memcpy(&nh->md2, nla_data(a), mdlen);
1323			break;
1324
1325		default:
1326			return -EINVAL;
1327		}
1328	}
1329
1330	/* nsh header length  = NSH_BASE_HDR_LEN + mdlen */
1331	nh->ver_flags_ttl_len = 0;
1332	nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen);
1333
1334	return 0;
1335}
1336
1337int nsh_key_from_nlattr(const struct nlattr *attr,
1338			struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask)
1339{
1340	struct nlattr *a;
1341	int rem;
1342
1343	/* validate_nsh has check this, so we needn't do duplicate check here
1344	 */
1345	nla_for_each_nested(a, attr, rem) {
1346		int type = nla_type(a);
1347
1348		switch (type) {
1349		case OVS_NSH_KEY_ATTR_BASE: {
1350			const struct ovs_nsh_key_base *base = nla_data(a);
1351			const struct ovs_nsh_key_base *base_mask = base + 1;
1352
1353			nsh->base = *base;
1354			nsh_mask->base = *base_mask;
1355			break;
1356		}
1357		case OVS_NSH_KEY_ATTR_MD1: {
1358			const struct ovs_nsh_key_md1 *md1 = nla_data(a);
1359			const struct ovs_nsh_key_md1 *md1_mask = md1 + 1;
1360
1361			memcpy(nsh->context, md1->context, sizeof(*md1));
1362			memcpy(nsh_mask->context, md1_mask->context,
1363			       sizeof(*md1_mask));
1364			break;
1365		}
1366		case OVS_NSH_KEY_ATTR_MD2:
1367			/* Not supported yet */
1368			return -ENOTSUPP;
1369		default:
1370			return -EINVAL;
1371		}
1372	}
1373
1374	return 0;
1375}
1376
/* Parse a nested OVS_KEY_ATTR_NSH attribute sequence into 'match'.
 *
 * @attr: nested attribute holding OVS_NSH_KEY_ATTR_* entries.
 * @match: receives the parsed NSH key or mask fields.
 * @is_mask: true when 'attr' carries mask values rather than key values.
 * @is_push_nsh: true when parsing the header of an OVS_ACTION_ATTR_PUSH_NSH
 *	action; only then is MD type 2 metadata accepted, and base +
 *	metadata attributes become mandatory.
 * @log: allow error logging (passed false while probing features).
 *
 * Returns 0 on success or a negative errno on malformed input.
 */
static int nsh_key_put_from_nlattr(const struct nlattr *attr,
				   struct sw_flow_match *match, bool is_mask,
				   bool is_push_nsh, bool log)
{
	struct nlattr *a;
	int rem;
	bool has_base = false;
	bool has_md1 = false;
	bool has_md2 = false;
	u8 mdtype = 0;
	int mdlen = 0;

	/* A push_nsh header is never a mask; reject contradictory callers. */
	if (WARN_ON(is_push_nsh && is_mask))
		return -EINVAL;

	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);
		int i;

		if (type > OVS_NSH_KEY_ATTR_MAX) {
			OVS_NLERR(log, "nsh attr %d is out of range max %d",
				  type, OVS_NSH_KEY_ATTR_MAX);
			return -EINVAL;
		}

		if (!check_attr_len(nla_len(a),
				    ovs_nsh_key_attr_lens[type].len)) {
			OVS_NLERR(
			    log,
			    "nsh attr %d has unexpected len %d expected %d",
			    type,
			    nla_len(a),
			    ovs_nsh_key_attr_lens[type].len
			);
			return -EINVAL;
		}

		switch (type) {
		case OVS_NSH_KEY_ATTR_BASE: {
			const struct ovs_nsh_key_base *base = nla_data(a);

			/* Remember mdtype to cross-check MD1/MD2 below. */
			has_base = true;
			mdtype = base->mdtype;
			SW_FLOW_KEY_PUT(match, nsh.base.flags,
					base->flags, is_mask);
			SW_FLOW_KEY_PUT(match, nsh.base.ttl,
					base->ttl, is_mask);
			SW_FLOW_KEY_PUT(match, nsh.base.mdtype,
					base->mdtype, is_mask);
			SW_FLOW_KEY_PUT(match, nsh.base.np,
					base->np, is_mask);
			SW_FLOW_KEY_PUT(match, nsh.base.path_hdr,
					base->path_hdr, is_mask);
			break;
		}
		case OVS_NSH_KEY_ATTR_MD1: {
			const struct ovs_nsh_key_md1 *md1 = nla_data(a);

			has_md1 = true;
			for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++)
				SW_FLOW_KEY_PUT(match, nsh.context[i],
						md1->context[i], is_mask);
			break;
		}
		case OVS_NSH_KEY_ATTR_MD2:
			if (!is_push_nsh) /* Not supported MD type 2 yet */
				return -ENOTSUPP;

			has_md2 = true;
			mdlen = nla_len(a);
			if (mdlen > NSH_CTX_HDRS_MAX_LEN || mdlen <= 0) {
				OVS_NLERR(
				    log,
				    "Invalid MD length %d for MD type %d",
				    mdlen,
				    mdtype
				);
				return -EINVAL;
			}
			break;
		default:
			OVS_NLERR(log, "Unknown nsh attribute %d",
				  type);
			return -EINVAL;
		}
	}

	/* Trailing bytes that are not a whole attribute are an error. */
	if (rem > 0) {
		OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem);
		return -EINVAL;
	}

	if (has_md1 && has_md2) {
		OVS_NLERR(
		    1,
		    "invalid nsh attribute: md1 and md2 are exclusive."
		);
		return -EINVAL;
	}

	if (!is_mask) {
		/* For keys, the metadata attribute must agree with the MD
		 * type declared in the base header.
		 */
		if ((has_md1 && mdtype != NSH_M_TYPE1) ||
		    (has_md2 && mdtype != NSH_M_TYPE2)) {
			OVS_NLERR(1, "nsh attribute has unmatched MD type %d.",
				  mdtype);
			return -EINVAL;
		}

		if (is_push_nsh &&
		    (!has_base || (!has_md1 && !has_md2))) {
			OVS_NLERR(
			    1,
			    "push_nsh: missing base or metadata attributes"
			);
			return -EINVAL;
		}
	}

	return 0;
}
1497
/* Populate 'match' (key when !is_mask, mask otherwise) from the parsed
 * netlink attribute array 'a'.  Metadata is consumed first, then each
 * protocol layer in turn; every handled attribute clears its bit in
 * 'attrs', and any bit left set at the end is an error.
 *
 * Returns 0 on success or a negative errno on invalid attributes.
 */
static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
				u64 attrs, const struct nlattr **a,
				bool is_mask, bool log)
{
	int err;

	err = metadata_from_nlattrs(net, match, &attrs, a, is_mask, log);
	if (err)
		return err;

	if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
		const struct ovs_key_ethernet *eth_key;

		eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
		SW_FLOW_KEY_MEMCPY(match, eth.src,
				eth_key->eth_src, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, eth.dst,
				eth_key->eth_dst, ETH_ALEN, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);

		if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
			/* VLAN attribute is always parsed before getting here since it
			 * may occur multiple times.
			 */
			OVS_NLERR(log, "VLAN attribute unexpected.");
			return -EINVAL;
		}

		if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
			err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask,
							  log);
			if (err)
				return err;
		} else if (!is_mask) {
			/* No EtherType: frame is 802.2 (LLC). */
			SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
		}
	} else if (!match->key->eth.type) {
		OVS_NLERR(log, "Either Ethernet header or EtherType is required.");
		return -EINVAL;
	}

	if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
		const struct ovs_key_ipv4 *ipv4_key;

		ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
		if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
			OVS_NLERR(log, "IPv4 frag type %d is out of range max %d",
				  ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
			return -EINVAL;
		}
		SW_FLOW_KEY_PUT(match, ip.proto,
				ipv4_key->ipv4_proto, is_mask);
		SW_FLOW_KEY_PUT(match, ip.tos,
				ipv4_key->ipv4_tos, is_mask);
		SW_FLOW_KEY_PUT(match, ip.ttl,
				ipv4_key->ipv4_ttl, is_mask);
		SW_FLOW_KEY_PUT(match, ip.frag,
				ipv4_key->ipv4_frag, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.src,
				ipv4_key->ipv4_src, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
				ipv4_key->ipv4_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
	}

	if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
		const struct ovs_key_ipv6 *ipv6_key;

		ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
		if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
			OVS_NLERR(log, "IPv6 frag type %d is out of range max %d",
				  ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
			return -EINVAL;
		}

		/* Flow label occupies only the low 20 bits. */
		if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) {
			OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x)",
				  ntohl(ipv6_key->ipv6_label), (1 << 20) - 1);
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, ipv6.label,
				ipv6_key->ipv6_label, is_mask);
		SW_FLOW_KEY_PUT(match, ip.proto,
				ipv6_key->ipv6_proto, is_mask);
		SW_FLOW_KEY_PUT(match, ip.tos,
				ipv6_key->ipv6_tclass, is_mask);
		SW_FLOW_KEY_PUT(match, ip.ttl,
				ipv6_key->ipv6_hlimit, is_mask);
		SW_FLOW_KEY_PUT(match, ip.frag,
				ipv6_key->ipv6_frag, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
				ipv6_key->ipv6_src,
				sizeof(match->key->ipv6.addr.src),
				is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
				ipv6_key->ipv6_dst,
				sizeof(match->key->ipv6.addr.dst),
				is_mask);

		attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
		const struct ovs_key_arp *arp_key;

		arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
		if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
			OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).",
				  arp_key->arp_op);
			return -EINVAL;
		}

		/* ARP reuses the IPv4 address and ip.proto slots of the
		 * flow key; the opcode lands in ip.proto.
		 */
		SW_FLOW_KEY_PUT(match, ipv4.addr.src,
				arp_key->arp_sip, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
			arp_key->arp_tip, is_mask);
		SW_FLOW_KEY_PUT(match, ip.proto,
				ntohs(arp_key->arp_op), is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
				arp_key->arp_sha, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
				arp_key->arp_tha, ETH_ALEN, is_mask);

		attrs &= ~(1 << OVS_KEY_ATTR_ARP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_NSH)) {
		if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match,
					    is_mask, false, log) < 0)
			return -EINVAL;
		attrs &= ~(1 << OVS_KEY_ATTR_NSH);
	}

	if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
		const struct ovs_key_mpls *mpls_key;
		u32 hdr_len;
		u32 label_count, label_count_mask, i;

		mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
		hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]);
		label_count = hdr_len / sizeof(struct ovs_key_mpls);

		/* Attribute must hold a whole, bounded number of LSEs. */
		if (label_count == 0 || label_count > MPLS_LABEL_DEPTH ||
		    hdr_len % sizeof(struct ovs_key_mpls))
			return -EINVAL;

		/* One bit per matched label stack entry. */
		label_count_mask =  GENMASK(label_count - 1, 0);

		for (i = 0 ; i < label_count; i++)
			SW_FLOW_KEY_PUT(match, mpls.lse[i],
					mpls_key[i].mpls_lse, is_mask);

		SW_FLOW_KEY_PUT(match, mpls.num_labels_mask,
				label_count_mask, is_mask);

		attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
	 }

	if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
		const struct ovs_key_tcp *tcp_key;

		tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
		SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_TCP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
		SW_FLOW_KEY_PUT(match, tp.flags,
				nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
				is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS);
	}

	if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
		const struct ovs_key_udp *udp_key;

		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
		SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_UDP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
		const struct ovs_key_sctp *sctp_key;

		sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
		SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
	}

	/* ICMP(v6) type/code are stored in the transport port fields. */
	if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
		const struct ovs_key_icmp *icmp_key;

		icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
		SW_FLOW_KEY_PUT(match, tp.src,
				htons(icmp_key->icmp_type), is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst,
				htons(icmp_key->icmp_code), is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
		const struct ovs_key_icmpv6 *icmpv6_key;

		icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
		SW_FLOW_KEY_PUT(match, tp.src,
				htons(icmpv6_key->icmpv6_type), is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst,
				htons(icmpv6_key->icmpv6_code), is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ND)) {
		const struct ovs_key_nd *nd_key;

		nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
			nd_key->nd_target,
			sizeof(match->key->ipv6.nd.target),
			is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
			nd_key->nd_sll, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
				nd_key->nd_tll, ETH_ALEN, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ND);
	}

	/* Anything not consumed above is unknown to this kernel. */
	if (attrs != 0) {
		OVS_NLERR(log, "Unknown key attributes %llx",
			  (unsigned long long)attrs);
		return -EINVAL;
	}

	return 0;
}
1736
1737static void nlattr_set(struct nlattr *attr, u8 val,
1738		       const struct ovs_len_tbl *tbl)
1739{
1740	struct nlattr *nla;
1741	int rem;
1742
1743	/* The nlattr stream should already have been validated */
1744	nla_for_each_nested(nla, attr, rem) {
1745		if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED)
1746			nlattr_set(nla, val, tbl[nla_type(nla)].next ? : tbl);
1747		else
1748			memset(nla_data(nla), val, nla_len(nla));
1749
1750		if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
1751			*(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
1752	}
1753}
1754
/* Turn a copied key attribute stream into an exact-match mask by setting
 * every leaf value to 'val' (callers pass 0xff).
 */
static void mask_set_nlattr(struct nlattr *attr, u8 val)
{
	nlattr_set(attr, val, ovs_key_lens);
}
1759
1760/**
1761 * ovs_nla_get_match - parses Netlink attributes into a flow key and
1762 * mask. In case the 'mask' is NULL, the flow is treated as exact match
1763 * flow. Otherwise, it is treated as a wildcarded flow, except the mask
1764 * does not include any don't care bit.
1765 * @net: Used to determine per-namespace field support.
1766 * @match: receives the extracted flow match information.
1767 * @nla_key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
1768 * sequence. The fields should of the packet that triggered the creation
1769 * of this flow.
1770 * @nla_mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_*
1771 * Netlink attribute specifies the mask field of the wildcarded flow.
1772 * @log: Boolean to allow kernel error logging.  Normally true, but when
1773 * probing for feature compatibility this should be passed in as false to
1774 * suppress unnecessary error logging.
1775 */
1776int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
1777		      const struct nlattr *nla_key,
1778		      const struct nlattr *nla_mask,
1779		      bool log)
1780{
1781	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
1782	struct nlattr *newmask = NULL;
1783	u64 key_attrs = 0;
1784	u64 mask_attrs = 0;
1785	int err;
1786
1787	err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
1788	if (err)
1789		return err;
1790
1791	err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log);
1792	if (err)
1793		return err;
1794
1795	err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log);
1796	if (err)
1797		return err;
1798
1799	if (match->mask) {
1800		if (!nla_mask) {
1801			/* Create an exact match mask. We need to set to 0xff
1802			 * all the 'match->mask' fields that have been touched
1803			 * in 'match->key'. We cannot simply memset
1804			 * 'match->mask', because padding bytes and fields not
1805			 * specified in 'match->key' should be left to 0.
1806			 * Instead, we use a stream of netlink attributes,
1807			 * copied from 'key' and set to 0xff.
1808			 * ovs_key_from_nlattrs() will take care of filling
1809			 * 'match->mask' appropriately.
1810			 */
1811			newmask = kmemdup(nla_key,
1812					  nla_total_size(nla_len(nla_key)),
1813					  GFP_KERNEL);
1814			if (!newmask)
1815				return -ENOMEM;
1816
1817			mask_set_nlattr(newmask, 0xff);
1818
1819			/* The userspace does not send tunnel attributes that
1820			 * are 0, but we should not wildcard them nonetheless.
1821			 */
1822			if (match->key->tun_proto)
1823				SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
1824							 0xff, true);
1825
1826			nla_mask = newmask;
1827		}
1828
1829		err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log);
1830		if (err)
1831			goto free_newmask;
1832
1833		/* Always match on tci. */
1834		SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true);
1835		SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true);
1836
1837		err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log);
1838		if (err)
1839			goto free_newmask;
1840
1841		err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true,
1842					   log);
1843		if (err)
1844			goto free_newmask;
1845	}
1846
1847	if (!match_validate(match, key_attrs, mask_attrs, log))
1848		err = -EINVAL;
1849
1850free_newmask:
1851	kfree(newmask);
1852	return err;
1853}
1854
1855static size_t get_ufid_len(const struct nlattr *attr, bool log)
1856{
1857	size_t len;
1858
1859	if (!attr)
1860		return 0;
1861
1862	len = nla_len(attr);
1863	if (len < 1 || len > MAX_UFID_LENGTH) {
1864		OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)",
1865			  nla_len(attr), MAX_UFID_LENGTH);
1866		return 0;
1867	}
1868
1869	return len;
1870}
1871
1872/* Initializes 'flow->ufid', returning true if 'attr' contains a valid UFID,
1873 * or false otherwise.
1874 */
1875bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr,
1876		      bool log)
1877{
1878	sfid->ufid_len = get_ufid_len(attr, log);
1879	if (sfid->ufid_len)
1880		memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len);
1881
1882	return sfid->ufid_len;
1883}
1884
1885int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
1886			   const struct sw_flow_key *key, bool log)
1887{
1888	struct sw_flow_key *new_key;
1889
1890	if (ovs_nla_get_ufid(sfid, ufid, log))
1891		return 0;
1892
1893	/* If UFID was not provided, use unmasked key. */
1894	new_key = kmalloc(sizeof(*new_key), GFP_KERNEL);
1895	if (!new_key)
1896		return -ENOMEM;
1897	memcpy(new_key, key, sizeof(*key));
1898	sfid->unmasked_key = new_key;
1899
1900	return 0;
1901}
1902
1903u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
1904{
1905	return attr ? nla_get_u32(attr) : 0;
1906}
1907
1908/**
1909 * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
1910 * @net: Network namespace.
1911 * @key: Receives extracted in_port, priority, tun_key, skb_mark and conntrack
1912 * metadata.
1913 * @a: Array of netlink attributes holding parsed %OVS_KEY_ATTR_* Netlink
1914 * attributes.
1915 * @attrs: Bit mask for the netlink attributes included in @a.
1916 * @log: Boolean to allow kernel error logging.  Normally true, but when
1917 * probing for feature compatibility this should be passed in as false to
1918 * suppress unnecessary error logging.
1919 *
1920 * This parses a series of Netlink attributes that form a flow key, which must
1921 * take the same form accepted by flow_from_nlattrs(), but only enough of it to
1922 * get the metadata, that is, the parts of the flow key that cannot be
1923 * extracted from the packet itself.
1924 *
1925 * This must be called before the packet key fields are filled in 'key'.
1926 */
1927
1928int ovs_nla_get_flow_metadata(struct net *net,
1929			      const struct nlattr *a[OVS_KEY_ATTR_MAX + 1],
1930			      u64 attrs, struct sw_flow_key *key, bool log)
1931{
1932	struct sw_flow_match match;
1933
1934	memset(&match, 0, sizeof(match));
1935	match.key = key;
1936
1937	key->ct_state = 0;
1938	key->ct_zone = 0;
1939	key->ct_orig_proto = 0;
1940	memset(&key->ct, 0, sizeof(key->ct));
1941	memset(&key->ipv4.ct_orig, 0, sizeof(key->ipv4.ct_orig));
1942	memset(&key->ipv6.ct_orig, 0, sizeof(key->ipv6.ct_orig));
1943
1944	key->phy.in_port = DP_MAX_PORTS;
1945
1946	return metadata_from_nlattrs(net, &match, &attrs, a, false, log);
1947}
1948
1949static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh,
1950			    bool is_mask)
1951{
1952	__be16 eth_type = !is_mask ? vh->tpid : htons(0xffff);
1953
1954	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
1955	    nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci))
1956		return -EMSGSIZE;
1957	return 0;
1958}
1959
1960static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask,
1961			     struct sk_buff *skb)
1962{
1963	struct nlattr *start;
1964
1965	start = nla_nest_start_noflag(skb, OVS_KEY_ATTR_NSH);
1966	if (!start)
1967		return -EMSGSIZE;
1968
1969	if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(nsh->base), &nsh->base))
1970		goto nla_put_failure;
1971
1972	if (is_mask || nsh->base.mdtype == NSH_M_TYPE1) {
1973		if (nla_put(skb, OVS_NSH_KEY_ATTR_MD1,
1974			    sizeof(nsh->context), nsh->context))
1975			goto nla_put_failure;
1976	}
1977
1978	/* Don't support MD type 2 yet */
1979
1980	nla_nest_end(skb, start);
1981
1982	return 0;
1983
1984nla_put_failure:
1985	return -EMSGSIZE;
1986}
1987
/* Serialize flow key or mask contents into netlink attributes.
 *
 * @swkey:   always the flow key itself; it determines which attributes
 *           appear in the dump.
 * @output:  the values actually written -- the key again, or the mask
 *           when @is_mask is true.
 * @is_mask: whether @output holds mask (wildcard) values.
 * @skb:     the netlink message being built.
 *
 * Returns 0 on success or -EMSGSIZE if the skb runs out of tailroom.
 */
static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
			     const struct sw_flow_key *output, bool is_mask,
			     struct sk_buff *skb)
{
	struct ovs_key_ethernet *eth_key;
	struct nlattr *nla;
	struct nlattr *encap = NULL;
	struct nlattr *in_encap = NULL;

	if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
		goto nla_put_failure;

	if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
		goto nla_put_failure;

	if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
		goto nla_put_failure;

	/* Tunnel metadata: emitted for keys only when a tunnel protocol was
	 * recorded, but unconditionally for masks.
	 */
	if ((swkey->tun_proto || is_mask)) {
		const void *opts = NULL;

		if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
			opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);

		if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
				     swkey->tun_opts_len, swkey->tun_proto, 0))
			goto nla_put_failure;
	}

	/* DP_MAX_PORTS means "input port not set": the attribute is omitted
	 * for keys, but an exact-match mask (0xffff) is still dumped.
	 */
	if (swkey->phy.in_port == DP_MAX_PORTS) {
		if (is_mask && (output->phy.in_port == 0xffff))
			if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
				goto nla_put_failure;
	} else {
		u16 upper_u16;
		upper_u16 = !is_mask ? 0 : 0xffff;

		/* Port numbers are 16-bit; masks set the upper half to all
		 * ones so the 32-bit attribute is exact-match.
		 */
		if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
				(upper_u16 << 16) | output->phy.in_port))
			goto nla_put_failure;
	}

	if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
		goto nla_put_failure;

	if (ovs_ct_put_key(swkey, output, skb))
		goto nla_put_failure;

	if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) {
		nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
		if (!nla)
			goto nla_put_failure;

		eth_key = nla_data(nla);
		ether_addr_copy(eth_key->eth_src, output->eth.src);
		ether_addr_copy(eth_key->eth_dst, output->eth.dst);

		/* Up to two VLAN tags are represented as nested
		 * OVS_KEY_ATTR_ENCAP attributes; the nests are closed at
		 * the "unencap" label below.
		 */
		if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
			if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
				goto nla_put_failure;
			encap = nla_nest_start_noflag(skb, OVS_KEY_ATTR_ENCAP);
			if (!swkey->eth.vlan.tci)
				goto unencap;

			if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
				if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
					goto nla_put_failure;
				in_encap = nla_nest_start_noflag(skb,
								 OVS_KEY_ATTR_ENCAP);
				if (!swkey->eth.cvlan.tci)
					goto unencap;
			}
		}

		if (swkey->eth.type == htons(ETH_P_802_2)) {
			/*
			 * Ethertype 802.2 is represented in the netlink with omitted
			 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
			 * 0xffff in the mask attribute.  Ethertype can also
			 * be wildcarded.
			 */
			if (is_mask && output->eth.type)
				if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
							output->eth.type))
					goto nla_put_failure;
			goto unencap;
		}
	}

	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
		goto nla_put_failure;

	if (eth_type_vlan(swkey->eth.type)) {
		/* There are 3 VLAN tags, we don't know anything about the rest
		 * of the packet, so truncate here.
		 */
		WARN_ON_ONCE(!(encap && in_encap));
		goto unencap;
	}

	if (swkey->eth.type == htons(ETH_P_IP)) {
		struct ovs_key_ipv4 *ipv4_key;

		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
		if (!nla)
			goto nla_put_failure;
		ipv4_key = nla_data(nla);
		ipv4_key->ipv4_src = output->ipv4.addr.src;
		ipv4_key->ipv4_dst = output->ipv4.addr.dst;
		ipv4_key->ipv4_proto = output->ip.proto;
		ipv4_key->ipv4_tos = output->ip.tos;
		ipv4_key->ipv4_ttl = output->ip.ttl;
		ipv4_key->ipv4_frag = output->ip.frag;
	} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
		struct ovs_key_ipv6 *ipv6_key;

		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
		if (!nla)
			goto nla_put_failure;
		ipv6_key = nla_data(nla);
		memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
				sizeof(ipv6_key->ipv6_src));
		memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
				sizeof(ipv6_key->ipv6_dst));
		ipv6_key->ipv6_label = output->ipv6.label;
		ipv6_key->ipv6_proto = output->ip.proto;
		ipv6_key->ipv6_tclass = output->ip.tos;
		ipv6_key->ipv6_hlimit = output->ip.ttl;
		ipv6_key->ipv6_frag = output->ip.frag;
	} else if (swkey->eth.type == htons(ETH_P_NSH)) {
		if (nsh_key_to_nlattr(&output->nsh, is_mask, skb))
			goto nla_put_failure;
	} else if (swkey->eth.type == htons(ETH_P_ARP) ||
		   swkey->eth.type == htons(ETH_P_RARP)) {
		struct ovs_key_arp *arp_key;

		nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
		if (!nla)
			goto nla_put_failure;
		arp_key = nla_data(nla);
		memset(arp_key, 0, sizeof(struct ovs_key_arp));
		arp_key->arp_sip = output->ipv4.addr.src;
		arp_key->arp_tip = output->ipv4.addr.dst;
		/* ARP opcode is stored in ip.proto; widen back to 16 bits. */
		arp_key->arp_op = htons(output->ip.proto);
		ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
		ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
	} else if (eth_p_mpls(swkey->eth.type)) {
		u8 i, num_labels;
		struct ovs_key_mpls *mpls_key;

		/* One LSE per bit set in the labels mask. */
		num_labels = hweight_long(output->mpls.num_labels_mask);
		nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS,
				  num_labels * sizeof(*mpls_key));
		if (!nla)
			goto nla_put_failure;

		mpls_key = nla_data(nla);
		for (i = 0; i < num_labels; i++)
			mpls_key[i].mpls_lse = output->mpls.lse[i];
	}

	/* Transport-layer attributes: only for non-later IP fragments. */
	if ((swkey->eth.type == htons(ETH_P_IP) ||
	     swkey->eth.type == htons(ETH_P_IPV6)) &&
	     swkey->ip.frag != OVS_FRAG_TYPE_LATER) {

		if (swkey->ip.proto == IPPROTO_TCP) {
			struct ovs_key_tcp *tcp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
			if (!nla)
				goto nla_put_failure;
			tcp_key = nla_data(nla);
			tcp_key->tcp_src = output->tp.src;
			tcp_key->tcp_dst = output->tp.dst;
			if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
					 output->tp.flags))
				goto nla_put_failure;
		} else if (swkey->ip.proto == IPPROTO_UDP) {
			struct ovs_key_udp *udp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
			if (!nla)
				goto nla_put_failure;
			udp_key = nla_data(nla);
			udp_key->udp_src = output->tp.src;
			udp_key->udp_dst = output->tp.dst;
		} else if (swkey->ip.proto == IPPROTO_SCTP) {
			struct ovs_key_sctp *sctp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
			if (!nla)
				goto nla_put_failure;
			sctp_key = nla_data(nla);
			sctp_key->sctp_src = output->tp.src;
			sctp_key->sctp_dst = output->tp.dst;
		} else if (swkey->eth.type == htons(ETH_P_IP) &&
			   swkey->ip.proto == IPPROTO_ICMP) {
			struct ovs_key_icmp *icmp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
			if (!nla)
				goto nla_put_failure;
			icmp_key = nla_data(nla);
			/* ICMP type/code ride in the tp.src/tp.dst ports. */
			icmp_key->icmp_type = ntohs(output->tp.src);
			icmp_key->icmp_code = ntohs(output->tp.dst);
		} else if (swkey->eth.type == htons(ETH_P_IPV6) &&
			   swkey->ip.proto == IPPROTO_ICMPV6) {
			struct ovs_key_icmpv6 *icmpv6_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
						sizeof(*icmpv6_key));
			if (!nla)
				goto nla_put_failure;
			icmpv6_key = nla_data(nla);
			icmpv6_key->icmpv6_type = ntohs(output->tp.src);
			icmpv6_key->icmpv6_code = ntohs(output->tp.dst);

			/* Neighbour discovery messages also carry target and
			 * link-layer addresses.
			 */
			if (swkey->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
			    swkey->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
				struct ovs_key_nd *nd_key;

				nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
				if (!nla)
					goto nla_put_failure;
				nd_key = nla_data(nla);
				memcpy(nd_key->nd_target, &output->ipv6.nd.target,
							sizeof(nd_key->nd_target));
				ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
				ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
			}
		}
	}

unencap:
	if (in_encap)
		nla_nest_end(skb, in_encap);
	if (encap)
		nla_nest_end(skb, encap);

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
2232
2233int ovs_nla_put_key(const struct sw_flow_key *swkey,
2234		    const struct sw_flow_key *output, int attr, bool is_mask,
2235		    struct sk_buff *skb)
2236{
2237	int err;
2238	struct nlattr *nla;
2239
2240	nla = nla_nest_start_noflag(skb, attr);
2241	if (!nla)
2242		return -EMSGSIZE;
2243	err = __ovs_nla_put_key(swkey, output, is_mask, skb);
2244	if (err)
2245		return err;
2246	nla_nest_end(skb, nla);
2247
2248	return 0;
2249}
2250
2251/* Called with ovs_mutex or RCU read lock. */
2252int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb)
2253{
2254	if (ovs_identifier_is_ufid(&flow->id))
2255		return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len,
2256			       flow->id.ufid);
2257
2258	return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key,
2259			       OVS_FLOW_ATTR_KEY, false, skb);
2260}
2261
2262/* Called with ovs_mutex or RCU read lock. */
2263int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb)
2264{
2265	return ovs_nla_put_key(&flow->key, &flow->key,
2266				OVS_FLOW_ATTR_KEY, false, skb);
2267}
2268
2269/* Called with ovs_mutex or RCU read lock. */
2270int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
2271{
2272	return ovs_nla_put_key(&flow->key, &flow->mask->key,
2273				OVS_FLOW_ATTR_MASK, true, skb);
2274}
2275
2276#define MAX_ACTIONS_BUFSIZE	(32 * 1024)
2277
2278static struct sw_flow_actions *nla_alloc_flow_actions(int size)
2279{
2280	struct sw_flow_actions *sfa;
2281
2282	WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE);
2283
2284	sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
2285	if (!sfa)
2286		return ERR_PTR(-ENOMEM);
2287
2288	sfa->actions_len = 0;
2289	return sfa;
2290}
2291
2292static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len);
2293
2294static void ovs_nla_free_check_pkt_len_action(const struct nlattr *action)
2295{
2296	const struct nlattr *a;
2297	int rem;
2298
2299	nla_for_each_nested(a, action, rem) {
2300		switch (nla_type(a)) {
2301		case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL:
2302		case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER:
2303			ovs_nla_free_nested_actions(nla_data(a), nla_len(a));
2304			break;
2305		}
2306	}
2307}
2308
2309static void ovs_nla_free_clone_action(const struct nlattr *action)
2310{
2311	const struct nlattr *a = nla_data(action);
2312	int rem = nla_len(action);
2313
2314	switch (nla_type(a)) {
2315	case OVS_CLONE_ATTR_EXEC:
2316		/* The real list of actions follows this attribute. */
2317		a = nla_next(a, &rem);
2318		ovs_nla_free_nested_actions(a, rem);
2319		break;
2320	}
2321}
2322
2323static void ovs_nla_free_dec_ttl_action(const struct nlattr *action)
2324{
2325	const struct nlattr *a = nla_data(action);
2326
2327	switch (nla_type(a)) {
2328	case OVS_DEC_TTL_ATTR_ACTION:
2329		ovs_nla_free_nested_actions(nla_data(a), nla_len(a));
2330		break;
2331	}
2332}
2333
2334static void ovs_nla_free_sample_action(const struct nlattr *action)
2335{
2336	const struct nlattr *a = nla_data(action);
2337	int rem = nla_len(action);
2338
2339	switch (nla_type(a)) {
2340	case OVS_SAMPLE_ATTR_ARG:
2341		/* The real list of actions follows this attribute. */
2342		a = nla_next(a, &rem);
2343		ovs_nla_free_nested_actions(a, rem);
2344		break;
2345	}
2346}
2347
2348static void ovs_nla_free_set_action(const struct nlattr *a)
2349{
2350	const struct nlattr *ovs_key = nla_data(a);
2351	struct ovs_tunnel_info *ovs_tun;
2352
2353	switch (nla_type(ovs_key)) {
2354	case OVS_KEY_ATTR_TUNNEL_INFO:
2355		ovs_tun = nla_data(ovs_key);
2356		dst_release((struct dst_entry *)ovs_tun->tun_dst);
2357		break;
2358	}
2359}
2360
2361static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
2362{
2363	const struct nlattr *a;
2364	int rem;
2365
2366	/* Whenever new actions are added, the need to update this
2367	 * function should be considered.
2368	 */
2369	BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23);
2370
2371	if (!actions)
2372		return;
2373
2374	nla_for_each_attr(a, actions, len, rem) {
2375		switch (nla_type(a)) {
2376		case OVS_ACTION_ATTR_CHECK_PKT_LEN:
2377			ovs_nla_free_check_pkt_len_action(a);
2378			break;
2379
2380		case OVS_ACTION_ATTR_CLONE:
2381			ovs_nla_free_clone_action(a);
2382			break;
2383
2384		case OVS_ACTION_ATTR_CT:
2385			ovs_ct_free_action(a);
2386			break;
2387
2388		case OVS_ACTION_ATTR_DEC_TTL:
2389			ovs_nla_free_dec_ttl_action(a);
2390			break;
2391
2392		case OVS_ACTION_ATTR_SAMPLE:
2393			ovs_nla_free_sample_action(a);
2394			break;
2395
2396		case OVS_ACTION_ATTR_SET:
2397			ovs_nla_free_set_action(a);
2398			break;
2399		}
2400	}
2401}
2402
2403void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
2404{
2405	if (!sf_acts)
2406		return;
2407
2408	ovs_nla_free_nested_actions(sf_acts->actions, sf_acts->actions_len);
2409	kfree(sf_acts);
2410}
2411
2412static void __ovs_nla_free_flow_actions(struct rcu_head *head)
2413{
2414	ovs_nla_free_flow_actions(container_of(head, struct sw_flow_actions, rcu));
2415}
2416
/* Schedules 'sf_acts' to be freed after the next RCU grace period.
 * The caller must hold rcu_read_lock for this to be sensible, since
 * concurrent readers may still be traversing the old actions.
 */
void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *sf_acts)
{
	call_rcu(&sf_acts->rcu, __ovs_nla_free_flow_actions);
}
2423
/* Reserve @attr_len bytes (NLA_ALIGN'ed) at the tail of the action
 * buffer, growing (reallocating) it if necessary.
 *
 * On success, advances (*sfa)->actions_len and returns a pointer to the
 * reserved space; *sfa may have been replaced by a larger buffer.  On
 * failure returns ERR_PTR(-EMSGSIZE) and leaves *sfa untouched.
 */
static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
				       int attr_len, bool log)
{

	struct sw_flow_actions *acts;
	int new_acts_size;
	size_t req_size = NLA_ALIGN(attr_len);
	int next_offset = offsetof(struct sw_flow_actions, actions) +
					(*sfa)->actions_len;

	/* ksize() reports the real slab allocation size, so slack from
	 * earlier rounding can be reused without reallocating.
	 */
	if (req_size <= (ksize(*sfa) - next_offset))
		goto out;

	/* Double the buffer, but never beyond MAX_ACTIONS_BUFSIZE. */
	new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2);

	if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
		if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) {
			OVS_NLERR(log, "Flow action size exceeds max %u",
				  MAX_ACTIONS_BUFSIZE);
			return ERR_PTR(-EMSGSIZE);
		}
		new_acts_size = MAX_ACTIONS_BUFSIZE;
	}

	acts = nla_alloc_flow_actions(new_acts_size);
	if (IS_ERR(acts))
		return (void *)acts;

	/* Copy the existing actions over and swap in the new buffer. */
	memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
	acts->actions_len = (*sfa)->actions_len;
	acts->orig_len = (*sfa)->orig_len;
	kfree(*sfa);
	*sfa = acts;

out:
	(*sfa)->actions_len += req_size;
	return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
}
2462
2463static struct nlattr *__add_action(struct sw_flow_actions **sfa,
2464				   int attrtype, void *data, int len, bool log)
2465{
2466	struct nlattr *a;
2467
2468	a = reserve_sfa_size(sfa, nla_attr_size(len), log);
2469	if (IS_ERR(a))
2470		return a;
2471
2472	a->nla_type = attrtype;
2473	a->nla_len = nla_attr_size(len);
2474
2475	if (data)
2476		memcpy(nla_data(a), data, len);
2477	memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
2478
2479	return a;
2480}
2481
2482int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data,
2483		       int len, bool log)
2484{
2485	struct nlattr *a;
2486
2487	a = __add_action(sfa, attrtype, data, len, log);
2488
2489	return PTR_ERR_OR_ZERO(a);
2490}
2491
2492static inline int add_nested_action_start(struct sw_flow_actions **sfa,
2493					  int attrtype, bool log)
2494{
2495	int used = (*sfa)->actions_len;
2496	int err;
2497
2498	err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log);
2499	if (err)
2500		return err;
2501
2502	return used;
2503}
2504
2505static inline void add_nested_action_end(struct sw_flow_actions *sfa,
2506					 int st_offset)
2507{
2508	struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
2509							       st_offset);
2510
2511	a->nla_len = sfa->actions_len - st_offset;
2512}
2513
2514static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2515				  const struct sw_flow_key *key,
2516				  struct sw_flow_actions **sfa,
2517				  __be16 eth_type, __be16 vlan_tci,
2518				  u32 mpls_label_count, bool log,
2519				  u32 depth);
2520
/* Validate a SAMPLE action from userspace and copy it into the action
 * buffer, converting it to the internal (SAMPLE_ATTR_ARG + actions)
 * layout.  Returns 0 on success or a negative errno.
 */
static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
				    const struct sw_flow_key *key,
				    struct sw_flow_actions **sfa,
				    __be16 eth_type, __be16 vlan_tci,
				    u32 mpls_label_count, bool log, bool last,
				    u32 depth)
{
	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
	const struct nlattr *probability, *actions;
	const struct nlattr *a;
	int rem, start, err;
	struct sample_arg arg;

	/* Collect the nested attributes, rejecting unknown types and
	 * duplicates.
	 */
	memset(attrs, 0, sizeof(attrs));
	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);
		if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
			return -EINVAL;
		attrs[type] = a;
	}
	if (rem)
		return -EINVAL;

	probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
	if (!probability || nla_len(probability) != sizeof(u32))
		return -EINVAL;

	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
		return -EINVAL;

	/* validation done, copy sample action. */
	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
	if (start < 0)
		return start;

	/* When both skb and flow may be changed, put the sample
	 * into a deferred fifo. On the other hand, if only skb
	 * may be modified, the actions can be executed in place.
	 *
	 * Do this analysis at the flow installation time.
	 * Set 'clone_action->exec' to true if the actions can be
	 * executed without being deferred.
	 *
	 * If the sample is the last action, it can always be executed
	 * rather than deferred.
	 */
	arg.exec = last || !actions_may_change_flow(actions);
	arg.probability = nla_get_u32(probability);

	err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
				 log);
	if (err)
		return err;

	/* Recursively validate and copy the sampled action list. */
	err = __ovs_nla_copy_actions(net, actions, key, sfa,
				     eth_type, vlan_tci, mpls_label_count, log,
				     depth + 1);

	if (err)
		return err;

	add_nested_action_end(*sfa, start);

	return 0;
}
2587
/* Validate a DEC_TTL action and copy it, together with its nested
 * action list, into the action buffer.  Returns 0 on success or a
 * negative errno.
 */
static int validate_and_copy_dec_ttl(struct net *net,
				     const struct nlattr *attr,
				     const struct sw_flow_key *key,
				     struct sw_flow_actions **sfa,
				     __be16 eth_type, __be16 vlan_tci,
				     u32 mpls_label_count, bool log,
				     u32 depth)
{
	const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1];
	int start, action_start, err, rem;
	const struct nlattr *a, *actions;

	memset(attrs, 0, sizeof(attrs));
	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);

		/* Ignore unknown attributes to be future proof. */
		if (type > OVS_DEC_TTL_ATTR_MAX)
			continue;

		if (!type || attrs[type])
			return -EINVAL;

		attrs[type] = a;
	}

	/* The nested action list is mandatory and, if non-empty, must be
	 * at least one attribute header long.
	 */
	actions = attrs[OVS_DEC_TTL_ATTR_ACTION];
	if (rem || !actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
		return -EINVAL;

	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log);
	if (start < 0)
		return start;

	action_start = add_nested_action_start(sfa, OVS_DEC_TTL_ATTR_ACTION, log);
	if (action_start < 0)
		return action_start;

	/* Recursively validate and copy the actions run on TTL expiry. */
	err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type,
				     vlan_tci, mpls_label_count, log,
				     depth + 1);
	if (err)
		return err;

	add_nested_action_end(*sfa, action_start);
	add_nested_action_end(*sfa, start);
	return 0;
}
2636
/* Validate a CLONE action and copy it into the action buffer with an
 * internal OVS_CLONE_ATTR_EXEC flag prepended.  Returns 0 on success or
 * a negative errno.
 */
static int validate_and_copy_clone(struct net *net,
				   const struct nlattr *attr,
				   const struct sw_flow_key *key,
				   struct sw_flow_actions **sfa,
				   __be16 eth_type, __be16 vlan_tci,
				   u32 mpls_label_count, bool log, bool last,
				   u32 depth)
{
	int start, err;
	u32 exec;

	/* A non-empty clone body must hold at least one attribute header. */
	if (nla_len(attr) && nla_len(attr) < NLA_HDRLEN)
		return -EINVAL;

	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CLONE, log);
	if (start < 0)
		return start;

	/* Execute in place (rather than deferring) when the clone is the
	 * last action or its body cannot modify the flow key.
	 */
	exec = last || !actions_may_change_flow(attr);

	err = ovs_nla_add_action(sfa, OVS_CLONE_ATTR_EXEC, &exec,
				 sizeof(exec), log);
	if (err)
		return err;

	/* Recursively validate and copy the cloned action list. */
	err = __ovs_nla_copy_actions(net, attr, key, sfa,
				     eth_type, vlan_tci, mpls_label_count, log,
				     depth + 1);
	if (err)
		return err;

	add_nested_action_end(*sfa, start);

	return 0;
}
2672
2673void ovs_match_init(struct sw_flow_match *match,
2674		    struct sw_flow_key *key,
2675		    bool reset_key,
2676		    struct sw_flow_mask *mask)
2677{
2678	memset(match, 0, sizeof(*match));
2679	match->key = key;
2680	match->mask = mask;
2681
2682	if (reset_key)
2683		memset(key, 0, sizeof(*key));
2684
2685	if (mask) {
2686		memset(&mask->key, 0, sizeof(mask->key));
2687		mask->range.start = mask->range.end = 0;
2688	}
2689}
2690
2691static int validate_geneve_opts(struct sw_flow_key *key)
2692{
2693	struct geneve_opt *option;
2694	int opts_len = key->tun_opts_len;
2695	bool crit_opt = false;
2696
2697	option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len);
2698	while (opts_len > 0) {
2699		int len;
2700
2701		if (opts_len < sizeof(*option))
2702			return -EINVAL;
2703
2704		len = sizeof(*option) + option->length * 4;
2705		if (len > opts_len)
2706			return -EINVAL;
2707
2708		crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
2709
2710		option = (struct geneve_opt *)((u8 *)option + len);
2711		opts_len -= len;
2712	}
2713
2714	key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
2715
2716	return 0;
2717}
2718
/* Validate a SET(TUNNEL) action and convert it into the internal
 * SET(TUNNEL_INFO) form, which embeds a pre-built metadata dst carrying
 * the tunnel key and options.  Returns 0 on success or a negative errno.
 *
 * NOTE(review): on failure after add_nested_action_start(), the
 * partially-built SET action remains in *sfa; presumably the caller
 * releases it via ovs_nla_free_flow_actions() -- verify against callers.
 */
static int validate_and_copy_set_tun(const struct nlattr *attr,
				     struct sw_flow_actions **sfa, bool log)
{
	struct sw_flow_match match;
	struct sw_flow_key key;
	struct metadata_dst *tun_dst;
	struct ip_tunnel_info *tun_info;
	struct ovs_tunnel_info *ovs_tun;
	struct nlattr *a;
	int err = 0, start, opts_type;
	__be16 dst_opt_type;

	dst_opt_type = 0;
	/* Parse the tunnel attributes into a scratch flow key. */
	ovs_match_init(&match, &key, true, NULL);
	opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
	if (opts_type < 0)
		return opts_type;

	/* Map the option attribute type to the tunnel option flag used
	 * when attaching the options to the tunnel info.
	 */
	if (key.tun_opts_len) {
		switch (opts_type) {
		case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
			err = validate_geneve_opts(&key);
			if (err < 0)
				return err;
			dst_opt_type = TUNNEL_GENEVE_OPT;
			break;
		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
			dst_opt_type = TUNNEL_VXLAN_OPT;
			break;
		case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
			dst_opt_type = TUNNEL_ERSPAN_OPT;
			break;
		}
	}

	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
	if (start < 0)
		return start;

	tun_dst = metadata_dst_alloc(key.tun_opts_len, METADATA_IP_TUNNEL,
				     GFP_KERNEL);

	if (!tun_dst)
		return -ENOMEM;

	err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL);
	if (err) {
		dst_release((struct dst_entry *)tun_dst);
		return err;
	}

	a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
			 sizeof(*ovs_tun), log);
	if (IS_ERR(a)) {
		dst_release((struct dst_entry *)tun_dst);
		return PTR_ERR(a);
	}

	/* The action now owns the tun_dst reference; it is dropped by
	 * ovs_nla_free_set_action() when the actions are freed.
	 */
	ovs_tun = nla_data(a);
	ovs_tun->tun_dst = tun_dst;

	tun_info = &tun_dst->u.tun_info;
	tun_info->mode = IP_TUNNEL_INFO_TX;
	if (key.tun_proto == AF_INET6)
		tun_info->mode |= IP_TUNNEL_INFO_IPV6;
	else if (key.tun_proto == AF_INET && key.tun_key.u.ipv4.dst == 0)
		tun_info->mode |= IP_TUNNEL_INFO_BRIDGE;
	tun_info->key = key.tun_key;

	/* We need to store the options in the action itself since
	 * everything else will go away after flow setup. We can append
	 * it to tun_info and then point there.
	 */
	ip_tunnel_info_opts_set(tun_info,
				TUN_METADATA_OPTS(&key, key.tun_opts_len),
				key.tun_opts_len, dst_opt_type);
	add_nested_action_end(*sfa, start);

	return err;
}
2799
2800static bool validate_nsh(const struct nlattr *attr, bool is_mask,
2801			 bool is_push_nsh, bool log)
2802{
2803	struct sw_flow_match match;
2804	struct sw_flow_key key;
2805	int ret = 0;
2806
2807	ovs_match_init(&match, &key, true, NULL);
2808	ret = nsh_key_put_from_nlattr(attr, &match, is_mask,
2809				      is_push_nsh, log);
2810	return !ret;
2811}
2812
2813/* Return false if there are any non-masked bits set.
2814 * Mask follows data immediately, before any netlink padding.
2815 */
2816static bool validate_masked(u8 *data, int len)
2817{
2818	u8 *mask = data + len;
2819
2820	while (len--)
2821		if (*data++ & ~*mask++)
2822			return false;
2823
2824	return true;
2825}
2826
/* Validate a SET or SET_MASKED action against the flow key.
 *
 * @a:        the OVS_ACTION_ATTR_SET{,_TO_MASKED} attribute.
 * @flow_key: the flow key the actions will run against.
 * @sfa:      destination action buffer (only written for conversions).
 * @skip_copy: set to true when this function already copied a converted
 *            form of the action, so the caller must not copy it again.
 * @mac_proto / @eth_type: current L2 context of the packet.
 * @masked:   true for SET_TO_MASKED (value followed by a mask).
 *
 * Returns 0 on success or a negative errno.
 */
static int validate_set(const struct nlattr *a,
			const struct sw_flow_key *flow_key,
			struct sw_flow_actions **sfa, bool *skip_copy,
			u8 mac_proto, __be16 eth_type, bool masked, bool log)
{
	const struct nlattr *ovs_key = nla_data(a);
	int key_type = nla_type(ovs_key);
	size_t key_len;

	/* There can be only one key in an action */
	if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
		return -EINVAL;

	/* For masked sets, the payload is value followed by mask. */
	key_len = nla_len(ovs_key);
	if (masked)
		key_len /= 2;

	if (key_type > OVS_KEY_ATTR_MAX ||
	    !check_attr_len(key_len, ovs_key_lens[key_type].len))
		return -EINVAL;

	/* Value bits outside the mask would silently be ignored; reject. */
	if (masked && !validate_masked(nla_data(ovs_key), key_len))
		return -EINVAL;

	switch (key_type) {
	case OVS_KEY_ATTR_PRIORITY:
	case OVS_KEY_ATTR_SKB_MARK:
	case OVS_KEY_ATTR_CT_MARK:
	case OVS_KEY_ATTR_CT_LABELS:
		break;

	case OVS_KEY_ATTR_ETHERNET:
		if (mac_proto != MAC_PROTO_ETHERNET)
			return -EINVAL;
		break;

	case OVS_KEY_ATTR_TUNNEL: {
		int err;

		if (masked)
			return -EINVAL; /* Masked tunnel set not supported. */

		/* Converted to SET(TUNNEL_INFO) by the helper. */
		*skip_copy = true;
		err = validate_and_copy_set_tun(a, sfa, log);
		if (err)
			return err;
		break;
	}
	case OVS_KEY_ATTR_IPV4: {
		const struct ovs_key_ipv4 *ipv4_key;

		if (eth_type != htons(ETH_P_IP))
			return -EINVAL;

		ipv4_key = nla_data(ovs_key);

		if (masked) {
			const struct ovs_key_ipv4 *mask = ipv4_key + 1;

			/* Non-writeable fields. */
			if (mask->ipv4_proto || mask->ipv4_frag)
				return -EINVAL;
		} else {
			/* Unmasked sets may not alter proto/frag either;
			 * they must match the flow key exactly.
			 */
			if (ipv4_key->ipv4_proto != flow_key->ip.proto)
				return -EINVAL;

			if (ipv4_key->ipv4_frag != flow_key->ip.frag)
				return -EINVAL;
		}
		break;
	}
	case OVS_KEY_ATTR_IPV6: {
		const struct ovs_key_ipv6 *ipv6_key;

		if (eth_type != htons(ETH_P_IPV6))
			return -EINVAL;

		ipv6_key = nla_data(ovs_key);

		if (masked) {
			const struct ovs_key_ipv6 *mask = ipv6_key + 1;

			/* Non-writeable fields. */
			if (mask->ipv6_proto || mask->ipv6_frag)
				return -EINVAL;

			/* Invalid bits in the flow label mask? */
			if (ntohl(mask->ipv6_label) & 0xFFF00000)
				return -EINVAL;
		} else {
			if (ipv6_key->ipv6_proto != flow_key->ip.proto)
				return -EINVAL;

			if (ipv6_key->ipv6_frag != flow_key->ip.frag)
				return -EINVAL;
		}
		/* The flow label is only 20 bits wide. */
		if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
			return -EINVAL;

		break;
	}
	case OVS_KEY_ATTR_TCP:
		if ((eth_type != htons(ETH_P_IP) &&
		     eth_type != htons(ETH_P_IPV6)) ||
		    flow_key->ip.proto != IPPROTO_TCP)
			return -EINVAL;

		break;

	case OVS_KEY_ATTR_UDP:
		if ((eth_type != htons(ETH_P_IP) &&
		     eth_type != htons(ETH_P_IPV6)) ||
		    flow_key->ip.proto != IPPROTO_UDP)
			return -EINVAL;

		break;

	case OVS_KEY_ATTR_MPLS:
		if (!eth_p_mpls(eth_type))
			return -EINVAL;
		break;

	case OVS_KEY_ATTR_SCTP:
		if ((eth_type != htons(ETH_P_IP) &&
		     eth_type != htons(ETH_P_IPV6)) ||
		    flow_key->ip.proto != IPPROTO_SCTP)
			return -EINVAL;

		break;

	case OVS_KEY_ATTR_NSH:
		if (eth_type != htons(ETH_P_NSH))
			return -EINVAL;
		if (!validate_nsh(nla_data(a), masked, false, log))
			return -EINVAL;
		break;

	default:
		return -EINVAL;
	}

	/* Convert non-masked non-tunnel set actions to masked set actions. */
	if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) {
		int start, len = key_len * 2;
		struct nlattr *at;

		*skip_copy = true;

		start = add_nested_action_start(sfa,
						OVS_ACTION_ATTR_SET_TO_MASKED,
						log);
		if (start < 0)
			return start;

		at = __add_action(sfa, key_type, NULL, len, log);
		if (IS_ERR(at))
			return PTR_ERR(at);

		memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */
		memset(nla_data(at) + key_len, 0xff, key_len);    /* Mask. */
		/* Clear non-writeable bits from otherwise writeable fields. */
		if (key_type == OVS_KEY_ATTR_IPV6) {
			struct ovs_key_ipv6 *mask = nla_data(at) + key_len;

			mask->ipv6_label &= htonl(0x000FFFFF);
		}
		add_nested_action_end(*sfa, start);
	}

	return 0;
}
2998
/* Validate an OVS_ACTION_ATTR_USERSPACE action: its attributes must
 * parse against the policy and it must carry a non-zero netlink PID to
 * deliver the upcall to.  Returns 0 on success or a negative errno.
 */
static int validate_userspace(const struct nlattr *attr)
{
	static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
		[OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
		[OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
		[OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 },
	};
	struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
	int error;

	error = nla_parse_nested_deprecated(a, OVS_USERSPACE_ATTR_MAX, attr,
					    userspace_policy, NULL);
	if (error)
		return error;

	/* A zero PID cannot receive the upcall. */
	if (!a[OVS_USERSPACE_ATTR_PID] ||
	    !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
		return -EINVAL;

	return 0;
}
3020
/* Netlink policy for the attributes nested inside
 * OVS_ACTION_ATTR_CHECK_PKT_LEN.
 */
static const struct nla_policy cpl_policy[OVS_CHECK_PKT_LEN_ATTR_MAX + 1] = {
	[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] = {.type = NLA_U16 },
	[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER] = {.type = NLA_NESTED },
	[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL] = {.type = NLA_NESTED },
};
3026
/* Validate a CHECK_PKT_LEN action and copy it into the action buffer as
 * an internal ARG struct plus the two nested action lists.  Returns 0 on
 * success or a negative errno.
 */
static int validate_and_copy_check_pkt_len(struct net *net,
					   const struct nlattr *attr,
					   const struct sw_flow_key *key,
					   struct sw_flow_actions **sfa,
					   __be16 eth_type, __be16 vlan_tci,
					   u32 mpls_label_count,
					   bool log, bool last, u32 depth)
{
	const struct nlattr *acts_if_greater, *acts_if_lesser_eq;
	struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1];
	struct check_pkt_len_arg arg;
	int nested_acts_start;
	int start, err;

	err = nla_parse_deprecated_strict(a, OVS_CHECK_PKT_LEN_ATTR_MAX,
					  nla_data(attr), nla_len(attr),
					  cpl_policy, NULL);
	if (err)
		return err;

	/* The threshold length is mandatory and must be non-zero. */
	if (!a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] ||
	    !nla_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN]))
		return -EINVAL;

	acts_if_lesser_eq = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL];
	acts_if_greater = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER];

	/* Both the nested action should be present. */
	if (!acts_if_greater || !acts_if_lesser_eq)
		return -EINVAL;

	/* validation done, copy the nested actions. */
	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CHECK_PKT_LEN,
					log);
	if (start < 0)
		return start;

	/* Each branch may run in place if it is the last action or cannot
	 * modify the flow key; otherwise it must be deferred.
	 */
	arg.pkt_len = nla_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN]);
	arg.exec_for_lesser_equal =
		last || !actions_may_change_flow(acts_if_lesser_eq);
	arg.exec_for_greater =
		last || !actions_may_change_flow(acts_if_greater);

	err = ovs_nla_add_action(sfa, OVS_CHECK_PKT_LEN_ATTR_ARG, &arg,
				 sizeof(arg), log);
	if (err)
		return err;

	/* Copy the "length <= threshold" branch. */
	nested_acts_start = add_nested_action_start(sfa,
		OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL, log);
	if (nested_acts_start < 0)
		return nested_acts_start;

	err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa,
				     eth_type, vlan_tci, mpls_label_count, log,
				     depth + 1);

	if (err)
		return err;

	add_nested_action_end(*sfa, nested_acts_start);

	/* Copy the "length > threshold" branch. */
	nested_acts_start = add_nested_action_start(sfa,
		OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER, log);
	if (nested_acts_start < 0)
		return nested_acts_start;

	err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa,
				     eth_type, vlan_tci, mpls_label_count, log,
				     depth + 1);

	if (err)
		return err;

	add_nested_action_end(*sfa, nested_acts_start);
	add_nested_action_end(*sfa, start);
	return 0;
}
3105
3106static int copy_action(const struct nlattr *from,
3107		       struct sw_flow_actions **sfa, bool log)
3108{
3109	int totlen = NLA_ALIGN(from->nla_len);
3110	struct nlattr *to;
3111
3112	to = reserve_sfa_size(sfa, from->nla_len, log);
3113	if (IS_ERR(to))
3114		return PTR_ERR(to);
3115
3116	memcpy(to, from, totlen);
3117	return 0;
3118}
3119
/* Validate the nested action list in 'attr' against the (masked) flow
 * 'key' and append each action to 'sfa'.  eth_type, vlan_tci,
 * mac_proto and mpls_label_count track the packet headers as earlier
 * actions in the list rewrite them, so that later actions are checked
 * against the updated state.  'depth' bounds recursion through actions
 * that carry nested action lists (sample, clone, check_pkt_len,
 * dec_ttl).  Returns 0 on success or a negative errno.
 */
static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
				  const struct sw_flow_key *key,
				  struct sw_flow_actions **sfa,
				  __be16 eth_type, __be16 vlan_tci,
				  u32 mpls_label_count, bool log,
				  u32 depth)
{
	u8 mac_proto = ovs_key_mac_proto(key);
	const struct nlattr *a;
	int rem, err;

	if (depth > OVS_COPY_ACTIONS_MAX_DEPTH)
		return -EOVERFLOW;

	nla_for_each_nested(a, attr, rem) {
		/* Expected argument lengths, (u32)-1 for variable length. */
		static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
			[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
			[OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
			[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
			[OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
			[OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
			[OVS_ACTION_ATTR_POP_VLAN] = 0,
			[OVS_ACTION_ATTR_SET] = (u32)-1,
			[OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
			[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
			[OVS_ACTION_ATTR_CT] = (u32)-1,
			[OVS_ACTION_ATTR_CT_CLEAR] = 0,
			[OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
			[OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
			[OVS_ACTION_ATTR_POP_ETH] = 0,
			[OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
			[OVS_ACTION_ATTR_POP_NSH] = 0,
			[OVS_ACTION_ATTR_METER] = sizeof(u32),
			[OVS_ACTION_ATTR_CLONE] = (u32)-1,
			[OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
			[OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls),
			[OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
		};
		const struct ovs_action_push_vlan *vlan;
		int type = nla_type(a);
		bool skip_copy;

		/* Reject unknown attribute types and wrong payload sizes. */
		if (type > OVS_ACTION_ATTR_MAX ||
		    (action_lens[type] != nla_len(a) &&
		     action_lens[type] != (u32)-1))
			return -EINVAL;

		/* Most actions are copied verbatim at the bottom of the
		 * loop; validators that copy a converted form themselves
		 * set skip_copy.
		 */
		skip_copy = false;
		switch (type) {
		case OVS_ACTION_ATTR_UNSPEC:
			return -EINVAL;

		case OVS_ACTION_ATTR_USERSPACE:
			err = validate_userspace(a);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_OUTPUT:
			if (nla_get_u32(a) >= DP_MAX_PORTS)
				return -EINVAL;
			break;

		case OVS_ACTION_ATTR_TRUNC: {
			const struct ovs_action_trunc *trunc = nla_data(a);

			/* Truncating below an Ethernet header makes no sense. */
			if (trunc->max_len < ETH_HLEN)
				return -EINVAL;
			break;
		}

		case OVS_ACTION_ATTR_HASH: {
			const struct ovs_action_hash *act_hash = nla_data(a);

			/* Only the L4 hash algorithm is supported here. */
			switch (act_hash->hash_alg) {
			case OVS_HASH_ALG_L4:
				break;
			default:
				return  -EINVAL;
			}

			break;
		}

		case OVS_ACTION_ATTR_POP_VLAN:
			if (mac_proto != MAC_PROTO_ETHERNET)
				return -EINVAL;
			/* Subsequent actions see an untagged packet. */
			vlan_tci = htons(0);
			break;

		case OVS_ACTION_ATTR_PUSH_VLAN:
			if (mac_proto != MAC_PROTO_ETHERNET)
				return -EINVAL;
			vlan = nla_data(a);
			/* Require a VLAN TPID and a TCI with the CFI bit set. */
			if (!eth_type_vlan(vlan->vlan_tpid))
				return -EINVAL;
			if (!(vlan->vlan_tci & htons(VLAN_CFI_MASK)))
				return -EINVAL;
			vlan_tci = vlan->vlan_tci;
			break;

		case OVS_ACTION_ATTR_RECIRC:
			break;

		case OVS_ACTION_ATTR_ADD_MPLS: {
			const struct ovs_action_add_mpls *mpls = nla_data(a);

			if (!eth_p_mpls(mpls->mpls_ethertype))
				return -EINVAL;

			if (mpls->tun_flags & OVS_MPLS_L3_TUNNEL_FLAG_MASK) {
				/* With the L3 tunnel flag the same ethertype
				 * whitelist as OVS_ACTION_ATTR_PUSH_MPLS
				 * applies.
				 */
				if (vlan_tci & htons(VLAN_CFI_MASK) ||
				    (eth_type != htons(ETH_P_IP) &&
				     eth_type != htons(ETH_P_IPV6) &&
				     eth_type != htons(ETH_P_ARP) &&
				     eth_type != htons(ETH_P_RARP) &&
				     !eth_p_mpls(eth_type)))
					return -EINVAL;
				mpls_label_count++;
			} else {
				if (mac_proto == MAC_PROTO_ETHERNET) {
					mpls_label_count = 1;
					mac_proto = MAC_PROTO_NONE;
				} else {
					mpls_label_count++;
				}
			}
			eth_type = mpls->mpls_ethertype;
			break;
		}

		case OVS_ACTION_ATTR_PUSH_MPLS: {
			const struct ovs_action_push_mpls *mpls = nla_data(a);

			if (!eth_p_mpls(mpls->mpls_ethertype))
				return -EINVAL;
			/* Prohibit push MPLS other than to a white list
			 * for packets that have a known tag order.
			 */
			if (vlan_tci & htons(VLAN_CFI_MASK) ||
			    (eth_type != htons(ETH_P_IP) &&
			     eth_type != htons(ETH_P_IPV6) &&
			     eth_type != htons(ETH_P_ARP) &&
			     eth_type != htons(ETH_P_RARP) &&
			     !eth_p_mpls(eth_type)))
				return -EINVAL;
			eth_type = mpls->mpls_ethertype;
			mpls_label_count++;
			break;
		}

		case OVS_ACTION_ATTR_POP_MPLS: {
			__be16  proto;
			if (vlan_tci & htons(VLAN_CFI_MASK) ||
			    !eth_p_mpls(eth_type))
				return -EINVAL;

			/* Disallow subsequent L2.5+ set actions and mpls_pop
			 * actions once the last MPLS label in the packet is
			 * popped as there is no check here to ensure that
			 * the new eth type is valid and thus set actions could
			 * write off the end of the packet or otherwise corrupt
			 * it.
			 *
			 * Support for these actions is planned using packet
			 * recirculation.
			 */
			proto = nla_get_be16(a);

			if (proto == htons(ETH_P_TEB) &&
			    mac_proto != MAC_PROTO_NONE)
				return -EINVAL;

			mpls_label_count--;

			if (!eth_p_mpls(proto) || !mpls_label_count)
				eth_type = htons(0);
			else
				eth_type =  proto;

			break;
		}

		case OVS_ACTION_ATTR_SET:
			err = validate_set(a, key, sfa,
					   &skip_copy, mac_proto, eth_type,
					   false, log);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_SET_MASKED:
			err = validate_set(a, key, sfa,
					   &skip_copy, mac_proto, eth_type,
					   true, log);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_SAMPLE: {
			bool last = nla_is_last(a, rem);

			err = validate_and_copy_sample(net, a, key, sfa,
						       eth_type, vlan_tci,
						       mpls_label_count,
						       log, last, depth);
			if (err)
				return err;
			skip_copy = true;
			break;
		}

		case OVS_ACTION_ATTR_CT:
			err = ovs_ct_copy_action(net, a, key, sfa, log);
			if (err)
				return err;
			skip_copy = true;
			break;

		case OVS_ACTION_ATTR_CT_CLEAR:
			break;

		case OVS_ACTION_ATTR_PUSH_ETH:
			/* Disallow pushing an Ethernet header if one
			 * is already present */
			if (mac_proto != MAC_PROTO_NONE)
				return -EINVAL;
			mac_proto = MAC_PROTO_ETHERNET;
			break;

		case OVS_ACTION_ATTR_POP_ETH:
			if (mac_proto != MAC_PROTO_ETHERNET)
				return -EINVAL;
			if (vlan_tci & htons(VLAN_CFI_MASK))
				return -EINVAL;
			mac_proto = MAC_PROTO_NONE;
			break;

		case OVS_ACTION_ATTR_PUSH_NSH:
			if (mac_proto != MAC_PROTO_ETHERNET) {
				u8 next_proto;

				/* For L3 packets the current ethertype must
				 * map to a tunnel protocol number.
				 */
				next_proto = tun_p_from_eth_p(eth_type);
				if (!next_proto)
					return -EINVAL;
			}
			mac_proto = MAC_PROTO_NONE;
			if (!validate_nsh(nla_data(a), false, true, true))
				return -EINVAL;
			break;

		case OVS_ACTION_ATTR_POP_NSH: {
			__be16 inner_proto;

			if (eth_type != htons(ETH_P_NSH))
				return -EINVAL;
			inner_proto = tun_p_to_eth_p(key->nsh.base.np);
			if (!inner_proto)
				return -EINVAL;
			if (key->nsh.base.np == TUN_P_ETHERNET)
				mac_proto = MAC_PROTO_ETHERNET;
			else
				mac_proto = MAC_PROTO_NONE;
			break;
		}

		case OVS_ACTION_ATTR_METER:
			/* Non-existent meters are simply ignored.  */
			break;

		case OVS_ACTION_ATTR_CLONE: {
			bool last = nla_is_last(a, rem);

			err = validate_and_copy_clone(net, a, key, sfa,
						      eth_type, vlan_tci,
						      mpls_label_count,
						      log, last, depth);
			if (err)
				return err;
			skip_copy = true;
			break;
		}

		case OVS_ACTION_ATTR_CHECK_PKT_LEN: {
			bool last = nla_is_last(a, rem);

			err = validate_and_copy_check_pkt_len(net, a, key, sfa,
							      eth_type,
							      vlan_tci,
							      mpls_label_count,
							      log, last,
							      depth);
			if (err)
				return err;
			skip_copy = true;
			break;
		}

		case OVS_ACTION_ATTR_DEC_TTL:
			err = validate_and_copy_dec_ttl(net, a, key, sfa,
							eth_type, vlan_tci,
							mpls_label_count, log,
							depth);
			if (err)
				return err;
			skip_copy = true;
			break;

		default:
			OVS_NLERR(log, "Unknown Action type %d", type);
			return -EINVAL;
		}
		if (!skip_copy) {
			err = copy_action(a, sfa, log);
			if (err)
				return err;
		}
	}

	/* Leftover bytes after the last attribute indicate a malformed list. */
	if (rem > 0)
		return -EINVAL;

	return 0;
}
3447
3448/* 'key' must be the masked key. */
3449int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
3450			 const struct sw_flow_key *key,
3451			 struct sw_flow_actions **sfa, bool log)
3452{
3453	int err;
3454	u32 mpls_label_count = 0;
3455
3456	*sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
3457	if (IS_ERR(*sfa))
3458		return PTR_ERR(*sfa);
3459
3460	if (eth_p_mpls(key->eth.type))
3461		mpls_label_count = hweight_long(key->mpls.num_labels_mask);
3462
3463	(*sfa)->orig_len = nla_len(attr);
3464	err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
3465				     key->eth.vlan.tci, mpls_label_count, log,
3466				     0);
3467	if (err)
3468		ovs_nla_free_flow_actions(*sfa);
3469
3470	return err;
3471}
3472
/* Translate an internal OVS_ACTION_ATTR_SAMPLE action back to the
 * netlink format exposed to userspace.  Internally the first nested
 * attribute holds a struct sample_arg and the remaining attributes are
 * the sample's actions; userspace expects OVS_SAMPLE_ATTR_PROBABILITY
 * followed by a nested OVS_SAMPLE_ATTR_ACTIONS list.
 */
static int sample_action_to_attr(const struct nlattr *attr,
				 struct sk_buff *skb)
{
	struct nlattr *start, *ac_start = NULL, *sample_arg;
	int err = 0, rem = nla_len(attr);
	const struct sample_arg *arg;
	struct nlattr *actions;

	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SAMPLE);
	if (!start)
		return -EMSGSIZE;

	/* First nested attribute is the sample_arg; the actions follow. */
	sample_arg = nla_data(attr);
	arg = nla_data(sample_arg);
	actions = nla_next(sample_arg, &rem);

	if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) {
		err = -EMSGSIZE;
		goto out;
	}

	ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
	if (!ac_start) {
		err = -EMSGSIZE;
		goto out;
	}

	err = ovs_nla_put_actions(actions, rem, skb);

out:
	if (err) {
		/* nla_nest_cancel() tolerates a NULL attribute, so this is
		 * safe even when ac_start was never opened.
		 */
		nla_nest_cancel(skb, ac_start);
		nla_nest_cancel(skb, start);
	} else {
		nla_nest_end(skb, ac_start);
		nla_nest_end(skb, start);
	}

	return err;
}
3513
3514static int clone_action_to_attr(const struct nlattr *attr,
3515				struct sk_buff *skb)
3516{
3517	struct nlattr *start;
3518	int err = 0, rem = nla_len(attr);
3519
3520	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CLONE);
3521	if (!start)
3522		return -EMSGSIZE;
3523
3524	/* Skipping the OVS_CLONE_ATTR_EXEC that is always the first attribute. */
3525	attr = nla_next(nla_data(attr), &rem);
3526	err = ovs_nla_put_actions(attr, rem, skb);
3527
3528	if (err)
3529		nla_nest_cancel(skb, start);
3530	else
3531		nla_nest_end(skb, start);
3532
3533	return err;
3534}
3535
/* Translate an internal OVS_ACTION_ATTR_CHECK_PKT_LEN action back to
 * the netlink format exposed to userspace: the pkt_len threshold plus
 * the two nested action lists.  Mirrors the internal layout written by
 * validate_and_copy_check_pkt_len().
 */
static int check_pkt_len_action_to_attr(const struct nlattr *attr,
					struct sk_buff *skb)
{
	struct nlattr *start, *ac_start = NULL;
	const struct check_pkt_len_arg *arg;
	const struct nlattr *a, *cpl_arg;
	int err = 0, rem = nla_len(attr);

	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CHECK_PKT_LEN);
	if (!start)
		return -EMSGSIZE;

	/* The first nested attribute in 'attr' is always
	 * 'OVS_CHECK_PKT_LEN_ATTR_ARG'.
	 */
	cpl_arg = nla_data(attr);
	arg = nla_data(cpl_arg);

	if (nla_put_u16(skb, OVS_CHECK_PKT_LEN_ATTR_PKT_LEN, arg->pkt_len)) {
		err = -EMSGSIZE;
		goto out;
	}

	/* Second nested attribute in 'attr' is always
	 * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'.
	 */
	a = nla_next(cpl_arg, &rem);
	ac_start =  nla_nest_start_noflag(skb,
					  OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL);
	if (!ac_start) {
		err = -EMSGSIZE;
		goto out;
	}

	err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
	if (err) {
		nla_nest_cancel(skb, ac_start);
		goto out;
	} else {
		nla_nest_end(skb, ac_start);
	}

	/* Third nested attribute in 'attr' is always
	 * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER.
	 */
	a = nla_next(a, &rem);
	ac_start =  nla_nest_start_noflag(skb,
					  OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER);
	if (!ac_start) {
		err = -EMSGSIZE;
		goto out;
	}

	err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
	if (err) {
		nla_nest_cancel(skb, ac_start);
		goto out;
	} else {
		nla_nest_end(skb, ac_start);
	}

	nla_nest_end(skb, start);
	return 0;

out:
	/* Cancelling the outer nest also discards any inner attributes. */
	nla_nest_cancel(skb, start);
	return err;
}
3604
/* Translate an internal OVS_ACTION_ATTR_DEC_TTL action back to the
 * userspace netlink format, re-nesting the OVS_DEC_TTL_ATTR_ACTION
 * list and silently skipping any other nested attributes.
 */
static int dec_ttl_action_to_attr(const struct nlattr *attr,
				  struct sk_buff *skb)
{
	struct nlattr *start, *action_start;
	const struct nlattr *a;
	int err = 0, rem;

	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_DEC_TTL);
	if (!start)
		return -EMSGSIZE;

	nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) {
		switch (nla_type(a)) {
		case OVS_DEC_TTL_ATTR_ACTION:

			action_start = nla_nest_start_noflag(skb, OVS_DEC_TTL_ATTR_ACTION);
			if (!action_start) {
				err = -EMSGSIZE;
				goto out;
			}

			err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
			if (err)
				goto out;

			nla_nest_end(skb, action_start);
			break;

		default:
			/* Ignore all other option to be future compatible */
			break;
		}
	}

	nla_nest_end(skb, start);
	return 0;

out:
	/* Cancelling the outer nest discards any partial inner nest too. */
	nla_nest_cancel(skb, start);
	return err;
}
3646
/* Translate an internal OVS_ACTION_ATTR_SET action to netlink.  The
 * tunnel case needs special handling because the internal form stores
 * a struct ovs_tunnel_info rather than the netlink tunnel key
 * attributes; every other key is emitted verbatim.
 */
static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
{
	const struct nlattr *ovs_key = nla_data(a);
	int key_type = nla_type(ovs_key);
	struct nlattr *start;
	int err;

	switch (key_type) {
	case OVS_KEY_ATTR_TUNNEL_INFO: {
		struct ovs_tunnel_info *ovs_tun = nla_data(ovs_key);
		struct ip_tunnel_info *tun_info = &ovs_tun->tun_dst->u.tun_info;

		start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SET);
		if (!start)
			return -EMSGSIZE;

		err =  ip_tun_to_nlattr(skb, &tun_info->key,
					ip_tunnel_info_opts(tun_info),
					tun_info->options_len,
					ip_tunnel_info_af(tun_info), tun_info->mode);
		/* NOTE(review): no nla_nest_cancel() on this error path —
		 * presumably callers discard the whole skb on failure;
		 * confirm before relying on partial-message contents.
		 */
		if (err)
			return err;
		nla_nest_end(skb, start);
		break;
	}
	default:
		/* All other set keys carry the netlink payload unchanged. */
		if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
			return -EMSGSIZE;
		break;
	}

	return 0;
}
3680
3681static int masked_set_action_to_set_action_attr(const struct nlattr *a,
3682						struct sk_buff *skb)
3683{
3684	const struct nlattr *ovs_key = nla_data(a);
3685	struct nlattr *nla;
3686	size_t key_len = nla_len(ovs_key) / 2;
3687
3688	/* Revert the conversion we did from a non-masked set action to
3689	 * masked set action.
3690	 */
3691	nla = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SET);
3692	if (!nla)
3693		return -EMSGSIZE;
3694
3695	if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key)))
3696		return -EMSGSIZE;
3697
3698	nla_nest_end(skb, nla);
3699	return 0;
3700}
3701
/* Translate the internal copy of an action list ('attr', 'len' bytes)
 * back into the netlink format visible to userspace.  Actions stored
 * internally in a converted layout (set, masked set, sample, ct,
 * clone, check_pkt_len, dec_ttl) are handled by their dedicated
 * helpers; everything else is emitted verbatim.  Returns 0 on success
 * or a negative errno.
 */
int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
{
	const struct nlattr *a;
	int rem, err;

	nla_for_each_attr(a, attr, len, rem) {
		int type = nla_type(a);

		switch (type) {
		case OVS_ACTION_ATTR_SET:
			err = set_action_to_attr(a, skb);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_SET_TO_MASKED:
			err = masked_set_action_to_set_action_attr(a, skb);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_SAMPLE:
			err = sample_action_to_attr(a, skb);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_CT:
			err = ovs_ct_action_to_attr(nla_data(a), skb);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_CLONE:
			err = clone_action_to_attr(a, skb);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_CHECK_PKT_LEN:
			err = check_pkt_len_action_to_attr(a, skb);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_DEC_TTL:
			err = dec_ttl_action_to_attr(a, skb);
			if (err)
				return err;
			break;

		default:
			/* Internal and userspace formats match; copy as-is. */
			if (nla_put(skb, type, nla_len(a), nla_data(a)))
				return -EMSGSIZE;
			break;
		}
	}

	return 0;
}
3762