xref: /kernel/linux/linux-5.10/include/net/ip_vs.h (revision 8c2ecf20)
1/* SPDX-License-Identifier: GPL-2.0 */
2/* IP Virtual Server
3 * data structure and functionality definitions
4 */
5
6#ifndef _NET_IP_VS_H
7#define _NET_IP_VS_H
8
9#include <linux/ip_vs.h>                /* definitions shared with userland */
10
11#include <asm/types.h>                  /* for __uXX types */
12
13#include <linux/list.h>                 /* for struct list_head */
14#include <linux/spinlock.h>             /* for struct rwlock_t */
15#include <linux/atomic.h>               /* for struct atomic_t */
16#include <linux/refcount.h>             /* for struct refcount_t */
17#include <linux/workqueue.h>
18
19#include <linux/compiler.h>
20#include <linux/timer.h>
21#include <linux/bug.h>
22
23#include <net/checksum.h>
24#include <linux/netfilter.h>		/* for union nf_inet_addr */
25#include <linux/ip.h>
26#include <linux/ipv6.h>			/* for struct ipv6hdr */
27#include <net/ipv6.h>
28#if IS_ENABLED(CONFIG_NF_CONNTRACK)
29#include <net/netfilter/nf_conntrack.h>
30#endif
31#include <net/net_namespace.h>		/* Netw namespace */
32
/* Bits stored in ip_vs_iphdr.hdr_flags */
#define IP_VS_HDR_INVERSE	1	/* header was filled with inverse == true */
#define IP_VS_HDR_ICMP		2	/* set by ip_vs_fill_iph_skb_icmp() */
35
36/* Generic access of ipvs struct */
37static inline struct netns_ipvs *net_ipvs(struct net* net)
38{
39	return net->ipvs;
40}
41
/* Connection table size value; declared here because ip_vs_ctl.c needs it.
 * Defined outside this header.
 */
extern int ip_vs_conn_tab_size;
44
/* Address-family independent summary of an IPv4/IPv6 header, filled in by
 * ip_vs_fill_iph_skb_off() and its wrappers.
 */
struct ip_vs_iphdr {
	int hdr_flags;	/* ipvs flags (IP_VS_HDR_*) */
	__u32 off;	/* Where IPv4 or IPv6 header starts */
	__u32 len;	/* IPv4 simply where L4 starts
			 * IPv6 where L4 Transport Header starts */
	__u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag)*/
	__s16 protocol;	/* signed: on IPv6 it holds the ipv6_find_hdr() result */
	__s32 flags;	/* passed to ipv6_find_hdr(); written only for IPv6 */
	union nf_inet_addr saddr;	/* source address */
	union nf_inet_addr daddr;	/* destination address */
};
56
/* Thin wrapper: forwards every argument to skb_header_pointer() and hands
 * back its result unchanged.
 */
static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
				      int len, void *buffer)
{
	void *hdr = skb_header_pointer(skb, offset, len, buffer);

	return hdr;
}
62
63/* This function handles filling *ip_vs_iphdr, both for IPv4 and IPv6.
64 * IPv6 requires some extra work, as finding proper header position,
65 * depend on the IPv6 extension headers.
66 */
67static inline int
68ip_vs_fill_iph_skb_off(int af, const struct sk_buff *skb, int offset,
69		       int hdr_flags, struct ip_vs_iphdr *iphdr)
70{
71	iphdr->hdr_flags = hdr_flags;
72	iphdr->off = offset;
73
74#ifdef CONFIG_IP_VS_IPV6
75	if (af == AF_INET6) {
76		struct ipv6hdr _iph;
77		const struct ipv6hdr *iph = skb_header_pointer(
78			skb, offset, sizeof(_iph), &_iph);
79		if (!iph)
80			return 0;
81
82		iphdr->saddr.in6 = iph->saddr;
83		iphdr->daddr.in6 = iph->daddr;
84		/* ipv6_find_hdr() updates len, flags */
85		iphdr->len	 = offset;
86		iphdr->flags	 = 0;
87		iphdr->protocol  = ipv6_find_hdr(skb, &iphdr->len, -1,
88						 &iphdr->fragoffs,
89						 &iphdr->flags);
90		if (iphdr->protocol < 0)
91			return 0;
92	} else
93#endif
94	{
95		struct iphdr _iph;
96		const struct iphdr *iph = skb_header_pointer(
97			skb, offset, sizeof(_iph), &_iph);
98		if (!iph)
99			return 0;
100
101		iphdr->len	= offset + iph->ihl * 4;
102		iphdr->fragoffs	= 0;
103		iphdr->protocol	= iph->protocol;
104		iphdr->saddr.ip	= iph->saddr;
105		iphdr->daddr.ip	= iph->daddr;
106	}
107
108	return 1;
109}
110
111static inline int
112ip_vs_fill_iph_skb_icmp(int af, const struct sk_buff *skb, int offset,
113			bool inverse, struct ip_vs_iphdr *iphdr)
114{
115	int hdr_flags = IP_VS_HDR_ICMP;
116
117	if (inverse)
118		hdr_flags |= IP_VS_HDR_INVERSE;
119
120	return ip_vs_fill_iph_skb_off(af, skb, offset, hdr_flags, iphdr);
121}
122
123static inline int
124ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, bool inverse,
125		   struct ip_vs_iphdr *iphdr)
126{
127	int hdr_flags = 0;
128
129	if (inverse)
130		hdr_flags |= IP_VS_HDR_INVERSE;
131
132	return ip_vs_fill_iph_skb_off(af, skb, skb_network_offset(skb),
133				      hdr_flags, iphdr);
134}
135
136static inline bool
137ip_vs_iph_inverse(const struct ip_vs_iphdr *iph)
138{
139	return !!(iph->hdr_flags & IP_VS_HDR_INVERSE);
140}
141
142static inline bool
143ip_vs_iph_icmp(const struct ip_vs_iphdr *iph)
144{
145	return !!(iph->hdr_flags & IP_VS_HDR_ICMP);
146}
147
148static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst,
149				   const union nf_inet_addr *src)
150{
151#ifdef CONFIG_IP_VS_IPV6
152	if (af == AF_INET6)
153		dst->in6 = src->in6;
154	else
155#endif
156	dst->ip = src->ip;
157}
158
159static inline void ip_vs_addr_set(int af, union nf_inet_addr *dst,
160				  const union nf_inet_addr *src)
161{
162#ifdef CONFIG_IP_VS_IPV6
163	if (af == AF_INET6) {
164		dst->in6 = src->in6;
165		return;
166	}
167#endif
168	dst->ip = src->ip;
169	dst->all[1] = 0;
170	dst->all[2] = 0;
171	dst->all[3] = 0;
172}
173
174static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a,
175				   const union nf_inet_addr *b)
176{
177#ifdef CONFIG_IP_VS_IPV6
178	if (af == AF_INET6)
179		return ipv6_addr_equal(&a->in6, &b->in6);
180#endif
181	return a->ip == b->ip;
182}
183
184#ifdef CONFIG_IP_VS_DEBUG
185#include <linux/net.h>
186
/* Current debug level; the debug macros below emit a message only when
 * its level is <= this value.  Defined outside this header.
 */
int ip_vs_get_debug_level(void);
188
189static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
190					 const union nf_inet_addr *addr,
191					 int *idx)
192{
193	int len;
194#ifdef CONFIG_IP_VS_IPV6
195	if (af == AF_INET6)
196		len = snprintf(&buf[*idx], buf_len - *idx, "[%pI6c]",
197			       &addr->in6) + 1;
198	else
199#endif
200		len = snprintf(&buf[*idx], buf_len - *idx, "%pI4",
201			       &addr->ip) + 1;
202
203	*idx += len;
204	BUG_ON(*idx > buf_len + 1);
205	return &buf[*idx - len];
206}
207
/* Print a KERN_DEBUG message when @level <= ip_vs_get_debug_level().
 * Declares the scratch buffer and index that IP_VS_DBG_ADDR() relies on,
 * so IP_VS_DBG_ADDR() may be used inside the argument list.
 */
#define IP_VS_DBG_BUF(level, msg, ...)					\
	do {								\
		char ip_vs_dbg_buf[160];				\
		int ip_vs_dbg_idx = 0;					\
		if (level <= ip_vs_get_debug_level())			\
			printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__);	\
	} while (0)
/* Unconditional pr_err() variant that provides the same scratch buffer
 * and index for IP_VS_DBG_ADDR().
 */
#define IP_VS_ERR_BUF(msg...)						\
	do {								\
		char ip_vs_dbg_buf[160];				\
		int ip_vs_dbg_idx = 0;					\
		pr_err(msg);						\
	} while (0)
221
/* Only use from within IP_VS_DBG_BUF() or IP_VS_ERR_BUF macros: it
 * references the ip_vs_dbg_buf / ip_vs_dbg_idx locals they declare and
 * formats @addr through ip_vs_dbg_addr().
 */
#define IP_VS_DBG_ADDR(af, addr)					\
	ip_vs_dbg_addr(af, ip_vs_dbg_buf,				\
		       sizeof(ip_vs_dbg_buf), addr,			\
		       &ip_vs_dbg_idx)
227
/* Print a KERN_DEBUG message when @level <= ip_vs_get_debug_level(). */
#define IP_VS_DBG(level, msg, ...)					\
	do {								\
		if (level <= ip_vs_get_debug_level())			\
			printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__);	\
	} while (0)
/* Print a KERN_DEBUG message when net_ratelimit() allows it; no debug
 * level check is applied here.
 */
#define IP_VS_DBG_RL(msg, ...)						\
	do {								\
		if (net_ratelimit())					\
			printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__);	\
	} while (0)
/* Call @pp->debug_packet() for @skb when @level <= ip_vs_get_debug_level(). */
#define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg)			\
	do {								\
		if (level <= ip_vs_get_debug_level())			\
			pp->debug_packet(af, pp, skb, ofs, msg);	\
	} while (0)
/* Same as IP_VS_DBG_PKT(), additionally gated by net_ratelimit(). */
#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg)			\
	do {								\
		if (level <= ip_vs_get_debug_level() &&			\
		    net_ratelimit())					\
			pp->debug_packet(af, pp, skb, ofs, msg);	\
	} while (0)
249#else	/* NO DEBUGGING at ALL */
/* Debugging compiled out: each debug macro expands to an empty statement,
 * so its arguments are not evaluated.
 */
#define IP_VS_DBG_BUF(level, msg...)  do {} while (0)
#define IP_VS_ERR_BUF(msg...)  do {} while (0)
#define IP_VS_DBG(level, msg...)  do {} while (0)
#define IP_VS_DBG_RL(msg...)  do {} while (0)
#define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg)	do {} while (0)
#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg)	do {} while (0)
256#endif
257
/* Plain alias for BUG() */
#define IP_VS_BUG() BUG()
/* pr_err() that is emitted only when net_ratelimit() allows it.
 * Available regardless of CONFIG_IP_VS_DEBUG.
 */
#define IP_VS_ERR_RL(msg, ...)						\
	do {								\
		if (net_ratelimit())					\
			pr_err(msg, ##__VA_ARGS__);			\
	} while (0)
264
/* Function entry/exit tracing.  With CONFIG_IP_VS_DEBUG each macro logs the
 * function name, file and line at KERN_DEBUG when
 * @level <= ip_vs_get_debug_level(); otherwise both expand to nothing.
 */
#ifdef CONFIG_IP_VS_DEBUG
#define EnterFunction(level)						\
	do {								\
		if (level <= ip_vs_get_debug_level())			\
			printk(KERN_DEBUG				\
			       pr_fmt("Enter: %s, %s line %i\n"),	\
			       __func__, __FILE__, __LINE__);		\
	} while (0)
#define LeaveFunction(level)						\
	do {								\
		if (level <= ip_vs_get_debug_level())			\
			printk(KERN_DEBUG				\
			       pr_fmt("Leave: %s, %s line %i\n"),	\
			       __func__, __FILE__, __LINE__);		\
	} while (0)
#else
#define EnterFunction(level)   do {} while (0)
#define LeaveFunction(level)   do {} while (0)
#endif
284
/* FTP service port numbers, converted to big-endian (network order) with
 * cpu_to_be16(): 21 for FTPPORT, 20 for FTPDATA.
 */
#define FTPPORT  cpu_to_be16(21)
#define FTPDATA  cpu_to_be16(20)
288
/* TCP State Values.
 * Values are consecutive starting at 0; IP_VS_TCP_S_LAST is not a state
 * itself but one past the highest state value (i.e. the state count).
 */
enum {
	IP_VS_TCP_S_NONE = 0,
	IP_VS_TCP_S_ESTABLISHED,
	IP_VS_TCP_S_SYN_SENT,
	IP_VS_TCP_S_SYN_RECV,
	IP_VS_TCP_S_FIN_WAIT,
	IP_VS_TCP_S_TIME_WAIT,
	IP_VS_TCP_S_CLOSE,
	IP_VS_TCP_S_CLOSE_WAIT,
	IP_VS_TCP_S_LAST_ACK,
	IP_VS_TCP_S_LISTEN,
	IP_VS_TCP_S_SYNACK,
	IP_VS_TCP_S_LAST
};
304
/* UDP State Values: a single state (0); IP_VS_UDP_S_LAST is the count. */
enum {
	IP_VS_UDP_S_NORMAL,
	IP_VS_UDP_S_LAST,
};
310
/* ICMP State Values: a single state (0); IP_VS_ICMP_S_LAST is the count. */
enum {
	IP_VS_ICMP_S_NORMAL,
	IP_VS_ICMP_S_LAST,
};
316
/* SCTP State Values.
 * Values are consecutive starting at 0; IP_VS_SCTP_S_LAST is one past the
 * highest state value (i.e. the state count).
 */
enum ip_vs_sctp_states {
	IP_VS_SCTP_S_NONE,
	IP_VS_SCTP_S_INIT1,
	IP_VS_SCTP_S_INIT,
	IP_VS_SCTP_S_COOKIE_SENT,
	IP_VS_SCTP_S_COOKIE_REPLIED,
	IP_VS_SCTP_S_COOKIE_WAIT,
	IP_VS_SCTP_S_COOKIE,
	IP_VS_SCTP_S_COOKIE_ECHOED,
	IP_VS_SCTP_S_ESTABLISHED,
	IP_VS_SCTP_S_SHUTDOWN_SENT,
	IP_VS_SCTP_S_SHUTDOWN_RECEIVED,
	IP_VS_SCTP_S_SHUTDOWN_ACK_SENT,
	IP_VS_SCTP_S_REJECTED,
	IP_VS_SCTP_S_CLOSED,
	IP_VS_SCTP_S_LAST
};
335
/* Connection templates use bits from state (bit values, unlike the
 * consecutive per-protocol state enums).
 */
#define IP_VS_CTPL_S_NONE		0x0000
#define IP_VS_CTPL_S_ASSURED		0x0001
#define IP_VS_CTPL_S_LAST		0x0002
340
/* Delta sequence info structure
 * Each ip_vs_conn has two of these: in_seq for incoming and out_seq for
 * outgoing sequence number changes.
 * Only used in the VS/NAT.
 */
struct ip_vs_seq {
	__u32			init_seq;	/* Add delta from this seq */
	__u32			delta;		/* Delta in sequence numbers */
	__u32			previous_delta;	/* Delta in sequence numbers
						 * before last resized pkt */
};
351
/* Counters kept per cpu; embedded in struct ip_vs_cpu_stats */
struct ip_vs_counters {
	__u64		conns;		/* connections scheduled */
	__u64		inpkts;		/* incoming packets */
	__u64		outpkts;	/* outgoing packets */
	__u64		inbytes;	/* incoming bytes */
	__u64		outbytes;	/* outgoing bytes */
};
/* Stats per cpu: the counters plus the u64_stats_sync object that
 * accompanies them.
 */
struct ip_vs_cpu_stats {
	struct ip_vs_counters   cnt;
	struct u64_stats_sync   syncp;
};
365
/* IPVS statistics objects: rate estimator state.
 * NOTE(review): the last_* fields are presumably the counter values seen at
 * the previous estimation pass and the remaining fields the computed rates;
 * the code using them is not in this header - confirm in ip_vs_est.c.
 */
struct ip_vs_estimator {
	struct list_head	list;

	u64			last_inbytes;
	u64			last_outbytes;
	u64			last_conns;
	u64			last_inpkts;
	u64			last_outpkts;

	u64			cps;
	u64			inpps;
	u64			outpps;
	u64			inbps;
	u64			outbps;
};
382
/*
 * IPVS statistics object, 64-bit kernel version of struct ip_vs_stats_user.
 * The first group holds totals, the second group holds current rates.
 */
struct ip_vs_kstats {
	u64			conns;		/* connections scheduled */
	u64			inpkts;		/* incoming packets */
	u64			outpkts;	/* outgoing packets */
	u64			inbytes;	/* incoming bytes */
	u64			outbytes;	/* outgoing bytes */

	u64			cps;		/* current connection rate */
	u64			inpps;		/* current in packet rate */
	u64			outpps;		/* current out packet rate */
	u64			inbps;		/* current in byte rate */
	u64			outbps;		/* current out byte rate */
};
399
/* Complete statistics object; embedded in struct ip_vs_service,
 * struct ip_vs_dest and struct netns_ipvs (tot_stats).
 */
struct ip_vs_stats {
	struct ip_vs_kstats	kstats;		/* kernel statistics */
	struct ip_vs_estimator	est;		/* estimator */
	struct ip_vs_cpu_stats __percpu	*cpustats;	/* per cpu counters */
	spinlock_t		lock;		/* spin lock */
	struct ip_vs_kstats	kstats0;	/* reset values */
};
407
/* Forward declarations so the structures and operation tables below can
 * refer to these types through pointers.
 */
struct dst_entry;
struct iphdr;
struct ip_vs_conn;
struct ip_vs_app;
struct sk_buff;
struct ip_vs_proto_data;
414
/* Per-protocol descriptor: identification fields followed by a table of
 * callbacks.  Looked up with ip_vs_proto_get(); the per-netns companion is
 * struct ip_vs_proto_data.
 * NOTE(review): callback contracts (return values, locking) are defined by
 * the implementations, which are not visible in this header.
 */
struct ip_vs_protocol {
	struct ip_vs_protocol	*next;	/* presumably next registered protocol - confirm */
	char			*name;
	u16			protocol;
	u16			num_states;
	int			dont_defrag;

	/* global (non-netns) setup and teardown */
	void (*init)(struct ip_vs_protocol *pp);

	void (*exit)(struct ip_vs_protocol *pp);

	/* per network namespace setup and teardown */
	int (*init_netns)(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd);

	void (*exit_netns)(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd);

	/* may hand back a connection through *cpp and a verdict via *verdict */
	int (*conn_schedule)(struct netns_ipvs *ipvs,
			     int af, struct sk_buff *skb,
			     struct ip_vs_proto_data *pd,
			     int *verdict, struct ip_vs_conn **cpp,
			     struct ip_vs_iphdr *iph);

	/* connection lookup for @skb using the pre-parsed header @iph */
	struct ip_vs_conn *
	(*conn_in_get)(struct netns_ipvs *ipvs,
		       int af,
		       const struct sk_buff *skb,
		       const struct ip_vs_iphdr *iph);

	struct ip_vs_conn *
	(*conn_out_get)(struct netns_ipvs *ipvs,
			int af,
			const struct sk_buff *skb,
			const struct ip_vs_iphdr *iph);

	int (*snat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp,
			    struct ip_vs_conn *cp, struct ip_vs_iphdr *iph);

	int (*dnat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp,
			    struct ip_vs_conn *cp, struct ip_vs_iphdr *iph);

	/* human readable name for a state value */
	const char *(*state_name)(int state);

	void (*state_transition)(struct ip_vs_conn *cp, int direction,
				 const struct sk_buff *skb,
				 struct ip_vs_proto_data *pd);

	int (*register_app)(struct netns_ipvs *ipvs, struct ip_vs_app *inc);

	void (*unregister_app)(struct netns_ipvs *ipvs, struct ip_vs_app *inc);

	int (*app_conn_bind)(struct ip_vs_conn *cp);

	/* invoked by IP_VS_DBG_PKT() and IP_VS_DBG_RL_PKT() */
	void (*debug_packet)(int af, struct ip_vs_protocol *pp,
			     const struct sk_buff *skb,
			     int offset,
			     const char *msg);

	void (*timeout_change)(struct ip_vs_proto_data *pd, int flags);
};
473
/* protocol data per netns; instances are stored in
 * netns_ipvs.proto_data_table and looked up with ip_vs_proto_data_get().
 */
struct ip_vs_proto_data {
	struct ip_vs_proto_data	*next;
	struct ip_vs_protocol	*pp;		/* protocol this data belongs to */
	int			*timeout_table;	/* protocol timeout table */
	atomic_t		appcnt;		/* counter of proto app incs. */
	struct tcp_states_t	*tcp_state_table;
};
482
/* Lookup helpers keyed by protocol number: the global protocol descriptor
 * and its per-netns data.  Defined outside this header.
 * NOTE(review): presumably return NULL when @proto is not registered -
 * confirm in ip_vs_proto.c.
 */
struct ip_vs_protocol   *ip_vs_proto_get(unsigned short proto);
struct ip_vs_proto_data *ip_vs_proto_data_get(struct netns_ipvs *ipvs,
					      unsigned short proto);
486
/* Bundle of connection lookup/creation parameters.  The addresses are held
 * by pointer, not copied.  Passed to the persistence engine callbacks
 * (fill_param, ct_match, hashkey_raw) in struct ip_vs_pe.
 */
struct ip_vs_conn_param {
	struct netns_ipvs		*ipvs;
	const union nf_inet_addr	*caddr;		/* client address */
	const union nf_inet_addr	*vaddr;		/* virtual address */
	__be16				cport;
	__be16				vport;
	__u16				protocol;
	u16				af;

	/* persistence engine and its opaque data */
	const struct ip_vs_pe		*pe;
	char				*pe_data;
	__u8				pe_data_len;
};
500
/* IP_VS structure allocated for each dynamically scheduled connection */
struct ip_vs_conn {
	struct hlist_node	c_list;         /* hashed list heads */
	/* Protocol, addresses and port numbers */
	__be16                  cport;
	__be16                  dport;
	__be16                  vport;
	u16			af;		/* address family */
	union nf_inet_addr      caddr;          /* client address */
	union nf_inet_addr      vaddr;          /* virtual address */
	union nf_inet_addr      daddr;          /* destination address */
	volatile __u32          flags;          /* status flags */
	__u16                   protocol;       /* Which protocol (TCP/UDP) */
	__u16			daf;		/* Address family of the dest */
	struct netns_ipvs	*ipvs;

	/* counter and timer */
	refcount_t		refcnt;		/* reference count */
	struct timer_list	timer;		/* Expiration timer */
	volatile unsigned long	timeout;	/* timeout */

	/* Flags and state transition */
	spinlock_t              lock;           /* lock for state transition */
	volatile __u16          state;          /* state info */
	volatile __u16          old_state;      /* old state, to be used for
						 * state transition triggered
						 * synchronization
						 */
	__u32			fwmark;		/* Firewall mark from skb */
	unsigned long		sync_endtime;	/* jiffies + sent_retries */

	/* Control members */
	struct ip_vs_conn       *control;       /* Master control connection */
	atomic_t                n_control;      /* Number of controlled ones */
	struct ip_vs_dest       *dest;          /* real server */
	atomic_t                in_pkts;        /* incoming packet counter */

	/* Packet transmitter for different forwarding methods.  If it
	 * mangles the packet, it must return NF_DROP or better NF_STOLEN,
	 * otherwise this must be changed to a sk_buff **.
	 * NF_ACCEPT can be returned when destination is local.
	 */
	int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp,
			   struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);

	/* Note: we can group the following members into a structure,
	 * in order to save more space, and the following members are
	 * only used in VS/NAT anyway
	 */
	struct ip_vs_app        *app;           /* bound ip_vs_app object */
	void                    *app_data;      /* Application private data */
	/* in_seq and out_seq are grouped so they can also be addressed as
	 * the single member sync_conn_opt
	 */
	struct_group(sync_conn_opt,
		struct ip_vs_seq  in_seq;       /* incoming seq. struct */
		struct ip_vs_seq  out_seq;      /* outgoing seq. struct */
	);

	/* persistence engine and its opaque data */
	const struct ip_vs_pe	*pe;
	char			*pe_data;
	__u8			pe_data_len;

	struct rcu_head		rcu_head;
};
563
/* Extended internal versions of struct ip_vs_service_user and ip_vs_dest_user
 * for IPv6 support.
 *
 * We need these to conveniently pass around service and destination
 * options, but unfortunately, we also need to keep the old definitions to
 * maintain userspace backwards compatibility for the setsockopt interface.
 */
struct ip_vs_service_user_kern {
	/* virtual service addresses */
	u16			af;
	u16			protocol;
	union nf_inet_addr	addr;		/* virtual ip address */
	__be16			port;
	u32			fwmark;		/* firewall mark of service */

	/* virtual service options */
	char			*sched_name;
	char			*pe_name;
	unsigned int		flags;		/* virtual service flags */
	unsigned int		timeout;	/* persistent timeout in sec */
	__be32			netmask;	/* persistent netmask or plen */
};
586
587
/* Kernel-internal description of a destination (real server) and its
 * options; uses union nf_inet_addr plus an explicit address family so that
 * both IPv4 and IPv6 addresses fit.
 */
struct ip_vs_dest_user_kern {
	/* destination server address */
	union nf_inet_addr	addr;
	__be16			port;

	/* real server options */
	unsigned int		conn_flags;	/* connection flags */
	int			weight;		/* destination weight */

	/* thresholds for active connections */
	u32			u_threshold;	/* upper threshold */
	u32			l_threshold;	/* lower threshold */

	/* Address family of addr */
	u16			af;

	u16			tun_type;	/* tunnel type */
	__be16			tun_port;	/* tunnel port */
	u16			tun_flags;	/* tunnel flags */
};
608
609
/*
 * The information about the virtual service offered to the net and the
 * forwarding entries.
 * A service carries two hash nodes (s_list, f_list), the list of its
 * destinations and the bound scheduler / persistence engine, both of which
 * are RCU-annotated pointers.
 */
struct ip_vs_service {
	struct hlist_node	s_list;   /* for normal service table */
	struct hlist_node	f_list;   /* for fwmark-based service table */
	atomic_t		refcnt;   /* reference counter */

	u16			af;       /* address family */
	__u16			protocol; /* which protocol (TCP/UDP) */
	union nf_inet_addr	addr;	  /* IP address for virtual service */
	__be16			port;	  /* port number for the service */
	__u32                   fwmark;   /* firewall mark of the service */
	unsigned int		flags;	  /* service status flags */
	unsigned int		timeout;  /* persistent timeout in ticks */
	__be32			netmask;  /* grouping granularity, mask/plen */
	struct netns_ipvs	*ipvs;

	struct list_head	destinations;  /* real server d-linked list */
	__u32			num_dests;     /* number of servers */
	struct ip_vs_stats      stats;         /* statistics for the service */

	/* for scheduling */
	struct ip_vs_scheduler __rcu *scheduler; /* bound scheduler object */
	spinlock_t		sched_lock;    /* lock sched_data */
	void			*sched_data;   /* scheduler application data */

	/* alternate persistence engine */
	struct ip_vs_pe __rcu	*pe;
	int			conntrack_afmask;

	struct rcu_head		rcu_head;
};
644
/* Information for cached dst; referenced from struct ip_vs_dest through
 * the RCU-annotated dest_dst pointer.
 */
struct ip_vs_dest_dst {
	struct dst_entry	*dst_cache;	/* destination cache entry */
	u32			dst_cookie;
	union nf_inet_addr	dst_saddr;
	struct rcu_head		rcu_head;
};
652
/* The real server destination forwarding entry with ip address, port number,
 * and so on.  Also records the identity of the virtual service it belongs
 * to (protocol, vaddr, vport, vfwmark).
 */
struct ip_vs_dest {
	struct list_head	n_list;   /* for the dests in the service */
	struct hlist_node	d_list;   /* for table with all the dests */

	u16			af;		/* address family */
	__be16			port;		/* port number of the server */
	union nf_inet_addr	addr;		/* IP address of the server */
	volatile unsigned int	flags;		/* dest status flags */
	atomic_t		conn_flags;	/* flags to copy to conn */
	atomic_t		weight;		/* server weight */
	atomic_t		last_weight;	/* server latest weight */
	__u16			tun_type;	/* tunnel type */
	__be16			tun_port;	/* tunnel port */
	__u16			tun_flags;	/* tunnel flags */

	refcount_t		refcnt;		/* reference counter */
	struct ip_vs_stats      stats;          /* statistics */
	unsigned long		idle_start;	/* start time, jiffies */

	/* connection counters and thresholds */
	atomic_t		activeconns;	/* active connections */
	atomic_t		inactconns;	/* inactive connections */
	atomic_t		persistconns;	/* persistent connections */
	__u32			u_threshold;	/* upper threshold */
	__u32			l_threshold;	/* lower threshold */

	/* for destination cache */
	spinlock_t		dst_lock;	/* lock of dst_cache */
	struct ip_vs_dest_dst __rcu *dest_dst;	/* cached dst info */

	/* for virtual service */
	struct ip_vs_service __rcu *svc;	/* service it belongs to */
	__u16			protocol;	/* which protocol (TCP/UDP) */
	__be16			vport;		/* virtual port number */
	union nf_inet_addr	vaddr;		/* virtual IP address */
	__u32			vfwmark;	/* firewall mark of service */

	struct list_head	t_list;		/* in dest_trash */
	unsigned int		in_rs_table:1;	/* we are in rs_table */
};
696
/* The scheduler object: identification plus the callbacks a scheduling
 * algorithm provides.  A service points at its scheduler through
 * ip_vs_service.scheduler.
 */
struct ip_vs_scheduler {
	struct list_head	n_list;		/* d-linked list head */
	char			*name;		/* scheduler name */
	atomic_t		refcnt;		/* reference counter */
	struct module		*module;	/* THIS_MODULE/NULL */

	/* scheduler initializing service */
	int (*init_service)(struct ip_vs_service *svc);
	/* scheduling service finish */
	void (*done_service)(struct ip_vs_service *svc);
	/* dest is linked */
	int (*add_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
	/* dest is unlinked */
	int (*del_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
	/* dest is updated */
	int (*upd_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);

	/* selecting a server from the given service */
	struct ip_vs_dest* (*schedule)(struct ip_vs_service *svc,
				       const struct sk_buff *skb,
				       struct ip_vs_iphdr *iph);
};
720
/* The persistence engine object: identification plus the callbacks a
 * persistence engine provides.  Referenced from struct ip_vs_service,
 * struct ip_vs_conn and struct ip_vs_conn_param.
 */
struct ip_vs_pe {
	struct list_head	n_list;		/* d-linked list head */
	char			*name;		/* persistence engine name */
	atomic_t		refcnt;		/* reference counter */
	struct module		*module;	/* THIS_MODULE/NULL */

	/* get the connection template, if any */
	int (*fill_param)(struct ip_vs_conn_param *p, struct sk_buff *skb);
	/* does connection template @ct match the parameters in @p? */
	bool (*ct_match)(const struct ip_vs_conn_param *p,
			 struct ip_vs_conn *ct);
	u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval,
			   bool inverse);
	int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf);
	/* create connections for real-server outgoing packets */
	struct ip_vs_conn* (*conn_out)(struct ip_vs_service *svc,
				       struct ip_vs_dest *dest,
				       struct sk_buff *skb,
				       const struct ip_vs_iphdr *iph,
				       __be16 dport, __be16 cport);
};
742
/* The application module object (a.k.a. app incarnation).
 * The same structure describes both an application and its incarnations:
 * the first group of fields belongs to the application, the "members for
 * application incarnations" group to an incarnation, which points back to
 * its application through ->app.
 */
struct ip_vs_app {
	struct list_head	a_list;		/* member in app list */
	int			type;		/* IP_VS_APP_TYPE_xxx */
	char			*name;		/* application module name */
	__u16			protocol;
	struct module		*module;	/* THIS_MODULE/NULL */
	struct list_head	incs_list;	/* list of incarnations */

	/* members for application incarnations */
	struct list_head	p_list;		/* member in proto app list */
	struct ip_vs_app	*app;		/* its real application */
	__be16			port;		/* port number in net order */
	atomic_t		usecnt;		/* usage counter */
	struct rcu_head		rcu_head;

	/* output hook: Process packet in inout direction, diff set for TCP.
	 * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok,
	 *	   2=Mangled but checksum was not updated
	 */
	int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *,
		       struct sk_buff *, int *diff, struct ip_vs_iphdr *ipvsh);

	/* input hook: Process packet in outin direction, diff set for TCP.
	 * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok,
	 *	   2=Mangled but checksum was not updated
	 */
	int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *,
		      struct sk_buff *, int *diff, struct ip_vs_iphdr *ipvsh);

	/* ip_vs_app initializer */
	int (*init_conn)(struct ip_vs_app *, struct ip_vs_conn *);

	/* ip_vs_app finish */
	int (*done_conn)(struct ip_vs_app *, struct ip_vs_conn *);


	/* not used now */
	int (*bind_conn)(struct ip_vs_app *, struct ip_vs_conn *,
			 struct ip_vs_protocol *);

	void (*unbind_conn)(struct ip_vs_app *, struct ip_vs_conn *);

	int *			timeout_table;
	int *			timeouts;
	int			timeouts_size;

	int (*conn_schedule)(struct sk_buff *skb, struct ip_vs_app *app,
			     int *verdict, struct ip_vs_conn **cpp);

	/* note: these two take a raw struct iphdr, not struct ip_vs_iphdr */
	struct ip_vs_conn *
	(*conn_in_get)(const struct sk_buff *skb, struct ip_vs_app *app,
		       const struct iphdr *iph, int inverse);

	struct ip_vs_conn *
	(*conn_out_get)(const struct sk_buff *skb, struct ip_vs_app *app,
			const struct iphdr *iph, int inverse);

	int (*state_transition)(struct ip_vs_conn *cp, int direction,
				const struct sk_buff *skb,
				struct ip_vs_app *app);

	void (*timeout_change)(struct ip_vs_app *app, int flags);
};
807
/* State of a master sync instance; struct netns_ipvs refers to it through
 * its ->ms pointer.
 * NOTE(review): field roles below are inferred from their names - confirm
 * in ip_vs_sync.c.
 */
struct ipvs_master_sync_state {
	struct list_head	sync_queue;	/* presumably queued sync buffers */
	struct ip_vs_sync_buff	*sync_buff;	/* presumably buffer being filled */
	unsigned long		sync_queue_len;
	unsigned int		sync_queue_delay;
	struct delayed_work	master_wakeup_work;
	struct netns_ipvs	*ipvs;		/* owning netns state */
};
816
/* Opaque here; only pointers to it are stored in struct netns_ipvs */
struct ip_vs_sync_thread_data;
818
/* How much time to keep dests in trash: 120 seconds, expressed in jiffies */
#define IP_VS_DEST_TRASH_PERIOD		(120 * HZ)
821
/* Configuration of one sync daemon; struct netns_ipvs keeps one instance
 * for the master (mcfg) and one for the backup (bcfg).
 */
struct ipvs_sync_daemon_cfg {
	union nf_inet_addr	mcast_group;
	int			syncid;
	u16			sync_maxlen;
	u16			mcast_port;
	u8			mcast_af;
	u8			mcast_ttl;
	/* multicast interface name */
	char			mcast_ifn[IP_VS_IFNAME_MAXLEN];
};
832
/* IPVS in network namespace: all per-netns IPVS state.  Obtained from a
 * struct net with net_ipvs().  Several table-size macros are defined inline
 * next to the arrays they dimension.
 */
struct netns_ipvs {
	int			gen;		/* Generation */
	int			enable;		/* enable like nf_hooks do */
	/* Hash table: for real service lookups */
	#define IP_VS_RTAB_BITS 4
	#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
	#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)

	struct hlist_head	rs_table[IP_VS_RTAB_SIZE];
	/* ip_vs_app */
	struct list_head	app_list;
	/* ip_vs_proto */
	#define IP_VS_PROTO_TAB_SIZE	32	/* must be power of 2 */
	struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
	/* ip_vs_proto_tcp */
#ifdef CONFIG_IP_VS_PROTO_TCP
	#define	TCP_APP_TAB_BITS	4
	#define	TCP_APP_TAB_SIZE	(1 << TCP_APP_TAB_BITS)
	#define	TCP_APP_TAB_MASK	(TCP_APP_TAB_SIZE - 1)
	struct list_head	tcp_apps[TCP_APP_TAB_SIZE];
#endif
	/* ip_vs_proto_udp */
#ifdef CONFIG_IP_VS_PROTO_UDP
	#define	UDP_APP_TAB_BITS	4
	#define	UDP_APP_TAB_SIZE	(1 << UDP_APP_TAB_BITS)
	#define	UDP_APP_TAB_MASK	(UDP_APP_TAB_SIZE - 1)
	struct list_head	udp_apps[UDP_APP_TAB_SIZE];
#endif
	/* ip_vs_proto_sctp */
#ifdef CONFIG_IP_VS_PROTO_SCTP
	#define SCTP_APP_TAB_BITS	4
	#define SCTP_APP_TAB_SIZE	(1 << SCTP_APP_TAB_BITS)
	#define SCTP_APP_TAB_MASK	(SCTP_APP_TAB_SIZE - 1)
	/* Hash table for SCTP application incarnations	 */
	struct list_head	sctp_apps[SCTP_APP_TAB_SIZE];
#endif
	/* ip_vs_conn */
	atomic_t		conn_count;      /* connection counter */

	/* ip_vs_ctl */
	struct ip_vs_stats		tot_stats;  /* Statistics & est. */

	int			num_services;    /* no of virtual services */
	int			num_services6;   /* IPv6 virtual services */

	/* Trash for destinations */
	struct list_head	dest_trash;
	spinlock_t		dest_trash_lock;
	struct timer_list	dest_trash_timer; /* expiration timer */
	/* Service counters */
	atomic_t		ftpsvc_counter;
	atomic_t		nullsvc_counter;
	atomic_t		conn_out_counter;

#ifdef CONFIG_SYSCTL
	/* delayed work for expiring no dest connections */
	struct delayed_work	expire_nodest_conn_work;
	/* 1/rate drop and drop-entry variables */
	struct delayed_work	defense_work;   /* Work handler */
	int			drop_rate;
	int			drop_counter;
	int			old_secure_tcp;
	atomic_t		dropentry;
	/* locks in ctl.c */
	spinlock_t		dropentry_lock;  /* drop entry handling */
	spinlock_t		droppacket_lock; /* drop packet handling */
	spinlock_t		securetcp_lock;  /* state and timeout tables */

	/* sys-ctl struct */
	struct ctl_table_header	*sysctl_hdr;
	struct ctl_table	*sysctl_tbl;
#endif

	/* sysctl variables; the fields exist even without CONFIG_SYSCTL */
	int			sysctl_amemthresh;
	int			sysctl_am_droprate;
	int			sysctl_drop_entry;
	int			sysctl_drop_packet;
	int			sysctl_secure_tcp;
#ifdef CONFIG_IP_VS_NFCT
	int			sysctl_conntrack;
#endif
	int			sysctl_snat_reroute;
	int			sysctl_sync_ver;
	int			sysctl_sync_ports;
	int			sysctl_sync_persist_mode;
	unsigned long		sysctl_sync_qlen_max;
	int			sysctl_sync_sock_size;
	int			sysctl_cache_bypass;
	int			sysctl_expire_nodest_conn;
	int			sysctl_sloppy_tcp;
	int			sysctl_sloppy_sctp;
	int			sysctl_expire_quiescent_template;
	/* [0] is read by sysctl_sync_threshold(), [1] by sysctl_sync_period() */
	int			sysctl_sync_threshold[2];
	unsigned int		sysctl_sync_refresh_period;
	int			sysctl_sync_retries;
	int			sysctl_nat_icmp_send;
	int			sysctl_pmtu_disc;
	int			sysctl_backup_only;
	int			sysctl_conn_reuse_mode;
	int			sysctl_schedule_icmp;
	int			sysctl_ignore_tunneled;

	/* ip_vs_lblc */
	int			sysctl_lblc_expiration;
	struct ctl_table_header	*lblc_ctl_header;
	struct ctl_table	*lblc_ctl_table;
	/* ip_vs_lblcr */
	int			sysctl_lblcr_expiration;
	struct ctl_table_header	*lblcr_ctl_header;
	struct ctl_table	*lblcr_ctl_table;
	/* ip_vs_est */
	struct list_head	est_list;	/* estimator list */
	spinlock_t		est_lock;
	struct timer_list	est_timer;	/* Estimation timer */
	/* ip_vs_sync */
	spinlock_t		sync_lock;
	struct ipvs_master_sync_state *ms;
	spinlock_t		sync_buff_lock;
	struct ip_vs_sync_thread_data *master_tinfo;
	struct ip_vs_sync_thread_data *backup_tinfo;
	int			threads_mask;
	volatile int		sync_state;
	struct mutex		sync_mutex;
	struct ipvs_sync_daemon_cfg	mcfg;	/* Master Configuration */
	struct ipvs_sync_daemon_cfg	bcfg;	/* Backup Configuration */
	/* net name space ptr */
	struct net		*net;            /* Needed by timer routines */
	/* Number of heterogeneous destinations, needed because heterogeneous
	 * destinations are not supported when synchronization is enabled.
	 */
	unsigned int		mixed_address_family_dests;
	unsigned int		hooks_afmask;	/* &1=AF_INET, &2=AF_INET6 */
};
968
/* Default values and limits related to connection synchronization and the
 * sloppy_* sysctls.  Time based values are expressed in jiffies via HZ.
 * NOTE(review): where each constant is consumed is outside this header
 * section - confirm against ip_vs_ctl.c / ip_vs_sync.c.
 */
#define DEFAULT_SYNC_THRESHOLD	3
#define DEFAULT_SYNC_PERIOD	50
#define DEFAULT_SYNC_VER	1
#define DEFAULT_SLOPPY_TCP	0
#define DEFAULT_SLOPPY_SCTP	0
#define DEFAULT_SYNC_REFRESH_PERIOD	(0U * HZ)
#define DEFAULT_SYNC_RETRIES		0
#define IPVS_SYNC_WAKEUP_RATE	8
#define IPVS_SYNC_QLEN_MAX	(IPVS_SYNC_WAKEUP_RATE * 4)
#define IPVS_SYNC_SEND_DELAY	(HZ / 50)
#define IPVS_SYNC_CHECK_PERIOD	HZ
#define IPVS_SYNC_FLUSH_TIME	(HZ * 2)
#define IPVS_SYNC_PORTS_MAX	(1 << 6)
982
983#ifdef CONFIG_SYSCTL
984
985static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
986{
987	return ipvs->sysctl_sync_threshold[0];
988}
989
990static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
991{
992	return READ_ONCE(ipvs->sysctl_sync_threshold[1]);
993}
994
995static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs)
996{
997	return READ_ONCE(ipvs->sysctl_sync_refresh_period);
998}
999
1000static inline int sysctl_sync_retries(struct netns_ipvs *ipvs)
1001{
1002	return ipvs->sysctl_sync_retries;
1003}
1004
1005static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
1006{
1007	return ipvs->sysctl_sync_ver;
1008}
1009
1010static inline int sysctl_sloppy_tcp(struct netns_ipvs *ipvs)
1011{
1012	return ipvs->sysctl_sloppy_tcp;
1013}
1014
1015static inline int sysctl_sloppy_sctp(struct netns_ipvs *ipvs)
1016{
1017	return ipvs->sysctl_sloppy_sctp;
1018}
1019
1020static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
1021{
1022	return READ_ONCE(ipvs->sysctl_sync_ports);
1023}
1024
1025static inline int sysctl_sync_persist_mode(struct netns_ipvs *ipvs)
1026{
1027	return ipvs->sysctl_sync_persist_mode;
1028}
1029
1030static inline unsigned long sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
1031{
1032	return ipvs->sysctl_sync_qlen_max;
1033}
1034
1035static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
1036{
1037	return ipvs->sysctl_sync_sock_size;
1038}
1039
1040static inline int sysctl_pmtu_disc(struct netns_ipvs *ipvs)
1041{
1042	return ipvs->sysctl_pmtu_disc;
1043}
1044
1045static inline int sysctl_backup_only(struct netns_ipvs *ipvs)
1046{
1047	return ipvs->sync_state & IP_VS_STATE_BACKUP &&
1048	       ipvs->sysctl_backup_only;
1049}
1050
1051static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs)
1052{
1053	return ipvs->sysctl_conn_reuse_mode;
1054}
1055
1056static inline int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs)
1057{
1058	return ipvs->sysctl_expire_nodest_conn;
1059}
1060
1061static inline int sysctl_schedule_icmp(struct netns_ipvs *ipvs)
1062{
1063	return ipvs->sysctl_schedule_icmp;
1064}
1065
1066static inline int sysctl_ignore_tunneled(struct netns_ipvs *ipvs)
1067{
1068	return ipvs->sysctl_ignore_tunneled;
1069}
1070
1071static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs)
1072{
1073	return ipvs->sysctl_cache_bypass;
1074}
1075
1076#else
1077
1078static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
1079{
1080	return DEFAULT_SYNC_THRESHOLD;
1081}
1082
1083static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
1084{
1085	return DEFAULT_SYNC_PERIOD;
1086}
1087
1088static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs)
1089{
1090	return DEFAULT_SYNC_REFRESH_PERIOD;
1091}
1092
1093static inline int sysctl_sync_retries(struct netns_ipvs *ipvs)
1094{
1095	return DEFAULT_SYNC_RETRIES & 3;
1096}
1097
1098static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
1099{
1100	return DEFAULT_SYNC_VER;
1101}
1102
1103static inline int sysctl_sloppy_tcp(struct netns_ipvs *ipvs)
1104{
1105	return DEFAULT_SLOPPY_TCP;
1106}
1107
1108static inline int sysctl_sloppy_sctp(struct netns_ipvs *ipvs)
1109{
1110	return DEFAULT_SLOPPY_SCTP;
1111}
1112
/* !CONFIG_SYSCTL: fixed at 1; @ipvs is not consulted */
static inline int
sysctl_sync_ports(struct netns_ipvs *ipvs)
{
	return 1;
}
1117
/* !CONFIG_SYSCTL: fixed at 0; @ipvs is not consulted */
static inline int
sysctl_sync_persist_mode(struct netns_ipvs *ipvs)
{
	return 0;
}
1122
1123static inline unsigned long sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
1124{
1125	return IPVS_SYNC_QLEN_MAX;
1126}
1127
/* !CONFIG_SYSCTL: fixed at 0; @ipvs is not consulted */
static inline int
sysctl_sync_sock_size(struct netns_ipvs *ipvs)
{
	return 0;
}
1132
/* !CONFIG_SYSCTL: fixed at 1; @ipvs is not consulted */
static inline int
sysctl_pmtu_disc(struct netns_ipvs *ipvs)
{
	return 1;
}
1137
/* !CONFIG_SYSCTL: fixed at 0; @ipvs is not consulted */
static inline int
sysctl_backup_only(struct netns_ipvs *ipvs)
{
	return 0;
}
1142
/* !CONFIG_SYSCTL: fixed at 1; @ipvs is not consulted */
static inline int
sysctl_conn_reuse_mode(struct netns_ipvs *ipvs)
{
	return 1;
}
1147
/* !CONFIG_SYSCTL: fixed at 0; @ipvs is not consulted */
static inline int
sysctl_expire_nodest_conn(struct netns_ipvs *ipvs)
{
	return 0;
}
1152
/* !CONFIG_SYSCTL: fixed at 0; @ipvs is not consulted */
static inline int
sysctl_schedule_icmp(struct netns_ipvs *ipvs)
{
	return 0;
}
1157
/* !CONFIG_SYSCTL: fixed at 0; @ipvs is not consulted */
static inline int
sysctl_ignore_tunneled(struct netns_ipvs *ipvs)
{
	return 0;
}
1162
/* !CONFIG_SYSCTL: fixed at 0; @ipvs is not consulted */
static inline int
sysctl_cache_bypass(struct netns_ipvs *ipvs)
{
	return 0;
}
1167
1168#endif
1169
1170/* IPVS core functions
1171 * (from ip_vs_core.c)
1172 */
1173const char *ip_vs_proto_name(unsigned int proto);
1174void ip_vs_init_hash_table(struct list_head *table, int rows);
1175struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc,
1176				      struct ip_vs_dest *dest,
1177				      struct sk_buff *skb,
1178				      const struct ip_vs_iphdr *iph,
1179				      __be16 dport,
1180				      __be16 cport);
1181#define IP_VS_INIT_HASH_TABLE(t) ip_vs_init_hash_table((t), ARRAY_SIZE((t)))
1182
1183#define IP_VS_APP_TYPE_FTP	1
1184
1185/* ip_vs_conn handling functions
1186 * (from ip_vs_conn.c)
1187 */
/* Direction indices; IP_VS_DIR_LAST is the number of directions */
enum {
	IP_VS_DIR_INPUT		= 0,
	IP_VS_DIR_OUTPUT	= 1,
	IP_VS_DIR_INPUT_ONLY	= 2,
	IP_VS_DIR_LAST		= 3,
};
1194
1195static inline void ip_vs_conn_fill_param(struct netns_ipvs *ipvs, int af, int protocol,
1196					 const union nf_inet_addr *caddr,
1197					 __be16 cport,
1198					 const union nf_inet_addr *vaddr,
1199					 __be16 vport,
1200					 struct ip_vs_conn_param *p)
1201{
1202	p->ipvs = ipvs;
1203	p->af = af;
1204	p->protocol = protocol;
1205	p->caddr = caddr;
1206	p->cport = cport;
1207	p->vaddr = vaddr;
1208	p->vport = vport;
1209	p->pe = NULL;
1210	p->pe_data = NULL;
1211}
1212
/* Variants taking a filled-in struct ip_vs_conn_param */
struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p);

/* Variants taking the skb together with its ip_vs_iphdr */
struct ip_vs_conn *
ip_vs_conn_in_get_proto(struct netns_ipvs *ipvs, int af,
			const struct sk_buff *skb,
			const struct ip_vs_iphdr *iph);

struct ip_vs_conn *
ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af,
			 const struct sk_buff *skb,
			 const struct ip_vs_iphdr *iph);
1225
1226/* Get reference to gain full access to conn.
1227 * By default, RCU read-side critical sections have access only to
1228 * conn fields and its PE data, see ip_vs_conn_rcu_free() for reference.
1229 */
1230static inline bool __ip_vs_conn_get(struct ip_vs_conn *cp)
1231{
1232	return refcount_inc_not_zero(&cp->refcnt);
1233}
1234
1235/* put back the conn without restarting its timer */
1236static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
1237{
1238	smp_mb__before_atomic();
1239	refcount_dec(&cp->refcnt);
1240}
1241void ip_vs_conn_put(struct ip_vs_conn *cp);
1242void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
1243
1244struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
1245				  const union nf_inet_addr *daddr,
1246				  __be16 dport, unsigned int flags,
1247				  struct ip_vs_dest *dest, __u32 fwmark);
1248void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
1249
1250const char *ip_vs_state_name(const struct ip_vs_conn *cp);
1251
1252void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp);
1253int ip_vs_check_template(struct ip_vs_conn *ct, struct ip_vs_dest *cdest);
1254void ip_vs_random_dropentry(struct netns_ipvs *ipvs);
1255int ip_vs_conn_init(void);
1256void ip_vs_conn_cleanup(void);
1257
1258static inline void ip_vs_control_del(struct ip_vs_conn *cp)
1259{
1260	struct ip_vs_conn *ctl_cp = cp->control;
1261	if (!ctl_cp) {
1262		IP_VS_ERR_BUF("request control DEL for uncontrolled: "
1263			      "%s:%d to %s:%d\n",
1264			      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1265			      ntohs(cp->cport),
1266			      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
1267			      ntohs(cp->vport));
1268
1269		return;
1270	}
1271
1272	IP_VS_DBG_BUF(7, "DELeting control for: "
1273		      "cp.dst=%s:%d ctl_cp.dst=%s:%d\n",
1274		      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1275		      ntohs(cp->cport),
1276		      IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr),
1277		      ntohs(ctl_cp->cport));
1278
1279	cp->control = NULL;
1280	if (atomic_read(&ctl_cp->n_control) == 0) {
1281		IP_VS_ERR_BUF("BUG control DEL with n=0 : "
1282			      "%s:%d to %s:%d\n",
1283			      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1284			      ntohs(cp->cport),
1285			      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
1286			      ntohs(cp->vport));
1287
1288		return;
1289	}
1290	atomic_dec(&ctl_cp->n_control);
1291}
1292
1293static inline void
1294ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
1295{
1296	if (cp->control) {
1297		IP_VS_ERR_BUF("request control ADD for already controlled: "
1298			      "%s:%d to %s:%d\n",
1299			      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1300			      ntohs(cp->cport),
1301			      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
1302			      ntohs(cp->vport));
1303
1304		ip_vs_control_del(cp);
1305	}
1306
1307	IP_VS_DBG_BUF(7, "ADDing control for: "
1308		      "cp.dst=%s:%d ctl_cp.dst=%s:%d\n",
1309		      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1310		      ntohs(cp->cport),
1311		      IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr),
1312		      ntohs(ctl_cp->cport));
1313
1314	cp->control = ctl_cp;
1315	atomic_inc(&ctl_cp->n_control);
1316}
1317
1318/* Mark our template as assured */
1319static inline void
1320ip_vs_control_assure_ct(struct ip_vs_conn *cp)
1321{
1322	struct ip_vs_conn *ct = cp->control;
1323
1324	if (ct && !(ct->state & IP_VS_CTPL_S_ASSURED) &&
1325	    (ct->flags & IP_VS_CONN_F_TEMPLATE))
1326		ct->state |= IP_VS_CTPL_S_ASSURED;
1327}
1328
/* IPVS netns init & cleanup functions, listed as init/cleanup pairs */
int ip_vs_estimator_net_init(struct netns_ipvs *ipvs);
void ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs);

int ip_vs_control_net_init(struct netns_ipvs *ipvs);
void ip_vs_control_net_cleanup(struct netns_ipvs *ipvs);

int ip_vs_protocol_net_init(struct netns_ipvs *ipvs);
void ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs);

int ip_vs_app_net_init(struct netns_ipvs *ipvs);
void ip_vs_app_net_cleanup(struct netns_ipvs *ipvs);

int ip_vs_conn_net_init(struct netns_ipvs *ipvs);
void ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs);

int ip_vs_sync_net_init(struct netns_ipvs *ipvs);
void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs);

/* Takes a list of nets rather than a single netns_ipvs */
void ip_vs_service_nets_cleanup(struct list_head *net_list);
1343
1344/* IPVS application functions
1345 * (from ip_vs_app.c)
1346 */
1347#define IP_VS_APP_MAX_PORTS  8
1348struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app);
1349void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app);
1350int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
1351void ip_vs_unbind_app(struct ip_vs_conn *cp);
1352int register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
1353			   __u16 port);
1354int ip_vs_app_inc_get(struct ip_vs_app *inc);
1355void ip_vs_app_inc_put(struct ip_vs_app *inc);
1356
1357int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb,
1358		      struct ip_vs_iphdr *ipvsh);
1359int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb,
1360		     struct ip_vs_iphdr *ipvsh);
1361
1362int register_ip_vs_pe(struct ip_vs_pe *pe);
1363int unregister_ip_vs_pe(struct ip_vs_pe *pe);
1364struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
1365struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name);
1366
/* Use a #define to avoid all of module.h just for these trivial ops.
 *
 * ip_vs_pe_get(pe): if @pe is non-NULL and has a module, call
 *	__module_get() on that module.
 * ip_vs_pe_put(pe): if @pe is non-NULL and has a module, call
 *	module_put() on that module.
 *
 * Each macro expands to exactly one statement (do { } while (0)), so it
 * is safe as the body of an unbraced if/else and must be followed by a
 * semicolon. The argument is parenthesized but evaluated more than once,
 * so pass an expression without side effects.
 */
#define ip_vs_pe_get(pe)				\
	do {						\
		if ((pe) && (pe)->module)		\
			__module_get((pe)->module);	\
	} while (0)

#define ip_vs_pe_put(pe)				\
	do {						\
		if ((pe) && (pe)->module)		\
			module_put((pe)->module);	\
	} while (0)
1375
/* IPVS protocol functions (from ip_vs_proto.c) */
int ip_vs_protocol_init(void);
void ip_vs_protocol_cleanup(void);

void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags);
int *ip_vs_create_timeout_table(int *table, int size);

void ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
			       const struct sk_buff *skb, int offset,
			       const char *msg);

/* Protocol descriptors defined outside this header */
extern struct ip_vs_protocol ip_vs_protocol_tcp;
extern struct ip_vs_protocol ip_vs_protocol_udp;
extern struct ip_vs_protocol ip_vs_protocol_sctp;
extern struct ip_vs_protocol ip_vs_protocol_icmp;
extern struct ip_vs_protocol ip_vs_protocol_esp;
extern struct ip_vs_protocol ip_vs_protocol_ah;
1391
/* Registering/unregistering scheduler functions
 * (from ip_vs_sched.c)
 */
int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);

int ip_vs_bind_scheduler(struct ip_vs_service *svc,
			 struct ip_vs_scheduler *scheduler);
void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
			    struct ip_vs_scheduler *sched);

struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg);

struct ip_vs_conn *ip_vs_schedule(struct ip_vs_service *svc,
				  struct sk_buff *skb,
				  struct ip_vs_proto_data *pd, int *ignored,
				  struct ip_vs_iphdr *iph);
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
		struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph);
1411
1412/* IPVS control data and functions (from ip_vs_ctl.c) */
1413extern struct ip_vs_stats ip_vs_stats;
1414extern int sysctl_ip_vs_sync_ver;
1415
1416struct ip_vs_service *
1417ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol,
1418		  const union nf_inet_addr *vaddr, __be16 vport);
1419
1420bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
1421			    const union nf_inet_addr *daddr, __be16 dport);
1422
1423struct ip_vs_dest *
1424ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
1425			const union nf_inet_addr *daddr, __be16 dport);
1426struct ip_vs_dest *ip_vs_find_tunnel(struct netns_ipvs *ipvs, int af,
1427				     const union nf_inet_addr *daddr,
1428				     __be16 tun_port);
1429
1430int ip_vs_use_count_inc(void);
1431void ip_vs_use_count_dec(void);
1432int ip_vs_register_nl_ioctl(void);
1433void ip_vs_unregister_nl_ioctl(void);
1434int ip_vs_control_init(void);
1435void ip_vs_control_cleanup(void);
1436struct ip_vs_dest *
1437ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af,
1438		const union nf_inet_addr *daddr, __be16 dport,
1439		const union nf_inet_addr *vaddr, __be16 vport,
1440		__u16 protocol, __u32 fwmark, __u32 flags);
1441void ip_vs_try_bind_dest(struct ip_vs_conn *cp);
1442
1443static inline void ip_vs_dest_hold(struct ip_vs_dest *dest)
1444{
1445	refcount_inc(&dest->refcnt);
1446}
1447
1448static inline void ip_vs_dest_put(struct ip_vs_dest *dest)
1449{
1450	smp_mb__before_atomic();
1451	refcount_dec(&dest->refcnt);
1452}
1453
1454static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest)
1455{
1456	if (refcount_dec_and_test(&dest->refcnt))
1457		kfree(dest);
1458}
1459
/* IPVS sync daemon data and function prototypes
 * (from ip_vs_sync.c)
 */
int start_sync_thread(struct netns_ipvs *ipvs,
		      struct ipvs_sync_daemon_cfg *cfg, int state);
int stop_sync_thread(struct netns_ipvs *ipvs, int state);

void ip_vs_sync_conn(struct netns_ipvs *ipvs, struct ip_vs_conn *cp,
		     int pkts);
1467
/* IPVS rate estimator prototypes (from ip_vs_est.c) */
void ip_vs_start_estimator(struct netns_ipvs *ipvs,
			   struct ip_vs_stats *stats);
void ip_vs_stop_estimator(struct netns_ipvs *ipvs,
			  struct ip_vs_stats *stats);

void ip_vs_zero_estimator(struct ip_vs_stats *stats);
void ip_vs_read_estimator(struct ip_vs_kstats *dst,
			  struct ip_vs_stats *stats);
1473
/* Various IPVS packet transmitters (from ip_vs_xmit.c).
 * All but ip_vs_icmp_xmit() share the same four-argument signature.
 */
int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		    struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);

int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		      struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);

int ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		   struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);

int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		      struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);

int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);

int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		    struct ip_vs_protocol *pp, int offset,
		    unsigned int hooknum, struct ip_vs_iphdr *iph);

void ip_vs_dest_dst_rcu_free(struct rcu_head *head);
1489
#ifdef CONFIG_IP_VS_IPV6
/* _v6 transmitter prototypes, declared only with CONFIG_IP_VS_IPV6 */
int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
			 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);

int ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		      struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);

int ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
			 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);

int ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);

int ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		       struct ip_vs_protocol *pp, int offset,
		       unsigned int hooknum, struct ip_vs_iphdr *iph);
#endif
1503
#ifdef CONFIG_SYSCTL
/* Simple mechanism to ignore packets when we are loaded.
 *
 * While ipvs->drop_rate is 0 this always returns 0. Otherwise each call
 * decrements ipvs->drop_counter; once it is no longer positive the
 * counter is reloaded from drop_rate and 1 (drop) is returned, so about
 * one call in every drop_rate asks for a drop.
 */
static inline int
ip_vs_todrop(struct netns_ipvs *ipvs)
{
	if (!ipvs->drop_rate)
		return 0;

	ipvs->drop_counter--;
	if (ipvs->drop_counter > 0)
		return 0;

	/* Countdown expired: rearm it and drop this one */
	ipvs->drop_counter = ipvs->drop_rate;
	return 1;
}
#else
/* !CONFIG_SYSCTL: never drop */
static inline int
ip_vs_todrop(struct netns_ipvs *ipvs)
{
	return 0;
}
#endif
1521
#ifdef CONFIG_SYSCTL
/* Enqueue delayed work for expiring no dest connections.
 *
 * Does nothing unless sysctl_expire_nodest_conn() is non-zero; otherwise
 * queues ipvs->expire_nodest_conn_work on system_long_wq with a delay
 * of 1.
 */
static inline void
ip_vs_enqueue_expire_nodest_conns(struct netns_ipvs *ipvs)
{
	if (!sysctl_expire_nodest_conn(ipvs))
		return;

	queue_delayed_work(system_long_wq, &ipvs->expire_nodest_conn_work, 1);
}

void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs);
#else
/* !CONFIG_SYSCTL: nothing to enqueue */
static inline void
ip_vs_enqueue_expire_nodest_conns(struct netns_ipvs *ipvs)
{
}
#endif
1537
/* IP_VS_CONN_F_FWD_MASK bits of a destination's atomic conn_flags */
#define IP_VS_DFWD_METHOD(dest) \
	(atomic_read(&(dest)->conn_flags) & IP_VS_CONN_F_FWD_MASK)
1540
/* IP_VS_CONN_F_FWD_MASK bits of a connection's flags; ip_vs_fwd_tag()
 * maps the result to the forwarding tag of the connection.
 * @cp is parenthesized so that any pointer expression may be passed.
 */
#define IP_VS_FWD_METHOD(cp)  ((cp)->flags & IP_VS_CONN_F_FWD_MASK)
1543
1544static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp)
1545{
1546	char fwd;
1547
1548	switch (IP_VS_FWD_METHOD(cp)) {
1549	case IP_VS_CONN_F_MASQ:
1550		fwd = 'M'; break;
1551	case IP_VS_CONN_F_LOCALNODE:
1552		fwd = 'L'; break;
1553	case IP_VS_CONN_F_TUNNEL:
1554		fwd = 'T'; break;
1555	case IP_VS_CONN_F_DROUTE:
1556		fwd = 'R'; break;
1557	case IP_VS_CONN_F_BYPASS:
1558		fwd = 'B'; break;
1559	default:
1560		fwd = '?'; break;
1561	}
1562	return fwd;
1563}
1564
1565void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
1566		    struct ip_vs_conn *cp, int dir);
1567
1568#ifdef CONFIG_IP_VS_IPV6
1569void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
1570		       struct ip_vs_conn *cp, int dir);
1571#endif
1572
1573__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset);
1574
1575static inline __wsum ip_vs_check_diff4(__be32 old, __be32 new, __wsum oldsum)
1576{
1577	__be32 diff[2] = { ~old, new };
1578
1579	return csum_partial(diff, sizeof(diff), oldsum);
1580}
1581
#ifdef CONFIG_IP_VS_IPV6
/* 16-byte variant: @old and @new each point at four 32-bit words.
 * The buffer passed to csum_partial() holds the complemented old words
 * followed by the new words, each group in reverse index order (3..0).
 */
static inline __wsum
ip_vs_check_diff16(const __be32 *old, const __be32 *new, __wsum oldsum)
{
	__be32 words[8];
	int i;

	for (i = 0; i < 4; i++) {
		words[i] = ~old[3 - i];
		words[4 + i] = new[3 - i];
	}

	return csum_partial(words, sizeof(words), oldsum);
}
#endif
1592
1593static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum)
1594{
1595	__be16 diff[2] = { ~old, new };
1596
1597	return csum_partial(diff, sizeof(diff), oldsum);
1598}
1599
/* Forget current conntrack (unconfirmed) and attach notrack entry.
 *
 * With conntrack built in or modular: if @skb carries a conntrack, put
 * its reference and mark the skb IP_CT_UNTRACKED with no conntrack.
 * Otherwise this is a no-op.
 */
static inline void
ip_vs_notrack(struct sk_buff *skb)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return;

	nf_conntrack_put(&ct->ct_general);
	nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
#endif
}
1613
1614#ifdef CONFIG_IP_VS_NFCT
1615/* Netfilter connection tracking
1616 * (from ip_vs_nfct.c)
1617 */
/* Return ipvs->sysctl_conntrack when CONFIG_SYSCTL is set, else 0 */
static inline int
ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
{
	int enabled = 0;

#ifdef CONFIG_SYSCTL
	enabled = ipvs->sysctl_conntrack;
#endif
	return enabled;
}
1626
1627void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
1628			    int outin);
1629int ip_vs_confirm_conntrack(struct sk_buff *skb);
1630void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
1631			       struct ip_vs_conn *cp, u_int8_t proto,
1632			       const __be16 port, int from_rs);
1633void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp);
1634
1635#else
1636
/* !CONFIG_IP_VS_NFCT: conntrack is never reported as enabled */
static inline int
ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
{
	return 0;
}
1641
/* !CONFIG_IP_VS_NFCT: empty stub, all arguments are ignored */
static inline void
ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
{
}
1646
1647static inline int ip_vs_confirm_conntrack(struct sk_buff *skb)
1648{
1649	return NF_ACCEPT;
1650}
1651
/* !CONFIG_IP_VS_NFCT: empty stub, @cp is ignored */
static inline void
ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
{
}
1655#endif /* CONFIG_IP_VS_NFCT */
1656
/* Using old conntrack that can not be redirected to another real server?
 *
 * With CONFIG_IP_VS_NFCT: true when @skb has a conntrack attached and
 * nf_ct_is_confirmed() reports it confirmed. Without it: always false.
 * @cp is not examined.
 */
static inline bool
ip_vs_conn_uses_old_conntrack(struct ip_vs_conn *cp, struct sk_buff *skb)
{
#ifdef CONFIG_IP_VS_NFCT
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

	return ct && nf_ct_is_confirmed(ct);
#else
	return false;
#endif
}
1671
1672static inline int ip_vs_register_conntrack(struct ip_vs_service *svc)
1673{
1674#if IS_ENABLED(CONFIG_NF_CONNTRACK)
1675	int afmask = (svc->af == AF_INET6) ? 2 : 1;
1676	int ret = 0;
1677
1678	if (!(svc->conntrack_afmask & afmask)) {
1679		ret = nf_ct_netns_get(svc->ipvs->net, svc->af);
1680		if (ret >= 0)
1681			svc->conntrack_afmask |= afmask;
1682	}
1683	return ret;
1684#else
1685	return 0;
1686#endif
1687}
1688
1689static inline void ip_vs_unregister_conntrack(struct ip_vs_service *svc)
1690{
1691#if IS_ENABLED(CONFIG_NF_CONNTRACK)
1692	int afmask = (svc->af == AF_INET6) ? 2 : 1;
1693
1694	if (svc->conntrack_afmask & afmask) {
1695		nf_ct_netns_put(svc->ipvs->net, svc->af);
1696		svc->conntrack_afmask &= ~afmask;
1697	}
1698#endif
1699}
1700
/* Per-netns hook registration for address family @af */
int ip_vs_register_hooks(struct netns_ipvs *ipvs, unsigned int af);
void ip_vs_unregister_hooks(struct netns_ipvs *ipvs, unsigned int af);
1703
1704static inline int
1705ip_vs_dest_conn_overhead(struct ip_vs_dest *dest)
1706{
1707	/* We think the overhead of processing active connections is 256
1708	 * times higher than that of inactive connections in average. (This
1709	 * 256 times might not be accurate, we will change it later) We
1710	 * use the following formula to estimate the overhead now:
1711	 *		  dest->activeconns*256 + dest->inactconns
1712	 */
1713	return (atomic_read(&dest->activeconns) << 8) +
1714		atomic_read(&dest->inactconns);
1715}
1716
1717#endif	/* _NET_IP_VS_H */
1718