162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/* MPTCP socket monitoring support
362306a36Sopenharmony_ci *
462306a36Sopenharmony_ci * Copyright (c) 2020 Red Hat
562306a36Sopenharmony_ci *
662306a36Sopenharmony_ci * Author: Paolo Abeni <pabeni@redhat.com>
762306a36Sopenharmony_ci */
862306a36Sopenharmony_ci
962306a36Sopenharmony_ci#include <linux/kernel.h>
1062306a36Sopenharmony_ci#include <linux/net.h>
1162306a36Sopenharmony_ci#include <linux/inet_diag.h>
1262306a36Sopenharmony_ci#include <net/netlink.h>
1362306a36Sopenharmony_ci#include <uapi/linux/mptcp.h>
1462306a36Sopenharmony_ci#include "protocol.h"
1562306a36Sopenharmony_ci
1662306a36Sopenharmony_cistatic int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
1762306a36Sopenharmony_ci			struct netlink_callback *cb,
1862306a36Sopenharmony_ci			const struct inet_diag_req_v2 *req,
1962306a36Sopenharmony_ci			struct nlattr *bc, bool net_admin)
2062306a36Sopenharmony_ci{
2162306a36Sopenharmony_ci	if (!inet_diag_bc_sk(bc, sk))
2262306a36Sopenharmony_ci		return 0;
2362306a36Sopenharmony_ci
2462306a36Sopenharmony_ci	return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, req, NLM_F_MULTI,
2562306a36Sopenharmony_ci				 net_admin);
2662306a36Sopenharmony_ci}
2762306a36Sopenharmony_ci
2862306a36Sopenharmony_cistatic int mptcp_diag_dump_one(struct netlink_callback *cb,
2962306a36Sopenharmony_ci			       const struct inet_diag_req_v2 *req)
3062306a36Sopenharmony_ci{
3162306a36Sopenharmony_ci	struct sk_buff *in_skb = cb->skb;
3262306a36Sopenharmony_ci	struct mptcp_sock *msk = NULL;
3362306a36Sopenharmony_ci	struct sk_buff *rep;
3462306a36Sopenharmony_ci	int err = -ENOENT;
3562306a36Sopenharmony_ci	struct net *net;
3662306a36Sopenharmony_ci	struct sock *sk;
3762306a36Sopenharmony_ci
3862306a36Sopenharmony_ci	net = sock_net(in_skb->sk);
3962306a36Sopenharmony_ci	msk = mptcp_token_get_sock(net, req->id.idiag_cookie[0]);
4062306a36Sopenharmony_ci	if (!msk)
4162306a36Sopenharmony_ci		goto out_nosk;
4262306a36Sopenharmony_ci
4362306a36Sopenharmony_ci	err = -ENOMEM;
4462306a36Sopenharmony_ci	sk = (struct sock *)msk;
4562306a36Sopenharmony_ci	rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) +
4662306a36Sopenharmony_ci			inet_diag_msg_attrs_size() +
4762306a36Sopenharmony_ci			nla_total_size(sizeof(struct mptcp_info)) +
4862306a36Sopenharmony_ci			nla_total_size(sizeof(struct inet_diag_meminfo)) + 64,
4962306a36Sopenharmony_ci			GFP_KERNEL);
5062306a36Sopenharmony_ci	if (!rep)
5162306a36Sopenharmony_ci		goto out;
5262306a36Sopenharmony_ci
5362306a36Sopenharmony_ci	err = inet_sk_diag_fill(sk, inet_csk(sk), rep, cb, req, 0,
5462306a36Sopenharmony_ci				netlink_net_capable(in_skb, CAP_NET_ADMIN));
5562306a36Sopenharmony_ci	if (err < 0) {
5662306a36Sopenharmony_ci		WARN_ON(err == -EMSGSIZE);
5762306a36Sopenharmony_ci		kfree_skb(rep);
5862306a36Sopenharmony_ci		goto out;
5962306a36Sopenharmony_ci	}
6062306a36Sopenharmony_ci	err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ciout:
6362306a36Sopenharmony_ci	sock_put(sk);
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_ciout_nosk:
6662306a36Sopenharmony_ci	return err;
6762306a36Sopenharmony_ci}
6862306a36Sopenharmony_ci
/* Dump position, overlaid on netlink_callback::ctx by the dump handlers in
 * this file so that the walk can continue across multiple dump invocations.
 */
struct mptcp_diag_ctx {
	long s_slot;		/* cursor passed to mptcp_token_iter_next() */
	long s_num;		/* cursor passed to mptcp_token_iter_next() */
	unsigned int l_slot;	/* listener hash (lhash2) bucket to resume from */
	unsigned int l_num;	/* entries to skip inside bucket l_slot */
};
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_cistatic void mptcp_diag_dump_listeners(struct sk_buff *skb, struct netlink_callback *cb,
7762306a36Sopenharmony_ci				      const struct inet_diag_req_v2 *r,
7862306a36Sopenharmony_ci				      bool net_admin)
7962306a36Sopenharmony_ci{
8062306a36Sopenharmony_ci	struct inet_diag_dump_data *cb_data = cb->data;
8162306a36Sopenharmony_ci	struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx;
8262306a36Sopenharmony_ci	struct nlattr *bc = cb_data->inet_diag_nla_bc;
8362306a36Sopenharmony_ci	struct net *net = sock_net(skb->sk);
8462306a36Sopenharmony_ci	struct inet_hashinfo *hinfo;
8562306a36Sopenharmony_ci	int i;
8662306a36Sopenharmony_ci
8762306a36Sopenharmony_ci	hinfo = net->ipv4.tcp_death_row.hashinfo;
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_ci	for (i = diag_ctx->l_slot; i <= hinfo->lhash2_mask; i++) {
9062306a36Sopenharmony_ci		struct inet_listen_hashbucket *ilb;
9162306a36Sopenharmony_ci		struct hlist_nulls_node *node;
9262306a36Sopenharmony_ci		struct sock *sk;
9362306a36Sopenharmony_ci		int num = 0;
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ci		ilb = &hinfo->lhash2[i];
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci		rcu_read_lock();
9862306a36Sopenharmony_ci		spin_lock(&ilb->lock);
9962306a36Sopenharmony_ci		sk_nulls_for_each(sk, node, &ilb->nulls_head) {
10062306a36Sopenharmony_ci			const struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);
10162306a36Sopenharmony_ci			struct inet_sock *inet = inet_sk(sk);
10262306a36Sopenharmony_ci			int ret;
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_ci			if (num < diag_ctx->l_num)
10562306a36Sopenharmony_ci				goto next_listen;
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_ci			if (!ctx || strcmp(inet_csk(sk)->icsk_ulp_ops->name, "mptcp"))
10862306a36Sopenharmony_ci				goto next_listen;
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ci			sk = ctx->conn;
11162306a36Sopenharmony_ci			if (!sk || !net_eq(sock_net(sk), net))
11262306a36Sopenharmony_ci				goto next_listen;
11362306a36Sopenharmony_ci
11462306a36Sopenharmony_ci			if (r->sdiag_family != AF_UNSPEC &&
11562306a36Sopenharmony_ci			    sk->sk_family != r->sdiag_family)
11662306a36Sopenharmony_ci				goto next_listen;
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_ci			if (r->id.idiag_sport != inet->inet_sport &&
11962306a36Sopenharmony_ci			    r->id.idiag_sport)
12062306a36Sopenharmony_ci				goto next_listen;
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ci			if (!refcount_inc_not_zero(&sk->sk_refcnt))
12362306a36Sopenharmony_ci				goto next_listen;
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci			ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin);
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_ci			sock_put(sk);
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ci			if (ret < 0) {
13062306a36Sopenharmony_ci				spin_unlock(&ilb->lock);
13162306a36Sopenharmony_ci				rcu_read_unlock();
13262306a36Sopenharmony_ci				diag_ctx->l_slot = i;
13362306a36Sopenharmony_ci				diag_ctx->l_num = num;
13462306a36Sopenharmony_ci				return;
13562306a36Sopenharmony_ci			}
13662306a36Sopenharmony_ci			diag_ctx->l_num = num + 1;
13762306a36Sopenharmony_ci			num = 0;
13862306a36Sopenharmony_cinext_listen:
13962306a36Sopenharmony_ci			++num;
14062306a36Sopenharmony_ci		}
14162306a36Sopenharmony_ci		spin_unlock(&ilb->lock);
14262306a36Sopenharmony_ci		rcu_read_unlock();
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci		cond_resched();
14562306a36Sopenharmony_ci		diag_ctx->l_num = 0;
14662306a36Sopenharmony_ci	}
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci	diag_ctx->l_num = 0;
14962306a36Sopenharmony_ci	diag_ctx->l_slot = i;
15062306a36Sopenharmony_ci}
15162306a36Sopenharmony_ci
15262306a36Sopenharmony_cistatic void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
15362306a36Sopenharmony_ci			    const struct inet_diag_req_v2 *r)
15462306a36Sopenharmony_ci{
15562306a36Sopenharmony_ci	bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
15662306a36Sopenharmony_ci	struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx;
15762306a36Sopenharmony_ci	struct net *net = sock_net(skb->sk);
15862306a36Sopenharmony_ci	struct inet_diag_dump_data *cb_data;
15962306a36Sopenharmony_ci	struct mptcp_sock *msk;
16062306a36Sopenharmony_ci	struct nlattr *bc;
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(cb->ctx) < sizeof(*diag_ctx));
16362306a36Sopenharmony_ci
16462306a36Sopenharmony_ci	cb_data = cb->data;
16562306a36Sopenharmony_ci	bc = cb_data->inet_diag_nla_bc;
16662306a36Sopenharmony_ci
16762306a36Sopenharmony_ci	while ((msk = mptcp_token_iter_next(net, &diag_ctx->s_slot,
16862306a36Sopenharmony_ci					    &diag_ctx->s_num)) != NULL) {
16962306a36Sopenharmony_ci		struct inet_sock *inet = (struct inet_sock *)msk;
17062306a36Sopenharmony_ci		struct sock *sk = (struct sock *)msk;
17162306a36Sopenharmony_ci		int ret = 0;
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_ci		if (!(r->idiag_states & (1 << sk->sk_state)))
17462306a36Sopenharmony_ci			goto next;
17562306a36Sopenharmony_ci		if (r->sdiag_family != AF_UNSPEC &&
17662306a36Sopenharmony_ci		    sk->sk_family != r->sdiag_family)
17762306a36Sopenharmony_ci			goto next;
17862306a36Sopenharmony_ci		if (r->id.idiag_sport != inet->inet_sport &&
17962306a36Sopenharmony_ci		    r->id.idiag_sport)
18062306a36Sopenharmony_ci			goto next;
18162306a36Sopenharmony_ci		if (r->id.idiag_dport != inet->inet_dport &&
18262306a36Sopenharmony_ci		    r->id.idiag_dport)
18362306a36Sopenharmony_ci			goto next;
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci		ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin);
18662306a36Sopenharmony_cinext:
18762306a36Sopenharmony_ci		sock_put(sk);
18862306a36Sopenharmony_ci		if (ret < 0) {
18962306a36Sopenharmony_ci			/* will retry on the same position */
19062306a36Sopenharmony_ci			diag_ctx->s_num--;
19162306a36Sopenharmony_ci			break;
19262306a36Sopenharmony_ci		}
19362306a36Sopenharmony_ci		cond_resched();
19462306a36Sopenharmony_ci	}
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci	if ((r->idiag_states & TCPF_LISTEN) && r->id.idiag_dport == 0)
19762306a36Sopenharmony_ci		mptcp_diag_dump_listeners(skb, cb, r, net_admin);
19862306a36Sopenharmony_ci}
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_cistatic void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
20162306a36Sopenharmony_ci				void *_info)
20262306a36Sopenharmony_ci{
20362306a36Sopenharmony_ci	struct mptcp_sock *msk = mptcp_sk(sk);
20462306a36Sopenharmony_ci	struct mptcp_info *info = _info;
20562306a36Sopenharmony_ci
20662306a36Sopenharmony_ci	r->idiag_rqueue = sk_rmem_alloc_get(sk);
20762306a36Sopenharmony_ci	r->idiag_wqueue = sk_wmem_alloc_get(sk);
20862306a36Sopenharmony_ci
20962306a36Sopenharmony_ci	if (inet_sk_state_load(sk) == TCP_LISTEN) {
21062306a36Sopenharmony_ci		struct sock *lsk = READ_ONCE(msk->first);
21162306a36Sopenharmony_ci
21262306a36Sopenharmony_ci		if (lsk) {
21362306a36Sopenharmony_ci			/* override with settings from tcp listener,
21462306a36Sopenharmony_ci			 * so Send-Q will show accept queue.
21562306a36Sopenharmony_ci			 */
21662306a36Sopenharmony_ci			r->idiag_rqueue = READ_ONCE(lsk->sk_ack_backlog);
21762306a36Sopenharmony_ci			r->idiag_wqueue = READ_ONCE(lsk->sk_max_ack_backlog);
21862306a36Sopenharmony_ci		}
21962306a36Sopenharmony_ci	}
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ci	if (!info)
22262306a36Sopenharmony_ci		return;
22362306a36Sopenharmony_ci
22462306a36Sopenharmony_ci	mptcp_diag_fill_info(msk, info);
22562306a36Sopenharmony_ci}
22662306a36Sopenharmony_ci
22762306a36Sopenharmony_cistatic const struct inet_diag_handler mptcp_diag_handler = {
22862306a36Sopenharmony_ci	.dump		 = mptcp_diag_dump,
22962306a36Sopenharmony_ci	.dump_one	 = mptcp_diag_dump_one,
23062306a36Sopenharmony_ci	.idiag_get_info  = mptcp_diag_get_info,
23162306a36Sopenharmony_ci	.idiag_type	 = IPPROTO_MPTCP,
23262306a36Sopenharmony_ci	.idiag_info_size = sizeof(struct mptcp_info),
23362306a36Sopenharmony_ci};
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_cistatic int __init mptcp_diag_init(void)
23662306a36Sopenharmony_ci{
23762306a36Sopenharmony_ci	return inet_diag_register(&mptcp_diag_handler);
23862306a36Sopenharmony_ci}
23962306a36Sopenharmony_ci
/* Module exit point: unregister the handler registered by mptcp_diag_init() */
static void __exit mptcp_diag_exit(void)
{
	inet_diag_unregister(&mptcp_diag_handler);
}
24462306a36Sopenharmony_ci
/* Module plumbing: init/exit hooks, license and the module alias */
module_init(mptcp_diag_init);
module_exit(mptcp_diag_exit);
MODULE_LICENSE("GPL");
/* the trailing "2-262" in the alias encodes the family and the protocol */
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-262 /* AF_INET - IPPROTO_MPTCP */);
249