xref: /kernel/linux/linux-5.10/net/smc/smc_close.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0
2/*
3 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4 *
5 *  Socket Closing - normal and abnormal
6 *
7 *  Copyright IBM Corp. 2016
8 *
9 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
10 */
11
12#include <linux/workqueue.h>
13#include <linux/sched/signal.h>
14
15#include <net/sock.h>
16#include <net/tcp.h>
17
18#include "smc.h"
19#include "smc_tx.h"
20#include "smc_cdc.h"
21#include "smc_close.h"
22
23/* release the clcsock that is assigned to the smc_sock */
24void smc_clcsock_release(struct smc_sock *smc)
25{
26	struct socket *tcp;
27
28	if (smc->listen_smc && current_work() != &smc->smc_listen_work)
29		cancel_work_sync(&smc->smc_listen_work);
30	mutex_lock(&smc->clcsock_release_lock);
31	if (smc->clcsock) {
32		tcp = smc->clcsock;
33		smc->clcsock = NULL;
34		sock_release(tcp);
35	}
36	mutex_unlock(&smc->clcsock_release_lock);
37}
38
39static void smc_close_cleanup_listen(struct sock *parent)
40{
41	struct sock *sk;
42
43	/* Close non-accepted connections */
44	while ((sk = smc_accept_dequeue(parent, NULL)))
45		smc_close_non_accepted(sk);
46}
47
48/* wait for sndbuf data being transmitted */
49static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
50{
51	DEFINE_WAIT_FUNC(wait, woken_wake_function);
52	struct sock *sk = &smc->sk;
53
54	if (!timeout)
55		return;
56
57	if (!smc_tx_prepared_sends(&smc->conn))
58		return;
59
60	smc->wait_close_tx_prepared = 1;
61	add_wait_queue(sk_sleep(sk), &wait);
62	while (!signal_pending(current) && timeout) {
63		int rc;
64
65		rc = sk_wait_event(sk, &timeout,
66				   !smc_tx_prepared_sends(&smc->conn) ||
67				   READ_ONCE(sk->sk_err) == ECONNABORTED ||
68				   READ_ONCE(sk->sk_err) == ECONNRESET ||
69				   smc->conn.killed,
70				   &wait);
71		if (rc)
72			break;
73	}
74	remove_wait_queue(sk_sleep(sk), &wait);
75	smc->wait_close_tx_prepared = 0;
76}
77
78void smc_close_wake_tx_prepared(struct smc_sock *smc)
79{
80	if (smc->wait_close_tx_prepared)
81		/* wake up socket closing */
82		smc->sk.sk_state_change(&smc->sk);
83}
84
85static int smc_close_wr(struct smc_connection *conn)
86{
87	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;
88
89	return smc_cdc_get_slot_and_msg_send(conn);
90}
91
92static int smc_close_final(struct smc_connection *conn)
93{
94	if (atomic_read(&conn->bytes_to_rcv))
95		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
96	else
97		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;
98	if (conn->killed)
99		return -EPIPE;
100
101	return smc_cdc_get_slot_and_msg_send(conn);
102}
103
104int smc_close_abort(struct smc_connection *conn)
105{
106	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
107
108	return smc_cdc_get_slot_and_msg_send(conn);
109}
110
111static void smc_close_cancel_work(struct smc_sock *smc)
112{
113	struct sock *sk = &smc->sk;
114
115	release_sock(sk);
116	if (cancel_work_sync(&smc->conn.close_work))
117		sock_put(sk);
118	cancel_delayed_work_sync(&smc->conn.tx_work);
119	lock_sock(sk);
120}
121
122/* terminate smc socket abnormally - active abort
123 * link group is terminated, i.e. RDMA communication no longer possible
124 */
125void smc_close_active_abort(struct smc_sock *smc)
126{
127	struct sock *sk = &smc->sk;
128	bool release_clcsock = false;
129
130	if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) {
131		sk->sk_err = ECONNABORTED;
132		if (smc->clcsock && smc->clcsock->sk)
133			tcp_abort(smc->clcsock->sk, ECONNABORTED);
134	}
135	switch (sk->sk_state) {
136	case SMC_ACTIVE:
137	case SMC_APPCLOSEWAIT1:
138	case SMC_APPCLOSEWAIT2:
139		sk->sk_state = SMC_PEERABORTWAIT;
140		smc_close_cancel_work(smc);
141		if (sk->sk_state != SMC_PEERABORTWAIT)
142			break;
143		sk->sk_state = SMC_CLOSED;
144		sock_put(sk); /* (postponed) passive closing */
145		break;
146	case SMC_PEERCLOSEWAIT1:
147	case SMC_PEERCLOSEWAIT2:
148	case SMC_PEERFINCLOSEWAIT:
149		sk->sk_state = SMC_PEERABORTWAIT;
150		smc_close_cancel_work(smc);
151		if (sk->sk_state != SMC_PEERABORTWAIT)
152			break;
153		sk->sk_state = SMC_CLOSED;
154		smc_conn_free(&smc->conn);
155		release_clcsock = true;
156		sock_put(sk); /* passive closing */
157		break;
158	case SMC_PROCESSABORT:
159	case SMC_APPFINCLOSEWAIT:
160		sk->sk_state = SMC_PEERABORTWAIT;
161		smc_close_cancel_work(smc);
162		if (sk->sk_state != SMC_PEERABORTWAIT)
163			break;
164		sk->sk_state = SMC_CLOSED;
165		smc_conn_free(&smc->conn);
166		release_clcsock = true;
167		break;
168	case SMC_INIT:
169	case SMC_PEERABORTWAIT:
170	case SMC_CLOSED:
171		break;
172	}
173
174	smc_sock_set_flag(sk, SOCK_DEAD);
175	sk->sk_state_change(sk);
176
177	if (release_clcsock) {
178		release_sock(sk);
179		smc_clcsock_release(smc);
180		lock_sock(sk);
181	}
182}
183
184static inline bool smc_close_sent_any_close(struct smc_connection *conn)
185{
186	return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
187	       conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
188}
189
190int smc_close_active(struct smc_sock *smc)
191{
192	struct smc_cdc_conn_state_flags *txflags =
193		&smc->conn.local_tx_ctrl.conn_state_flags;
194	struct smc_connection *conn = &smc->conn;
195	struct sock *sk = &smc->sk;
196	int old_state;
197	long timeout;
198	int rc = 0;
199	int rc1 = 0;
200
201	timeout = current->flags & PF_EXITING ?
202		  0 : sock_flag(sk, SOCK_LINGER) ?
203		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
204
205	old_state = sk->sk_state;
206again:
207	switch (sk->sk_state) {
208	case SMC_INIT:
209		sk->sk_state = SMC_CLOSED;
210		break;
211	case SMC_LISTEN:
212		sk->sk_state = SMC_CLOSED;
213		sk->sk_state_change(sk); /* wake up accept */
214		if (smc->clcsock && smc->clcsock->sk) {
215			smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;
216			smc->clcsock->sk->sk_user_data = NULL;
217			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
218		}
219		smc_close_cleanup_listen(sk);
220		release_sock(sk);
221		flush_work(&smc->tcp_listen_work);
222		lock_sock(sk);
223		break;
224	case SMC_ACTIVE:
225		smc_close_stream_wait(smc, timeout);
226		release_sock(sk);
227		cancel_delayed_work_sync(&conn->tx_work);
228		lock_sock(sk);
229		if (sk->sk_state == SMC_ACTIVE) {
230			/* send close request */
231			rc = smc_close_final(conn);
232			sk->sk_state = SMC_PEERCLOSEWAIT1;
233
234			/* actively shutdown clcsock before peer close it,
235			 * prevent peer from entering TIME_WAIT state.
236			 */
237			if (smc->clcsock && smc->clcsock->sk) {
238				rc1 = kernel_sock_shutdown(smc->clcsock,
239							   SHUT_RDWR);
240				rc = rc ? rc : rc1;
241			}
242		} else {
243			/* peer event has changed the state */
244			goto again;
245		}
246		break;
247	case SMC_APPFINCLOSEWAIT:
248		/* socket already shutdown wr or both (active close) */
249		if (txflags->peer_done_writing &&
250		    !smc_close_sent_any_close(conn)) {
251			/* just shutdown wr done, send close request */
252			rc = smc_close_final(conn);
253		}
254		sk->sk_state = SMC_CLOSED;
255		break;
256	case SMC_APPCLOSEWAIT1:
257	case SMC_APPCLOSEWAIT2:
258		if (!smc_cdc_rxed_any_close(conn))
259			smc_close_stream_wait(smc, timeout);
260		release_sock(sk);
261		cancel_delayed_work_sync(&conn->tx_work);
262		lock_sock(sk);
263		if (sk->sk_state != SMC_APPCLOSEWAIT1 &&
264		    sk->sk_state != SMC_APPCLOSEWAIT2)
265			goto again;
266		/* confirm close from peer */
267		rc = smc_close_final(conn);
268		if (smc_cdc_rxed_any_close(conn)) {
269			/* peer has closed the socket already */
270			sk->sk_state = SMC_CLOSED;
271			sock_put(sk); /* postponed passive closing */
272		} else {
273			/* peer has just issued a shutdown write */
274			sk->sk_state = SMC_PEERFINCLOSEWAIT;
275		}
276		break;
277	case SMC_PEERCLOSEWAIT1:
278	case SMC_PEERCLOSEWAIT2:
279		if (txflags->peer_done_writing &&
280		    !smc_close_sent_any_close(conn)) {
281			/* just shutdown wr done, send close request */
282			rc = smc_close_final(conn);
283		}
284		/* peer sending PeerConnectionClosed will cause transition */
285		break;
286	case SMC_PEERFINCLOSEWAIT:
287		/* peer sending PeerConnectionClosed will cause transition */
288		break;
289	case SMC_PROCESSABORT:
290		rc = smc_close_abort(conn);
291		sk->sk_state = SMC_CLOSED;
292		break;
293	case SMC_PEERABORTWAIT:
294		sk->sk_state = SMC_CLOSED;
295		break;
296	case SMC_CLOSED:
297		/* nothing to do, add tracing in future patch */
298		break;
299	}
300
301	if (old_state != sk->sk_state)
302		sk->sk_state_change(sk);
303	return rc;
304}
305
306static void smc_close_passive_abort_received(struct smc_sock *smc)
307{
308	struct smc_cdc_conn_state_flags *txflags =
309		&smc->conn.local_tx_ctrl.conn_state_flags;
310	struct sock *sk = &smc->sk;
311
312	switch (sk->sk_state) {
313	case SMC_INIT:
314	case SMC_ACTIVE:
315	case SMC_APPCLOSEWAIT1:
316		sk->sk_state = SMC_PROCESSABORT;
317		sock_put(sk); /* passive closing */
318		break;
319	case SMC_APPFINCLOSEWAIT:
320		sk->sk_state = SMC_PROCESSABORT;
321		break;
322	case SMC_PEERCLOSEWAIT1:
323	case SMC_PEERCLOSEWAIT2:
324		if (txflags->peer_done_writing &&
325		    !smc_close_sent_any_close(&smc->conn))
326			/* just shutdown, but not yet closed locally */
327			sk->sk_state = SMC_PROCESSABORT;
328		else
329			sk->sk_state = SMC_CLOSED;
330		sock_put(sk); /* passive closing */
331		break;
332	case SMC_APPCLOSEWAIT2:
333	case SMC_PEERFINCLOSEWAIT:
334		sk->sk_state = SMC_CLOSED;
335		sock_put(sk); /* passive closing */
336		break;
337	case SMC_PEERABORTWAIT:
338		sk->sk_state = SMC_CLOSED;
339		break;
340	case SMC_PROCESSABORT:
341	/* nothing to do, add tracing in future patch */
342		break;
343	}
344}
345
346/* Either some kind of closing has been received: peer_conn_closed,
347 * peer_conn_abort, or peer_done_writing
348 * or the link group of the connection terminates abnormally.
349 */
350static void smc_close_passive_work(struct work_struct *work)
351{
352	struct smc_connection *conn = container_of(work,
353						   struct smc_connection,
354						   close_work);
355	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
356	struct smc_cdc_conn_state_flags *rxflags;
357	bool release_clcsock = false;
358	struct sock *sk = &smc->sk;
359	int old_state;
360
361	lock_sock(sk);
362	old_state = sk->sk_state;
363
364	rxflags = &conn->local_rx_ctrl.conn_state_flags;
365	if (rxflags->peer_conn_abort) {
366		/* peer has not received all data */
367		smc_close_passive_abort_received(smc);
368		release_sock(&smc->sk);
369		cancel_delayed_work_sync(&conn->tx_work);
370		lock_sock(&smc->sk);
371		goto wakeup;
372	}
373
374	switch (sk->sk_state) {
375	case SMC_INIT:
376		sk->sk_state = SMC_APPCLOSEWAIT1;
377		break;
378	case SMC_ACTIVE:
379		sk->sk_state = SMC_APPCLOSEWAIT1;
380		/* postpone sock_put() for passive closing to cover
381		 * received SEND_SHUTDOWN as well
382		 */
383		break;
384	case SMC_PEERCLOSEWAIT1:
385		if (rxflags->peer_done_writing)
386			sk->sk_state = SMC_PEERCLOSEWAIT2;
387		fallthrough;
388		/* to check for closing */
389	case SMC_PEERCLOSEWAIT2:
390		if (!smc_cdc_rxed_any_close(conn))
391			break;
392		if (sock_flag(sk, SOCK_DEAD) &&
393		    smc_close_sent_any_close(conn)) {
394			/* smc_release has already been called locally */
395			sk->sk_state = SMC_CLOSED;
396		} else {
397			/* just shutdown, but not yet closed locally */
398			sk->sk_state = SMC_APPFINCLOSEWAIT;
399		}
400		sock_put(sk); /* passive closing */
401		break;
402	case SMC_PEERFINCLOSEWAIT:
403		if (smc_cdc_rxed_any_close(conn)) {
404			sk->sk_state = SMC_CLOSED;
405			sock_put(sk); /* passive closing */
406		}
407		break;
408	case SMC_APPCLOSEWAIT1:
409	case SMC_APPCLOSEWAIT2:
410		/* postpone sock_put() for passive closing to cover
411		 * received SEND_SHUTDOWN as well
412		 */
413		break;
414	case SMC_APPFINCLOSEWAIT:
415	case SMC_PEERABORTWAIT:
416	case SMC_PROCESSABORT:
417	case SMC_CLOSED:
418		/* nothing to do, add tracing in future patch */
419		break;
420	}
421
422wakeup:
423	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
424	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */
425
426	if (old_state != sk->sk_state) {
427		sk->sk_state_change(sk);
428		if ((sk->sk_state == SMC_CLOSED) &&
429		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
430			smc_conn_free(conn);
431			if (smc->clcsock)
432				release_clcsock = true;
433		}
434	}
435	release_sock(sk);
436	if (release_clcsock)
437		smc_clcsock_release(smc);
438	sock_put(sk); /* sock_hold done by schedulers of close_work */
439}
440
441int smc_close_shutdown_write(struct smc_sock *smc)
442{
443	struct smc_connection *conn = &smc->conn;
444	struct sock *sk = &smc->sk;
445	int old_state;
446	long timeout;
447	int rc = 0;
448
449	timeout = current->flags & PF_EXITING ?
450		  0 : sock_flag(sk, SOCK_LINGER) ?
451		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
452
453	old_state = sk->sk_state;
454again:
455	switch (sk->sk_state) {
456	case SMC_ACTIVE:
457		smc_close_stream_wait(smc, timeout);
458		release_sock(sk);
459		cancel_delayed_work_sync(&conn->tx_work);
460		lock_sock(sk);
461		if (sk->sk_state != SMC_ACTIVE)
462			goto again;
463		/* send close wr request */
464		rc = smc_close_wr(conn);
465		sk->sk_state = SMC_PEERCLOSEWAIT1;
466		break;
467	case SMC_APPCLOSEWAIT1:
468		/* passive close */
469		if (!smc_cdc_rxed_any_close(conn))
470			smc_close_stream_wait(smc, timeout);
471		release_sock(sk);
472		cancel_delayed_work_sync(&conn->tx_work);
473		lock_sock(sk);
474		if (sk->sk_state != SMC_APPCLOSEWAIT1)
475			goto again;
476		/* confirm close from peer */
477		rc = smc_close_wr(conn);
478		sk->sk_state = SMC_APPCLOSEWAIT2;
479		break;
480	case SMC_APPCLOSEWAIT2:
481	case SMC_PEERFINCLOSEWAIT:
482	case SMC_PEERCLOSEWAIT1:
483	case SMC_PEERCLOSEWAIT2:
484	case SMC_APPFINCLOSEWAIT:
485	case SMC_PROCESSABORT:
486	case SMC_PEERABORTWAIT:
487		/* nothing to do, add tracing in future patch */
488		break;
489	}
490
491	if (old_state != sk->sk_state)
492		sk->sk_state_change(sk);
493	return rc;
494}
495
496/* Initialize close properties on connection establishment. */
497void smc_close_init(struct smc_sock *smc)
498{
499	INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
500}
501