162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * VMware vSockets Driver
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include <linux/types.h>
962306a36Sopenharmony_ci#include <linux/socket.h>
1062306a36Sopenharmony_ci#include <linux/stddef.h>
1162306a36Sopenharmony_ci#include <net/sock.h>
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_ci#include "vmci_transport_notify.h"
1462306a36Sopenharmony_ci
/* Access one member of the packet-based notification state that is stored
 * in the VMCI transport of @vsk (vmci_trans(vsk)->notify.pkt).  The result
 * is an lvalue, so it can be read, assigned or have its address taken.
 */
#define PKT_FIELD(vsk, field_name) (vmci_trans(vsk)->notify.pkt.field_name)
1662306a36Sopenharmony_ci
/* Decide whether the peer, which may be blocked waiting to write, should be
 * sent a READ notification now.
 *
 * Returns false when the peer has not announced that it is waiting to write.
 * Otherwise returns true when the free space reported by
 * vmci_qpair_consume_free_space() exceeds the current notify threshold.
 * Without VSOCK_OPTIMIZATION_WAITING_NOTIFY the answer is always true.
 */
static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	u64 threshold = 0;
	bool should_notify;

	if (!PKT_FIELD(vsk, peer_waiting_write))
		return false;

#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
	/* A blocked sender is taken as a sign that it is faster than the
	 * receiver.  To slow it down, shrink write_notify_window by one page
	 * (never below write_notify_min_window) the first time the wait is
	 * detected; the notification is then held back until the number of
	 * bytes used in the queue drops below the window.
	 */
	if (!PKT_FIELD(vsk, peer_waiting_write_detected)) {
		PKT_FIELD(vsk, peer_waiting_write_detected) = true;

		if (PKT_FIELD(vsk, write_notify_window) >= PAGE_SIZE) {
			PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE;
			if (PKT_FIELD(vsk, write_notify_window) <
			    PKT_FIELD(vsk, write_notify_min_window))
				PKT_FIELD(vsk, write_notify_window) =
				    PKT_FIELD(vsk, write_notify_min_window);
		} else {
			PKT_FIELD(vsk, write_notify_window) =
			    PKT_FIELD(vsk, write_notify_min_window);
		}
	}

	threshold = vmci_trans(vsk)->consume_size -
		PKT_FIELD(vsk, write_notify_window);
#endif

	/* The wait information sent by the peer is ignored for now; only the
	 * free space is compared against the threshold.  Making this smarter
	 * needs no protocol change and stays compatible with endpoints that
	 * run a different version of this function.
	 *
	 * With flow control the test reads: notify if
	 * free_space > consume_size - write_notify_window.  Because
	 * free_space == consume_size - buffer_ready, this is the same as:
	 * notify if write_notify_window > buffer_ready.
	 */
	should_notify =
		vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) >
		threshold;

#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
	/* Once the peer is notified, clear the detected flag so that the
	 * next wait shrinks the window again.
	 */
	if (should_notify)
		PKT_FIELD(vsk, peer_waiting_write_detected) = false;
#endif

	return should_notify;
#else
	return true;
#endif
}
8562306a36Sopenharmony_ci
/* Decide whether the peer, which may be blocked waiting to read, should be
 * sent a WROTE notification now.
 *
 * Returns true only when the peer announced that it is waiting to read and
 * vmci_qpair_produce_buf_ready() reports data in the queue.  Without
 * VSOCK_OPTIMIZATION_WAITING_NOTIFY the answer is always true.
 */
static bool vmci_transport_notify_waiting_read(struct vsock_sock *vsk)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	/* The wait information sent by the peer is ignored for now; any
	 * readable data is enough.  Making this smarter needs no protocol
	 * change and stays compatible with endpoints that run a different
	 * version of this function.
	 */
	return PKT_FIELD(vsk, peer_waiting_read) &&
	       vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) > 0;
#else
	return true;
#endif
}
10362306a36Sopenharmony_ci
/* Handle a WAITING_READ packet: remember that the peer is waiting to read,
 * store the wait information carried in @pkt, and send a WROTE notification
 * straight away if vmci_transport_notify_waiting_read() asks for one.
 *
 * @bottom_half selects the _bh send variant, which is addressed through
 * @dst and @src instead of @sk.
 */
static void
vmci_transport_handle_waiting_read(struct sock *sk,
				   struct vmci_transport_packet *pkt,
				   bool bottom_half,
				   struct sockaddr_vm *dst,
				   struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk = vsock_sk(sk);
	bool delivered;

	PKT_FIELD(vsk, peer_waiting_read) = true;
	memcpy(&PKT_FIELD(vsk, peer_waiting_read_info), &pkt->u.wait,
	       sizeof(PKT_FIELD(vsk, peer_waiting_read_info)));

	if (!vmci_transport_notify_waiting_read(vsk))
		return;

	if (bottom_half)
		delivered = vmci_transport_send_wrote_bh(dst, src) > 0;
	else
		delivered = vmci_transport_send_wrote(sk) > 0;

	/* Keep the flag set on failure so a later write can retry. */
	if (delivered)
		PKT_FIELD(vsk, peer_waiting_read) = false;
#endif
}
13362306a36Sopenharmony_ci
/* Handle a WAITING_WRITE packet: remember that the peer is waiting to write,
 * store the wait information carried in @pkt, and send a READ notification
 * straight away if vmci_transport_notify_waiting_write() asks for one.
 *
 * @bottom_half selects the _bh send variant, which is addressed through
 * @dst and @src instead of @sk.
 */
static void
vmci_transport_handle_waiting_write(struct sock *sk,
				    struct vmci_transport_packet *pkt,
				    bool bottom_half,
				    struct sockaddr_vm *dst,
				    struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk = vsock_sk(sk);
	bool delivered;

	PKT_FIELD(vsk, peer_waiting_write) = true;
	memcpy(&PKT_FIELD(vsk, peer_waiting_write_info), &pkt->u.wait,
	       sizeof(PKT_FIELD(vsk, peer_waiting_write_info)));

	if (!vmci_transport_notify_waiting_write(vsk))
		return;

	if (bottom_half)
		delivered = vmci_transport_send_read_bh(dst, src) > 0;
	else
		delivered = vmci_transport_send_read(sk) > 0;

	/* Keep the flag set on failure so a later read can retry. */
	if (delivered)
		PKT_FIELD(vsk, peer_waiting_write) = false;
#endif
}
16362306a36Sopenharmony_ci
16462306a36Sopenharmony_cistatic void
16562306a36Sopenharmony_civmci_transport_handle_read(struct sock *sk,
16662306a36Sopenharmony_ci			   struct vmci_transport_packet *pkt,
16762306a36Sopenharmony_ci			   bool bottom_half,
16862306a36Sopenharmony_ci			   struct sockaddr_vm *dst, struct sockaddr_vm *src)
16962306a36Sopenharmony_ci{
17062306a36Sopenharmony_ci#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
17162306a36Sopenharmony_ci	struct vsock_sock *vsk;
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_ci	vsk = vsock_sk(sk);
17462306a36Sopenharmony_ci	PKT_FIELD(vsk, sent_waiting_write) = false;
17562306a36Sopenharmony_ci#endif
17662306a36Sopenharmony_ci
17762306a36Sopenharmony_ci	sk->sk_write_space(sk);
17862306a36Sopenharmony_ci}
17962306a36Sopenharmony_ci
18062306a36Sopenharmony_cistatic bool send_waiting_read(struct sock *sk, u64 room_needed)
18162306a36Sopenharmony_ci{
18262306a36Sopenharmony_ci#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
18362306a36Sopenharmony_ci	struct vsock_sock *vsk;
18462306a36Sopenharmony_ci	struct vmci_transport_waiting_info waiting_info;
18562306a36Sopenharmony_ci	u64 tail;
18662306a36Sopenharmony_ci	u64 head;
18762306a36Sopenharmony_ci	u64 room_left;
18862306a36Sopenharmony_ci	bool ret;
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_ci	vsk = vsock_sk(sk);
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ci	if (PKT_FIELD(vsk, sent_waiting_read))
19362306a36Sopenharmony_ci		return true;
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci	if (PKT_FIELD(vsk, write_notify_window) <
19662306a36Sopenharmony_ci			vmci_trans(vsk)->consume_size)
19762306a36Sopenharmony_ci		PKT_FIELD(vsk, write_notify_window) =
19862306a36Sopenharmony_ci		    min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE,
19962306a36Sopenharmony_ci			vmci_trans(vsk)->consume_size);
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci	vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, &tail, &head);
20262306a36Sopenharmony_ci	room_left = vmci_trans(vsk)->consume_size - head;
20362306a36Sopenharmony_ci	if (room_needed >= room_left) {
20462306a36Sopenharmony_ci		waiting_info.offset = room_needed - room_left;
20562306a36Sopenharmony_ci		waiting_info.generation =
20662306a36Sopenharmony_ci		    PKT_FIELD(vsk, consume_q_generation) + 1;
20762306a36Sopenharmony_ci	} else {
20862306a36Sopenharmony_ci		waiting_info.offset = head + room_needed;
20962306a36Sopenharmony_ci		waiting_info.generation = PKT_FIELD(vsk, consume_q_generation);
21062306a36Sopenharmony_ci	}
21162306a36Sopenharmony_ci
21262306a36Sopenharmony_ci	ret = vmci_transport_send_waiting_read(sk, &waiting_info) > 0;
21362306a36Sopenharmony_ci	if (ret)
21462306a36Sopenharmony_ci		PKT_FIELD(vsk, sent_waiting_read) = true;
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	return ret;
21762306a36Sopenharmony_ci#else
21862306a36Sopenharmony_ci	return true;
21962306a36Sopenharmony_ci#endif
22062306a36Sopenharmony_ci}
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_cistatic bool send_waiting_write(struct sock *sk, u64 room_needed)
22362306a36Sopenharmony_ci{
22462306a36Sopenharmony_ci#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
22562306a36Sopenharmony_ci	struct vsock_sock *vsk;
22662306a36Sopenharmony_ci	struct vmci_transport_waiting_info waiting_info;
22762306a36Sopenharmony_ci	u64 tail;
22862306a36Sopenharmony_ci	u64 head;
22962306a36Sopenharmony_ci	u64 room_left;
23062306a36Sopenharmony_ci	bool ret;
23162306a36Sopenharmony_ci
23262306a36Sopenharmony_ci	vsk = vsock_sk(sk);
23362306a36Sopenharmony_ci
23462306a36Sopenharmony_ci	if (PKT_FIELD(vsk, sent_waiting_write))
23562306a36Sopenharmony_ci		return true;
23662306a36Sopenharmony_ci
23762306a36Sopenharmony_ci	vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, &tail, &head);
23862306a36Sopenharmony_ci	room_left = vmci_trans(vsk)->produce_size - tail;
23962306a36Sopenharmony_ci	if (room_needed + 1 >= room_left) {
24062306a36Sopenharmony_ci		/* Wraps around to current generation. */
24162306a36Sopenharmony_ci		waiting_info.offset = room_needed + 1 - room_left;
24262306a36Sopenharmony_ci		waiting_info.generation = PKT_FIELD(vsk, produce_q_generation);
24362306a36Sopenharmony_ci	} else {
24462306a36Sopenharmony_ci		waiting_info.offset = tail + room_needed + 1;
24562306a36Sopenharmony_ci		waiting_info.generation =
24662306a36Sopenharmony_ci		    PKT_FIELD(vsk, produce_q_generation) - 1;
24762306a36Sopenharmony_ci	}
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_ci	ret = vmci_transport_send_waiting_write(sk, &waiting_info) > 0;
25062306a36Sopenharmony_ci	if (ret)
25162306a36Sopenharmony_ci		PKT_FIELD(vsk, sent_waiting_write) = true;
25262306a36Sopenharmony_ci
25362306a36Sopenharmony_ci	return ret;
25462306a36Sopenharmony_ci#else
25562306a36Sopenharmony_ci	return true;
25662306a36Sopenharmony_ci#endif
25762306a36Sopenharmony_ci}
25862306a36Sopenharmony_ci
25962306a36Sopenharmony_cistatic int vmci_transport_send_read_notification(struct sock *sk)
26062306a36Sopenharmony_ci{
26162306a36Sopenharmony_ci	struct vsock_sock *vsk;
26262306a36Sopenharmony_ci	bool sent_read;
26362306a36Sopenharmony_ci	unsigned int retries;
26462306a36Sopenharmony_ci	int err;
26562306a36Sopenharmony_ci
26662306a36Sopenharmony_ci	vsk = vsock_sk(sk);
26762306a36Sopenharmony_ci	sent_read = false;
26862306a36Sopenharmony_ci	retries = 0;
26962306a36Sopenharmony_ci	err = 0;
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci	if (vmci_transport_notify_waiting_write(vsk)) {
27262306a36Sopenharmony_ci		/* Notify the peer that we have read, retrying the send on
27362306a36Sopenharmony_ci		 * failure up to our maximum value.  XXX For now we just log
27462306a36Sopenharmony_ci		 * the failure, but later we should schedule a work item to
27562306a36Sopenharmony_ci		 * handle the resend until it succeeds.  That would require
27662306a36Sopenharmony_ci		 * keeping track of work items in the vsk and cleaning them up
27762306a36Sopenharmony_ci		 * upon socket close.
27862306a36Sopenharmony_ci		 */
27962306a36Sopenharmony_ci		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
28062306a36Sopenharmony_ci		       !sent_read &&
28162306a36Sopenharmony_ci		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
28262306a36Sopenharmony_ci			err = vmci_transport_send_read(sk);
28362306a36Sopenharmony_ci			if (err >= 0)
28462306a36Sopenharmony_ci				sent_read = true;
28562306a36Sopenharmony_ci
28662306a36Sopenharmony_ci			retries++;
28762306a36Sopenharmony_ci		}
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ci		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS)
29062306a36Sopenharmony_ci			pr_err("%p unable to send read notify to peer\n", sk);
29162306a36Sopenharmony_ci		else
29262306a36Sopenharmony_ci#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
29362306a36Sopenharmony_ci			PKT_FIELD(vsk, peer_waiting_write) = false;
29462306a36Sopenharmony_ci#endif
29562306a36Sopenharmony_ci
29662306a36Sopenharmony_ci	}
29762306a36Sopenharmony_ci	return err;
29862306a36Sopenharmony_ci}
29962306a36Sopenharmony_ci
/* Handle a WROTE packet from the peer: clear sent_waiting_read so that a
 * new WAITING_READ may be sent later, then call vsock_data_ready() on the
 * socket.  @pkt, @bottom_half, @dst and @src are unused.
 */
static void
vmci_transport_handle_wrote(struct sock *sk,
			    struct vmci_transport_packet *pkt,
			    bool bottom_half,
			    struct sockaddr_vm *dst, struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk;

	vsk = vsock_sk(sk);
	PKT_FIELD(vsk, sent_waiting_read) = false;
#endif

	vsock_data_ready(sk);
}
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_cistatic void vmci_transport_notify_pkt_socket_init(struct sock *sk)
31462306a36Sopenharmony_ci{
31562306a36Sopenharmony_ci	struct vsock_sock *vsk = vsock_sk(sk);
31662306a36Sopenharmony_ci
31762306a36Sopenharmony_ci	PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
31862306a36Sopenharmony_ci	PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
31962306a36Sopenharmony_ci	PKT_FIELD(vsk, peer_waiting_read) = false;
32062306a36Sopenharmony_ci	PKT_FIELD(vsk, peer_waiting_write) = false;
32162306a36Sopenharmony_ci	PKT_FIELD(vsk, peer_waiting_write_detected) = false;
32262306a36Sopenharmony_ci	PKT_FIELD(vsk, sent_waiting_read) = false;
32362306a36Sopenharmony_ci	PKT_FIELD(vsk, sent_waiting_write) = false;
32462306a36Sopenharmony_ci	PKT_FIELD(vsk, produce_q_generation) = 0;
32562306a36Sopenharmony_ci	PKT_FIELD(vsk, consume_q_generation) = 0;
32662306a36Sopenharmony_ci
32762306a36Sopenharmony_ci	memset(&PKT_FIELD(vsk, peer_waiting_read_info), 0,
32862306a36Sopenharmony_ci	       sizeof(PKT_FIELD(vsk, peer_waiting_read_info)));
32962306a36Sopenharmony_ci	memset(&PKT_FIELD(vsk, peer_waiting_write_info), 0,
33062306a36Sopenharmony_ci	       sizeof(PKT_FIELD(vsk, peer_waiting_write_info)));
33162306a36Sopenharmony_ci}
33262306a36Sopenharmony_ci
/* Socket destruction hook for the packet-based protocol.  Intentionally
 * empty: this function performs no teardown work.
 */
static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk)
{
	/* Nothing to do. */
}
33662306a36Sopenharmony_ci
33762306a36Sopenharmony_cistatic int
33862306a36Sopenharmony_civmci_transport_notify_pkt_poll_in(struct sock *sk,
33962306a36Sopenharmony_ci				  size_t target, bool *data_ready_now)
34062306a36Sopenharmony_ci{
34162306a36Sopenharmony_ci	struct vsock_sock *vsk = vsock_sk(sk);
34262306a36Sopenharmony_ci
34362306a36Sopenharmony_ci	if (vsock_stream_has_data(vsk) >= target) {
34462306a36Sopenharmony_ci		*data_ready_now = true;
34562306a36Sopenharmony_ci	} else {
34662306a36Sopenharmony_ci		/* We can't read right now because there is not enough data
34762306a36Sopenharmony_ci		 * in the queue. Ask for notifications when there is something
34862306a36Sopenharmony_ci		 * to read.
34962306a36Sopenharmony_ci		 */
35062306a36Sopenharmony_ci		if (sk->sk_state == TCP_ESTABLISHED) {
35162306a36Sopenharmony_ci			if (!send_waiting_read(sk, 1))
35262306a36Sopenharmony_ci				return -1;
35362306a36Sopenharmony_ci
35462306a36Sopenharmony_ci		}
35562306a36Sopenharmony_ci		*data_ready_now = false;
35662306a36Sopenharmony_ci	}
35762306a36Sopenharmony_ci
35862306a36Sopenharmony_ci	return 0;
35962306a36Sopenharmony_ci}
36062306a36Sopenharmony_ci
36162306a36Sopenharmony_cistatic int
36262306a36Sopenharmony_civmci_transport_notify_pkt_poll_out(struct sock *sk,
36362306a36Sopenharmony_ci				   size_t target, bool *space_avail_now)
36462306a36Sopenharmony_ci{
36562306a36Sopenharmony_ci	s64 produce_q_free_space;
36662306a36Sopenharmony_ci	struct vsock_sock *vsk = vsock_sk(sk);
36762306a36Sopenharmony_ci
36862306a36Sopenharmony_ci	produce_q_free_space = vsock_stream_has_space(vsk);
36962306a36Sopenharmony_ci	if (produce_q_free_space > 0) {
37062306a36Sopenharmony_ci		*space_avail_now = true;
37162306a36Sopenharmony_ci		return 0;
37262306a36Sopenharmony_ci	} else if (produce_q_free_space == 0) {
37362306a36Sopenharmony_ci		/* This is a connected socket but we can't currently send data.
37462306a36Sopenharmony_ci		 * Notify the peer that we are waiting if the queue is full. We
37562306a36Sopenharmony_ci		 * only send a waiting write if the queue is full because
37662306a36Sopenharmony_ci		 * otherwise we end up in an infinite WAITING_WRITE, READ,
37762306a36Sopenharmony_ci		 * WAITING_WRITE, READ, etc. loop. Treat failing to send the
37862306a36Sopenharmony_ci		 * notification as a socket error, passing that back through
37962306a36Sopenharmony_ci		 * the mask.
38062306a36Sopenharmony_ci		 */
38162306a36Sopenharmony_ci		if (!send_waiting_write(sk, 1))
38262306a36Sopenharmony_ci			return -1;
38362306a36Sopenharmony_ci
38462306a36Sopenharmony_ci		*space_avail_now = false;
38562306a36Sopenharmony_ci	}
38662306a36Sopenharmony_ci
38762306a36Sopenharmony_ci	return 0;
38862306a36Sopenharmony_ci}
38962306a36Sopenharmony_ci
/* Prepare @data for a receive operation that wants at least @target bytes.
 *
 * Clears the saved queue indexes.  With flow control enabled, the minimum
 * notify window is raised to @target + 1 if it is smaller; if that pushes
 * the current window up as well, notify_on_block is set in @data.
 *
 * Always returns 0.
 */
static int
vmci_transport_notify_pkt_recv_init(
			struct sock *sk,
			size_t target,
			struct vmci_transport_recv_notify_data *data)
{
	struct vsock_sock *vsk = vsock_sk(sk);

#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
	data->consume_head = 0;
	data->produce_tail = 0;
#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
	data->notify_on_block = false;

	/* The minimum window already covers the target: nothing to adjust. */
	if (PKT_FIELD(vsk, write_notify_min_window) >= target + 1)
		return 0;

	PKT_FIELD(vsk, write_notify_min_window) = target + 1;

	if (PKT_FIELD(vsk, write_notify_window) >=
	    PKT_FIELD(vsk, write_notify_min_window))
		return 0;

	/* If the current window is smaller than the new minimal window size,
	 * we need to reevaluate whether we need to notify the sender. If the
	 * number of ready bytes are smaller than the new window, we need to
	 * send a notification to the sender before we block.
	 */
	PKT_FIELD(vsk, write_notify_window) =
	    PKT_FIELD(vsk, write_notify_min_window);
	data->notify_on_block = true;
#endif
#endif

	return 0;
}
42562306a36Sopenharmony_ci
/* Called before a receiver blocks waiting for @target bytes.
 *
 * Sends a WAITING_READ to the peer and, with flow control enabled and
 * notify_on_block set in @data, also a READ notification; notify_on_block
 * is cleared once that notification did not fail.
 *
 * Returns -EHOSTUNREACH if the WAITING_READ could not be sent, otherwise 0
 * or the result of vmci_transport_send_read_notification().
 */
static int
vmci_transport_notify_pkt_recv_pre_block(
				struct sock *sk,
				size_t target,
				struct vmci_transport_recv_notify_data *data)
{
	int rc = 0;

	/* Notify our peer that we are waiting for data to read. */
	if (!send_waiting_read(sk, target))
		return -EHOSTUNREACH;

#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
	if (data->notify_on_block) {
		rc = vmci_transport_send_read_notification(sk);
		if (rc >= 0)
			data->notify_on_block = false;
	}
#endif

	return rc;
}
45162306a36Sopenharmony_ci
/* Called right before data is dequeued from the consume queue.
 *
 * Records the current consume queue indexes in @data; the saved
 * consume_head is what vmci_transport_notify_pkt_recv_post_dequeue() uses
 * to detect a wrap-around.  @target is unused.  Always returns 0.
 */
static int
vmci_transport_notify_pkt_recv_pre_dequeue(
				struct sock *sk,
				size_t target,
				struct vmci_transport_recv_notify_data *data)
{
	struct vsock_sock *vsk;

	vsk = vsock_sk(sk);

#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	/* Snapshot the indexes before the dequeue changes them. */
	vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair,
				       &data->produce_tail,
				       &data->consume_head);
#endif

	return 0;
}
47162306a36Sopenharmony_ci
/* Called after a dequeue attempt from the consume queue.
 *
 * When @data_read is set, bumps the consume queue generation if the
 * @copied bytes reached the end of the queue (measured from the head saved
 * in @data), then sends a READ notification to the peer if one is due.
 * @target is unused.
 *
 * Returns 0 when nothing was read, otherwise the result of
 * vmci_transport_send_read_notification().
 */
static int
vmci_transport_notify_pkt_recv_post_dequeue(
				struct sock *sk,
				size_t target,
				ssize_t copied,
				bool data_read,
				struct vmci_transport_recv_notify_data *data)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	if (!data_read)
		return 0;

#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	/* Detect a wrap-around to maintain queue generation.  Note that this
	 * is safe since we hold the socket lock across the two queue pair
	 * operations.
	 */
	if (copied >=
		vmci_trans(vsk)->consume_size - data->consume_head)
		PKT_FIELD(vsk, consume_q_generation)++;
#endif

	return vmci_transport_send_read_notification(sk);
}
50462306a36Sopenharmony_ci
/* Prepare @data for a send operation by clearing the saved queue indexes.
 * @sk is unused.  Always returns 0.
 */
static int
vmci_transport_notify_pkt_send_init(
			struct sock *sk,
			struct vmci_transport_send_notify_data *data)
{
#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
	/* Start from a clean snapshot; send_pre_enqueue fills these in. */
	data->produce_tail = 0;
	data->consume_head = 0;
#endif

	return 0;
}
51762306a36Sopenharmony_ci
/* Called before a sender blocks waiting for queue space: notify our peer
 * that we are waiting for room to write.  @data is unused.
 *
 * Returns 0 on success, -EHOSTUNREACH if the WAITING_WRITE could not be
 * sent.
 */
static int
vmci_transport_notify_pkt_send_pre_block(
				struct sock *sk,
				struct vmci_transport_send_notify_data *data)
{
	return send_waiting_write(sk, 1) ? 0 : -EHOSTUNREACH;
}
52962306a36Sopenharmony_ci
/* Called right before data is enqueued on the produce queue.
 *
 * Records the current produce queue indexes in @data; the saved
 * produce_tail is what vmci_transport_notify_pkt_send_post_enqueue() uses
 * to detect a wrap-around.  Always returns 0.
 */
static int
vmci_transport_notify_pkt_send_pre_enqueue(
				struct sock *sk,
				struct vmci_transport_send_notify_data *data)
{
	struct vsock_sock *vsk;

	vsk = vsock_sk(sk);

#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	/* Snapshot the indexes before the enqueue changes them. */
	vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair,
				       &data->produce_tail,
				       &data->consume_head);
#endif

	return 0;
}
54562306a36Sopenharmony_ci
54662306a36Sopenharmony_cistatic int
54762306a36Sopenharmony_civmci_transport_notify_pkt_send_post_enqueue(
54862306a36Sopenharmony_ci				struct sock *sk,
54962306a36Sopenharmony_ci				ssize_t written,
55062306a36Sopenharmony_ci				struct vmci_transport_send_notify_data *data)
55162306a36Sopenharmony_ci{
55262306a36Sopenharmony_ci	int err = 0;
55362306a36Sopenharmony_ci	struct vsock_sock *vsk;
55462306a36Sopenharmony_ci	bool sent_wrote = false;
55562306a36Sopenharmony_ci	int retries = 0;
55662306a36Sopenharmony_ci
55762306a36Sopenharmony_ci	vsk = vsock_sk(sk);
55862306a36Sopenharmony_ci
55962306a36Sopenharmony_ci#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
56062306a36Sopenharmony_ci	/* Detect a wrap-around to maintain queue generation.  Note that this
56162306a36Sopenharmony_ci	 * is safe since we hold the socket lock across the two queue pair
56262306a36Sopenharmony_ci	 * operations.
56362306a36Sopenharmony_ci	 */
56462306a36Sopenharmony_ci	if (written >= vmci_trans(vsk)->produce_size - data->produce_tail)
56562306a36Sopenharmony_ci		PKT_FIELD(vsk, produce_q_generation)++;
56662306a36Sopenharmony_ci
56762306a36Sopenharmony_ci#endif
56862306a36Sopenharmony_ci
56962306a36Sopenharmony_ci	if (vmci_transport_notify_waiting_read(vsk)) {
57062306a36Sopenharmony_ci		/* Notify the peer that we have written, retrying the send on
57162306a36Sopenharmony_ci		 * failure up to our maximum value. See the XXX comment for the
57262306a36Sopenharmony_ci		 * corresponding piece of code in StreamRecvmsg() for potential
57362306a36Sopenharmony_ci		 * improvements.
57462306a36Sopenharmony_ci		 */
57562306a36Sopenharmony_ci		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
57662306a36Sopenharmony_ci		       !sent_wrote &&
57762306a36Sopenharmony_ci		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
57862306a36Sopenharmony_ci			err = vmci_transport_send_wrote(sk);
57962306a36Sopenharmony_ci			if (err >= 0)
58062306a36Sopenharmony_ci				sent_wrote = true;
58162306a36Sopenharmony_ci
58262306a36Sopenharmony_ci			retries++;
58362306a36Sopenharmony_ci		}
58462306a36Sopenharmony_ci
58562306a36Sopenharmony_ci		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
58662306a36Sopenharmony_ci			pr_err("%p unable to send wrote notify to peer\n", sk);
58762306a36Sopenharmony_ci			return err;
58862306a36Sopenharmony_ci		} else {
58962306a36Sopenharmony_ci#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
59062306a36Sopenharmony_ci			PKT_FIELD(vsk, peer_waiting_read) = false;
59162306a36Sopenharmony_ci#endif
59262306a36Sopenharmony_ci		}
59362306a36Sopenharmony_ci	}
59462306a36Sopenharmony_ci	return err;
59562306a36Sopenharmony_ci}
59662306a36Sopenharmony_ci
59762306a36Sopenharmony_cistatic void
59862306a36Sopenharmony_civmci_transport_notify_pkt_handle_pkt(
59962306a36Sopenharmony_ci			struct sock *sk,
60062306a36Sopenharmony_ci			struct vmci_transport_packet *pkt,
60162306a36Sopenharmony_ci			bool bottom_half,
60262306a36Sopenharmony_ci			struct sockaddr_vm *dst,
60362306a36Sopenharmony_ci			struct sockaddr_vm *src, bool *pkt_processed)
60462306a36Sopenharmony_ci{
60562306a36Sopenharmony_ci	bool processed = false;
60662306a36Sopenharmony_ci
60762306a36Sopenharmony_ci	switch (pkt->type) {
60862306a36Sopenharmony_ci	case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
60962306a36Sopenharmony_ci		vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src);
61062306a36Sopenharmony_ci		processed = true;
61162306a36Sopenharmony_ci		break;
61262306a36Sopenharmony_ci	case VMCI_TRANSPORT_PACKET_TYPE_READ:
61362306a36Sopenharmony_ci		vmci_transport_handle_read(sk, pkt, bottom_half, dst, src);
61462306a36Sopenharmony_ci		processed = true;
61562306a36Sopenharmony_ci		break;
61662306a36Sopenharmony_ci	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE:
61762306a36Sopenharmony_ci		vmci_transport_handle_waiting_write(sk, pkt, bottom_half,
61862306a36Sopenharmony_ci						    dst, src);
61962306a36Sopenharmony_ci		processed = true;
62062306a36Sopenharmony_ci		break;
62162306a36Sopenharmony_ci
62262306a36Sopenharmony_ci	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ:
62362306a36Sopenharmony_ci		vmci_transport_handle_waiting_read(sk, pkt, bottom_half,
62462306a36Sopenharmony_ci						   dst, src);
62562306a36Sopenharmony_ci		processed = true;
62662306a36Sopenharmony_ci		break;
62762306a36Sopenharmony_ci	}
62862306a36Sopenharmony_ci
62962306a36Sopenharmony_ci	if (pkt_processed)
63062306a36Sopenharmony_ci		*pkt_processed = processed;
63162306a36Sopenharmony_ci}
63262306a36Sopenharmony_ci
63362306a36Sopenharmony_cistatic void vmci_transport_notify_pkt_process_request(struct sock *sk)
63462306a36Sopenharmony_ci{
63562306a36Sopenharmony_ci	struct vsock_sock *vsk = vsock_sk(sk);
63662306a36Sopenharmony_ci
63762306a36Sopenharmony_ci	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
63862306a36Sopenharmony_ci	if (vmci_trans(vsk)->consume_size <
63962306a36Sopenharmony_ci		PKT_FIELD(vsk, write_notify_min_window))
64062306a36Sopenharmony_ci		PKT_FIELD(vsk, write_notify_min_window) =
64162306a36Sopenharmony_ci			vmci_trans(vsk)->consume_size;
64262306a36Sopenharmony_ci}
64362306a36Sopenharmony_ci
64462306a36Sopenharmony_cistatic void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
64562306a36Sopenharmony_ci{
64662306a36Sopenharmony_ci	struct vsock_sock *vsk = vsock_sk(sk);
64762306a36Sopenharmony_ci
64862306a36Sopenharmony_ci	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
64962306a36Sopenharmony_ci	if (vmci_trans(vsk)->consume_size <
65062306a36Sopenharmony_ci		PKT_FIELD(vsk, write_notify_min_window))
65162306a36Sopenharmony_ci		PKT_FIELD(vsk, write_notify_min_window) =
65262306a36Sopenharmony_ci			vmci_trans(vsk)->consume_size;
65362306a36Sopenharmony_ci}
65462306a36Sopenharmony_ci
65562306a36Sopenharmony_ci/* Socket control packet based operations. */
65662306a36Sopenharmony_ciconst struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = {
65762306a36Sopenharmony_ci	.socket_init = vmci_transport_notify_pkt_socket_init,
65862306a36Sopenharmony_ci	.socket_destruct = vmci_transport_notify_pkt_socket_destruct,
65962306a36Sopenharmony_ci	.poll_in = vmci_transport_notify_pkt_poll_in,
66062306a36Sopenharmony_ci	.poll_out = vmci_transport_notify_pkt_poll_out,
66162306a36Sopenharmony_ci	.handle_notify_pkt = vmci_transport_notify_pkt_handle_pkt,
66262306a36Sopenharmony_ci	.recv_init = vmci_transport_notify_pkt_recv_init,
66362306a36Sopenharmony_ci	.recv_pre_block = vmci_transport_notify_pkt_recv_pre_block,
66462306a36Sopenharmony_ci	.recv_pre_dequeue = vmci_transport_notify_pkt_recv_pre_dequeue,
66562306a36Sopenharmony_ci	.recv_post_dequeue = vmci_transport_notify_pkt_recv_post_dequeue,
66662306a36Sopenharmony_ci	.send_init = vmci_transport_notify_pkt_send_init,
66762306a36Sopenharmony_ci	.send_pre_block = vmci_transport_notify_pkt_send_pre_block,
66862306a36Sopenharmony_ci	.send_pre_enqueue = vmci_transport_notify_pkt_send_pre_enqueue,
66962306a36Sopenharmony_ci	.send_post_enqueue = vmci_transport_notify_pkt_send_post_enqueue,
67062306a36Sopenharmony_ci	.process_request = vmci_transport_notify_pkt_process_request,
67162306a36Sopenharmony_ci	.process_negotiate = vmci_transport_notify_pkt_process_negotiate,
67262306a36Sopenharmony_ci};
673