162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * VMware vSockets Driver 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include <linux/types.h> 962306a36Sopenharmony_ci#include <linux/socket.h> 1062306a36Sopenharmony_ci#include <linux/stddef.h> 1162306a36Sopenharmony_ci#include <net/sock.h> 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_ci#include "vmci_transport_notify.h" 1462306a36Sopenharmony_ci 1562306a36Sopenharmony_ci#define PKT_FIELD(vsk, field_name) (vmci_trans(vsk)->notify.pkt.field_name) 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_cistatic bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk) 1862306a36Sopenharmony_ci{ 1962306a36Sopenharmony_ci#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) 2062306a36Sopenharmony_ci bool retval; 2162306a36Sopenharmony_ci u64 notify_limit; 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_ci if (!PKT_FIELD(vsk, peer_waiting_write)) 2462306a36Sopenharmony_ci return false; 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_ci#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL 2762306a36Sopenharmony_ci /* When the sender blocks, we take that as a sign that the sender is 2862306a36Sopenharmony_ci * faster than the receiver. To reduce the transmit rate of the sender, 2962306a36Sopenharmony_ci * we delay the sending of the read notification by decreasing the 3062306a36Sopenharmony_ci * write_notify_window. The notification is delayed until the number of 3162306a36Sopenharmony_ci * bytes used in the queue drops below the write_notify_window. 3262306a36Sopenharmony_ci */ 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci if (!PKT_FIELD(vsk, peer_waiting_write_detected)) { 3562306a36Sopenharmony_ci PKT_FIELD(vsk, peer_waiting_write_detected) = true; 3662306a36Sopenharmony_ci if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) { 3762306a36Sopenharmony_ci PKT_FIELD(vsk, write_notify_window) = 3862306a36Sopenharmony_ci PKT_FIELD(vsk, write_notify_min_window); 3962306a36Sopenharmony_ci } else { 4062306a36Sopenharmony_ci PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE; 4162306a36Sopenharmony_ci if (PKT_FIELD(vsk, write_notify_window) < 4262306a36Sopenharmony_ci PKT_FIELD(vsk, write_notify_min_window)) 4362306a36Sopenharmony_ci PKT_FIELD(vsk, write_notify_window) = 4462306a36Sopenharmony_ci PKT_FIELD(vsk, write_notify_min_window); 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci } 4762306a36Sopenharmony_ci } 4862306a36Sopenharmony_ci notify_limit = vmci_trans(vsk)->consume_size - 4962306a36Sopenharmony_ci PKT_FIELD(vsk, write_notify_window); 5062306a36Sopenharmony_ci#else 5162306a36Sopenharmony_ci notify_limit = 0; 5262306a36Sopenharmony_ci#endif 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_ci /* For now we ignore the wait information and just see if the free 5562306a36Sopenharmony_ci * space exceeds the notify limit. Note that improving this function 5662306a36Sopenharmony_ci * to be more intelligent will not require a protocol change and will 5762306a36Sopenharmony_ci * retain compatibility between endpoints with mixed versions of this 5862306a36Sopenharmony_ci * function. 5962306a36Sopenharmony_ci * 6062306a36Sopenharmony_ci * The notify_limit is used to delay notifications in the case where 6162306a36Sopenharmony_ci * flow control is enabled. Below the test is expressed in terms of 6262306a36Sopenharmony_ci * free space in the queue: if free_space > ConsumeSize - 6362306a36Sopenharmony_ci * write_notify_window then notify An alternate way of expressing this 6462306a36Sopenharmony_ci * is to rewrite the expression to use the data ready in the receive 6562306a36Sopenharmony_ci * queue: if write_notify_window > bufferReady then notify as 6662306a36Sopenharmony_ci * free_space == ConsumeSize - bufferReady. 6762306a36Sopenharmony_ci */ 6862306a36Sopenharmony_ci retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) > 6962306a36Sopenharmony_ci notify_limit; 7062306a36Sopenharmony_ci#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL 7162306a36Sopenharmony_ci if (retval) { 7262306a36Sopenharmony_ci /* 7362306a36Sopenharmony_ci * Once we notify the peer, we reset the detected flag so the 7462306a36Sopenharmony_ci * next wait will again cause a decrease in the window size. 7562306a36Sopenharmony_ci */ 7662306a36Sopenharmony_ci 7762306a36Sopenharmony_ci PKT_FIELD(vsk, peer_waiting_write_detected) = false; 7862306a36Sopenharmony_ci } 7962306a36Sopenharmony_ci#endif 8062306a36Sopenharmony_ci return retval; 8162306a36Sopenharmony_ci#else 8262306a36Sopenharmony_ci return true; 8362306a36Sopenharmony_ci#endif 8462306a36Sopenharmony_ci} 8562306a36Sopenharmony_ci 8662306a36Sopenharmony_cistatic bool vmci_transport_notify_waiting_read(struct vsock_sock *vsk) 8762306a36Sopenharmony_ci{ 8862306a36Sopenharmony_ci#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) 8962306a36Sopenharmony_ci if (!PKT_FIELD(vsk, peer_waiting_read)) 9062306a36Sopenharmony_ci return false; 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_ci /* For now we ignore the wait information and just see if there is any 9362306a36Sopenharmony_ci * data for our peer to read. Note that improving this function to be 9462306a36Sopenharmony_ci * more intelligent will not require a protocol change and will retain 9562306a36Sopenharmony_ci * compatibility between endpoints with mixed versions of this 9662306a36Sopenharmony_ci * function. 9762306a36Sopenharmony_ci */ 9862306a36Sopenharmony_ci return vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) > 0; 9962306a36Sopenharmony_ci#else 10062306a36Sopenharmony_ci return true; 10162306a36Sopenharmony_ci#endif 10262306a36Sopenharmony_ci} 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_cistatic void 10562306a36Sopenharmony_civmci_transport_handle_waiting_read(struct sock *sk, 10662306a36Sopenharmony_ci struct vmci_transport_packet *pkt, 10762306a36Sopenharmony_ci bool bottom_half, 10862306a36Sopenharmony_ci struct sockaddr_vm *dst, 10962306a36Sopenharmony_ci struct sockaddr_vm *src) 11062306a36Sopenharmony_ci{ 11162306a36Sopenharmony_ci#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) 11262306a36Sopenharmony_ci struct vsock_sock *vsk; 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci vsk = vsock_sk(sk); 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci PKT_FIELD(vsk, peer_waiting_read) = true; 11762306a36Sopenharmony_ci memcpy(&PKT_FIELD(vsk, peer_waiting_read_info), &pkt->u.wait, 11862306a36Sopenharmony_ci sizeof(PKT_FIELD(vsk, peer_waiting_read_info))); 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_ci if (vmci_transport_notify_waiting_read(vsk)) { 12162306a36Sopenharmony_ci bool sent; 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ci if (bottom_half) 12462306a36Sopenharmony_ci sent = vmci_transport_send_wrote_bh(dst, src) > 0; 12562306a36Sopenharmony_ci else 12662306a36Sopenharmony_ci sent = vmci_transport_send_wrote(sk) > 0; 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci if (sent) 12962306a36Sopenharmony_ci PKT_FIELD(vsk, peer_waiting_read) = false; 13062306a36Sopenharmony_ci } 13162306a36Sopenharmony_ci#endif 13262306a36Sopenharmony_ci} 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_cistatic void 13562306a36Sopenharmony_civmci_transport_handle_waiting_write(struct sock *sk, 13662306a36Sopenharmony_ci struct vmci_transport_packet *pkt, 13762306a36Sopenharmony_ci bool bottom_half, 13862306a36Sopenharmony_ci struct sockaddr_vm *dst, 13962306a36Sopenharmony_ci struct sockaddr_vm *src) 14062306a36Sopenharmony_ci{ 14162306a36Sopenharmony_ci#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) 14262306a36Sopenharmony_ci struct vsock_sock *vsk; 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci vsk = vsock_sk(sk); 14562306a36Sopenharmony_ci 14662306a36Sopenharmony_ci PKT_FIELD(vsk, peer_waiting_write) = true; 14762306a36Sopenharmony_ci memcpy(&PKT_FIELD(vsk, peer_waiting_write_info), &pkt->u.wait, 14862306a36Sopenharmony_ci sizeof(PKT_FIELD(vsk, peer_waiting_write_info))); 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci if (vmci_transport_notify_waiting_write(vsk)) { 15162306a36Sopenharmony_ci bool sent; 15262306a36Sopenharmony_ci 15362306a36Sopenharmony_ci if (bottom_half) 15462306a36Sopenharmony_ci sent = vmci_transport_send_read_bh(dst, src) > 0; 15562306a36Sopenharmony_ci else 15662306a36Sopenharmony_ci sent = vmci_transport_send_read(sk) > 0; 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_ci if (sent) 15962306a36Sopenharmony_ci PKT_FIELD(vsk, peer_waiting_write) = false; 16062306a36Sopenharmony_ci } 16162306a36Sopenharmony_ci#endif 16262306a36Sopenharmony_ci} 16362306a36Sopenharmony_ci 16462306a36Sopenharmony_cistatic void 16562306a36Sopenharmony_civmci_transport_handle_read(struct sock *sk, 16662306a36Sopenharmony_ci struct vmci_transport_packet *pkt, 16762306a36Sopenharmony_ci bool bottom_half, 16862306a36Sopenharmony_ci struct sockaddr_vm *dst, struct sockaddr_vm *src) 16962306a36Sopenharmony_ci{ 17062306a36Sopenharmony_ci#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) 17162306a36Sopenharmony_ci struct vsock_sock *vsk; 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ci vsk = vsock_sk(sk); 17462306a36Sopenharmony_ci PKT_FIELD(vsk, sent_waiting_write) = false; 17562306a36Sopenharmony_ci#endif 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci sk->sk_write_space(sk); 17862306a36Sopenharmony_ci} 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_cistatic bool send_waiting_read(struct sock *sk, u64 room_needed) 18162306a36Sopenharmony_ci{ 18262306a36Sopenharmony_ci#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) 18362306a36Sopenharmony_ci struct vsock_sock *vsk; 18462306a36Sopenharmony_ci struct vmci_transport_waiting_info waiting_info; 18562306a36Sopenharmony_ci u64 tail; 18662306a36Sopenharmony_ci u64 head; 18762306a36Sopenharmony_ci u64 room_left; 18862306a36Sopenharmony_ci bool ret; 18962306a36Sopenharmony_ci 19062306a36Sopenharmony_ci vsk = vsock_sk(sk); 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci if (PKT_FIELD(vsk, sent_waiting_read)) 19362306a36Sopenharmony_ci return true; 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci if (PKT_FIELD(vsk, write_notify_window) < 19662306a36Sopenharmony_ci vmci_trans(vsk)->consume_size) 19762306a36Sopenharmony_ci PKT_FIELD(vsk, write_notify_window) = 19862306a36Sopenharmony_ci min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE, 19962306a36Sopenharmony_ci vmci_trans(vsk)->consume_size); 20062306a36Sopenharmony_ci 20162306a36Sopenharmony_ci vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, &tail, &head); 20262306a36Sopenharmony_ci room_left = vmci_trans(vsk)->consume_size - head; 20362306a36Sopenharmony_ci if (room_needed >= room_left) { 20462306a36Sopenharmony_ci waiting_info.offset = room_needed - room_left; 20562306a36Sopenharmony_ci waiting_info.generation = 20662306a36Sopenharmony_ci PKT_FIELD(vsk, consume_q_generation) + 1; 20762306a36Sopenharmony_ci } else { 20862306a36Sopenharmony_ci waiting_info.offset = head + room_needed; 20962306a36Sopenharmony_ci waiting_info.generation = PKT_FIELD(vsk, consume_q_generation); 21062306a36Sopenharmony_ci } 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_ci ret = vmci_transport_send_waiting_read(sk, &waiting_info) > 0; 21362306a36Sopenharmony_ci if (ret) 21462306a36Sopenharmony_ci PKT_FIELD(vsk, sent_waiting_read) = true; 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_ci return ret; 21762306a36Sopenharmony_ci#else 21862306a36Sopenharmony_ci return true; 21962306a36Sopenharmony_ci#endif 22062306a36Sopenharmony_ci} 22162306a36Sopenharmony_ci 22262306a36Sopenharmony_cistatic bool send_waiting_write(struct sock *sk, u64 room_needed) 22362306a36Sopenharmony_ci{ 22462306a36Sopenharmony_ci#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) 22562306a36Sopenharmony_ci struct vsock_sock *vsk; 22662306a36Sopenharmony_ci struct vmci_transport_waiting_info waiting_info; 22762306a36Sopenharmony_ci u64 tail; 22862306a36Sopenharmony_ci u64 head; 22962306a36Sopenharmony_ci u64 room_left; 23062306a36Sopenharmony_ci bool ret; 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci vsk = vsock_sk(sk); 23362306a36Sopenharmony_ci 23462306a36Sopenharmony_ci if (PKT_FIELD(vsk, sent_waiting_write)) 23562306a36Sopenharmony_ci return true; 23662306a36Sopenharmony_ci 23762306a36Sopenharmony_ci vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, &tail, &head); 23862306a36Sopenharmony_ci room_left = vmci_trans(vsk)->produce_size - tail; 23962306a36Sopenharmony_ci if (room_needed + 1 >= room_left) { 24062306a36Sopenharmony_ci /* Wraps around to current generation. */ 24162306a36Sopenharmony_ci waiting_info.offset = room_needed + 1 - room_left; 24262306a36Sopenharmony_ci waiting_info.generation = PKT_FIELD(vsk, produce_q_generation); 24362306a36Sopenharmony_ci } else { 24462306a36Sopenharmony_ci waiting_info.offset = tail + room_needed + 1; 24562306a36Sopenharmony_ci waiting_info.generation = 24662306a36Sopenharmony_ci PKT_FIELD(vsk, produce_q_generation) - 1; 24762306a36Sopenharmony_ci } 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_ci ret = vmci_transport_send_waiting_write(sk, &waiting_info) > 0; 25062306a36Sopenharmony_ci if (ret) 25162306a36Sopenharmony_ci PKT_FIELD(vsk, sent_waiting_write) = true; 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_ci return ret; 25462306a36Sopenharmony_ci#else 25562306a36Sopenharmony_ci return true; 25662306a36Sopenharmony_ci#endif 25762306a36Sopenharmony_ci} 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_cistatic int vmci_transport_send_read_notification(struct sock *sk) 26062306a36Sopenharmony_ci{ 26162306a36Sopenharmony_ci struct vsock_sock *vsk; 26262306a36Sopenharmony_ci bool sent_read; 26362306a36Sopenharmony_ci unsigned int retries; 26462306a36Sopenharmony_ci int err; 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_ci vsk = vsock_sk(sk); 26762306a36Sopenharmony_ci sent_read = false; 26862306a36Sopenharmony_ci retries = 0; 26962306a36Sopenharmony_ci err = 0; 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_ci if (vmci_transport_notify_waiting_write(vsk)) { 27262306a36Sopenharmony_ci /* Notify the peer that we have read, retrying the send on 27362306a36Sopenharmony_ci * failure up to our maximum value. XXX For now we just log 27462306a36Sopenharmony_ci * the failure, but later we should schedule a work item to 27562306a36Sopenharmony_ci * handle the resend until it succeeds. That would require 27662306a36Sopenharmony_ci * keeping track of work items in the vsk and cleaning them up 27762306a36Sopenharmony_ci * upon socket close. 27862306a36Sopenharmony_ci */ 27962306a36Sopenharmony_ci while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && 28062306a36Sopenharmony_ci !sent_read && 28162306a36Sopenharmony_ci retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { 28262306a36Sopenharmony_ci err = vmci_transport_send_read(sk); 28362306a36Sopenharmony_ci if (err >= 0) 28462306a36Sopenharmony_ci sent_read = true; 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_ci retries++; 28762306a36Sopenharmony_ci } 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) 29062306a36Sopenharmony_ci pr_err("%p unable to send read notify to peer\n", sk); 29162306a36Sopenharmony_ci else 29262306a36Sopenharmony_ci#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) 29362306a36Sopenharmony_ci PKT_FIELD(vsk, peer_waiting_write) = false; 29462306a36Sopenharmony_ci#endif 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_ci } 29762306a36Sopenharmony_ci return err; 29862306a36Sopenharmony_ci} 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_cistatic void 30162306a36Sopenharmony_civmci_transport_handle_wrote(struct sock *sk, 30262306a36Sopenharmony_ci struct vmci_transport_packet *pkt, 30362306a36Sopenharmony_ci bool bottom_half, 30462306a36Sopenharmony_ci struct sockaddr_vm *dst, struct sockaddr_vm *src) 30562306a36Sopenharmony_ci{ 30662306a36Sopenharmony_ci#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) 30762306a36Sopenharmony_ci struct vsock_sock *vsk = vsock_sk(sk); 30862306a36Sopenharmony_ci PKT_FIELD(vsk, sent_waiting_read) = false; 30962306a36Sopenharmony_ci#endif 31062306a36Sopenharmony_ci vsock_data_ready(sk); 31162306a36Sopenharmony_ci} 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_cistatic void vmci_transport_notify_pkt_socket_init(struct sock *sk) 31462306a36Sopenharmony_ci{ 31562306a36Sopenharmony_ci struct vsock_sock *vsk = vsock_sk(sk); 31662306a36Sopenharmony_ci 31762306a36Sopenharmony_ci PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; 31862306a36Sopenharmony_ci PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; 31962306a36Sopenharmony_ci PKT_FIELD(vsk, peer_waiting_read) = false; 32062306a36Sopenharmony_ci PKT_FIELD(vsk, peer_waiting_write) = false; 32162306a36Sopenharmony_ci PKT_FIELD(vsk, peer_waiting_write_detected) = false; 32262306a36Sopenharmony_ci PKT_FIELD(vsk, sent_waiting_read) = false; 32362306a36Sopenharmony_ci PKT_FIELD(vsk, sent_waiting_write) = false; 32462306a36Sopenharmony_ci PKT_FIELD(vsk, produce_q_generation) = 0; 32562306a36Sopenharmony_ci PKT_FIELD(vsk, consume_q_generation) = 0; 32662306a36Sopenharmony_ci 32762306a36Sopenharmony_ci memset(&PKT_FIELD(vsk, peer_waiting_read_info), 0, 32862306a36Sopenharmony_ci sizeof(PKT_FIELD(vsk, peer_waiting_read_info))); 32962306a36Sopenharmony_ci memset(&PKT_FIELD(vsk, peer_waiting_write_info), 0, 33062306a36Sopenharmony_ci sizeof(PKT_FIELD(vsk, peer_waiting_write_info))); 33162306a36Sopenharmony_ci} 33262306a36Sopenharmony_ci 33362306a36Sopenharmony_cistatic void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk) 33462306a36Sopenharmony_ci{ 33562306a36Sopenharmony_ci} 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_cistatic int 33862306a36Sopenharmony_civmci_transport_notify_pkt_poll_in(struct sock *sk, 33962306a36Sopenharmony_ci size_t target, bool *data_ready_now) 34062306a36Sopenharmony_ci{ 34162306a36Sopenharmony_ci struct vsock_sock *vsk = vsock_sk(sk); 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_ci if (vsock_stream_has_data(vsk) >= target) { 34462306a36Sopenharmony_ci *data_ready_now = true; 34562306a36Sopenharmony_ci } else { 34662306a36Sopenharmony_ci /* We can't read right now because there is not enough data 34762306a36Sopenharmony_ci * in the queue. Ask for notifications when there is something 34862306a36Sopenharmony_ci * to read. 34962306a36Sopenharmony_ci */ 35062306a36Sopenharmony_ci if (sk->sk_state == TCP_ESTABLISHED) { 35162306a36Sopenharmony_ci if (!send_waiting_read(sk, 1)) 35262306a36Sopenharmony_ci return -1; 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci } 35562306a36Sopenharmony_ci *data_ready_now = false; 35662306a36Sopenharmony_ci } 35762306a36Sopenharmony_ci 35862306a36Sopenharmony_ci return 0; 35962306a36Sopenharmony_ci} 36062306a36Sopenharmony_ci 36162306a36Sopenharmony_cistatic int 36262306a36Sopenharmony_civmci_transport_notify_pkt_poll_out(struct sock *sk, 36362306a36Sopenharmony_ci size_t target, bool *space_avail_now) 36462306a36Sopenharmony_ci{ 36562306a36Sopenharmony_ci s64 produce_q_free_space; 36662306a36Sopenharmony_ci struct vsock_sock *vsk = vsock_sk(sk); 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ci produce_q_free_space = vsock_stream_has_space(vsk); 36962306a36Sopenharmony_ci if (produce_q_free_space > 0) { 37062306a36Sopenharmony_ci *space_avail_now = true; 37162306a36Sopenharmony_ci return 0; 37262306a36Sopenharmony_ci } else if (produce_q_free_space == 0) { 37362306a36Sopenharmony_ci /* This is a connected socket but we can't currently send data. 37462306a36Sopenharmony_ci * Notify the peer that we are waiting if the queue is full. We 37562306a36Sopenharmony_ci * only send a waiting write if the queue is full because 37662306a36Sopenharmony_ci * otherwise we end up in an infinite WAITING_WRITE, READ, 37762306a36Sopenharmony_ci * WAITING_WRITE, READ, etc. loop. Treat failing to send the 37862306a36Sopenharmony_ci * notification as a socket error, passing that back through 37962306a36Sopenharmony_ci * the mask. 38062306a36Sopenharmony_ci */ 38162306a36Sopenharmony_ci if (!send_waiting_write(sk, 1)) 38262306a36Sopenharmony_ci return -1; 38362306a36Sopenharmony_ci 38462306a36Sopenharmony_ci *space_avail_now = false; 38562306a36Sopenharmony_ci } 38662306a36Sopenharmony_ci 38762306a36Sopenharmony_ci return 0; 38862306a36Sopenharmony_ci} 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_cistatic int 39162306a36Sopenharmony_civmci_transport_notify_pkt_recv_init( 39262306a36Sopenharmony_ci struct sock *sk, 39362306a36Sopenharmony_ci size_t target, 39462306a36Sopenharmony_ci struct vmci_transport_recv_notify_data *data) 39562306a36Sopenharmony_ci{ 39662306a36Sopenharmony_ci struct vsock_sock *vsk = vsock_sk(sk); 39762306a36Sopenharmony_ci 39862306a36Sopenharmony_ci#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY 39962306a36Sopenharmony_ci data->consume_head = 0; 40062306a36Sopenharmony_ci data->produce_tail = 0; 40162306a36Sopenharmony_ci#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL 40262306a36Sopenharmony_ci data->notify_on_block = false; 40362306a36Sopenharmony_ci 40462306a36Sopenharmony_ci if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) { 40562306a36Sopenharmony_ci PKT_FIELD(vsk, write_notify_min_window) = target + 1; 40662306a36Sopenharmony_ci if (PKT_FIELD(vsk, write_notify_window) < 40762306a36Sopenharmony_ci PKT_FIELD(vsk, write_notify_min_window)) { 40862306a36Sopenharmony_ci /* If the current window is smaller than the new 40962306a36Sopenharmony_ci * minimal window size, we need to reevaluate whether 41062306a36Sopenharmony_ci * we need to notify the sender. If the number of ready 41162306a36Sopenharmony_ci * bytes are smaller than the new window, we need to 41262306a36Sopenharmony_ci * send a notification to the sender before we block. 41362306a36Sopenharmony_ci */ 41462306a36Sopenharmony_ci 41562306a36Sopenharmony_ci PKT_FIELD(vsk, write_notify_window) = 41662306a36Sopenharmony_ci PKT_FIELD(vsk, write_notify_min_window); 41762306a36Sopenharmony_ci data->notify_on_block = true; 41862306a36Sopenharmony_ci } 41962306a36Sopenharmony_ci } 42062306a36Sopenharmony_ci#endif 42162306a36Sopenharmony_ci#endif 42262306a36Sopenharmony_ci 42362306a36Sopenharmony_ci return 0; 42462306a36Sopenharmony_ci} 42562306a36Sopenharmony_ci 42662306a36Sopenharmony_cistatic int 42762306a36Sopenharmony_civmci_transport_notify_pkt_recv_pre_block( 42862306a36Sopenharmony_ci struct sock *sk, 42962306a36Sopenharmony_ci size_t target, 43062306a36Sopenharmony_ci struct vmci_transport_recv_notify_data *data) 43162306a36Sopenharmony_ci{ 43262306a36Sopenharmony_ci int err = 0; 43362306a36Sopenharmony_ci 43462306a36Sopenharmony_ci /* Notify our peer that we are waiting for data to read. */ 43562306a36Sopenharmony_ci if (!send_waiting_read(sk, target)) { 43662306a36Sopenharmony_ci err = -EHOSTUNREACH; 43762306a36Sopenharmony_ci return err; 43862306a36Sopenharmony_ci } 43962306a36Sopenharmony_ci#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL 44062306a36Sopenharmony_ci if (data->notify_on_block) { 44162306a36Sopenharmony_ci err = vmci_transport_send_read_notification(sk); 44262306a36Sopenharmony_ci if (err < 0) 44362306a36Sopenharmony_ci return err; 44462306a36Sopenharmony_ci 44562306a36Sopenharmony_ci data->notify_on_block = false; 44662306a36Sopenharmony_ci } 44762306a36Sopenharmony_ci#endif 44862306a36Sopenharmony_ci 44962306a36Sopenharmony_ci return err; 45062306a36Sopenharmony_ci} 45162306a36Sopenharmony_ci 45262306a36Sopenharmony_cistatic int 45362306a36Sopenharmony_civmci_transport_notify_pkt_recv_pre_dequeue( 45462306a36Sopenharmony_ci struct sock *sk, 45562306a36Sopenharmony_ci size_t target, 45662306a36Sopenharmony_ci struct vmci_transport_recv_notify_data *data) 45762306a36Sopenharmony_ci{ 45862306a36Sopenharmony_ci struct vsock_sock *vsk = vsock_sk(sk); 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_ci /* Now consume up to len bytes from the queue. Note that since we have 46162306a36Sopenharmony_ci * the socket locked we should copy at least ready bytes. 46262306a36Sopenharmony_ci */ 46362306a36Sopenharmony_ci#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) 46462306a36Sopenharmony_ci vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, 46562306a36Sopenharmony_ci &data->produce_tail, 46662306a36Sopenharmony_ci &data->consume_head); 46762306a36Sopenharmony_ci#endif 46862306a36Sopenharmony_ci 46962306a36Sopenharmony_ci return 0; 47062306a36Sopenharmony_ci} 47162306a36Sopenharmony_ci 47262306a36Sopenharmony_cistatic int 47362306a36Sopenharmony_civmci_transport_notify_pkt_recv_post_dequeue( 47462306a36Sopenharmony_ci struct sock *sk, 47562306a36Sopenharmony_ci size_t target, 47662306a36Sopenharmony_ci ssize_t copied, 47762306a36Sopenharmony_ci bool data_read, 47862306a36Sopenharmony_ci struct vmci_transport_recv_notify_data *data) 47962306a36Sopenharmony_ci{ 48062306a36Sopenharmony_ci struct vsock_sock *vsk; 48162306a36Sopenharmony_ci int err; 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_ci vsk = vsock_sk(sk); 48462306a36Sopenharmony_ci err = 0; 48562306a36Sopenharmony_ci 48662306a36Sopenharmony_ci if (data_read) { 48762306a36Sopenharmony_ci#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) 48862306a36Sopenharmony_ci /* Detect a wrap-around to maintain queue generation. Note 48962306a36Sopenharmony_ci * that this is safe since we hold the socket lock across the 49062306a36Sopenharmony_ci * two queue pair operations. 49162306a36Sopenharmony_ci */ 49262306a36Sopenharmony_ci if (copied >= 49362306a36Sopenharmony_ci vmci_trans(vsk)->consume_size - data->consume_head) 49462306a36Sopenharmony_ci PKT_FIELD(vsk, consume_q_generation)++; 49562306a36Sopenharmony_ci#endif 49662306a36Sopenharmony_ci 49762306a36Sopenharmony_ci err = vmci_transport_send_read_notification(sk); 49862306a36Sopenharmony_ci if (err < 0) 49962306a36Sopenharmony_ci return err; 50062306a36Sopenharmony_ci 50162306a36Sopenharmony_ci } 50262306a36Sopenharmony_ci return err; 50362306a36Sopenharmony_ci} 50462306a36Sopenharmony_ci 50562306a36Sopenharmony_cistatic int 50662306a36Sopenharmony_civmci_transport_notify_pkt_send_init( 50762306a36Sopenharmony_ci struct sock *sk, 50862306a36Sopenharmony_ci struct vmci_transport_send_notify_data *data) 50962306a36Sopenharmony_ci{ 51062306a36Sopenharmony_ci#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY 51162306a36Sopenharmony_ci data->consume_head = 0; 51262306a36Sopenharmony_ci data->produce_tail = 0; 51362306a36Sopenharmony_ci#endif 51462306a36Sopenharmony_ci 51562306a36Sopenharmony_ci return 0; 51662306a36Sopenharmony_ci} 51762306a36Sopenharmony_ci 51862306a36Sopenharmony_cistatic int 51962306a36Sopenharmony_civmci_transport_notify_pkt_send_pre_block( 52062306a36Sopenharmony_ci struct sock *sk, 52162306a36Sopenharmony_ci struct vmci_transport_send_notify_data *data) 52262306a36Sopenharmony_ci{ 52362306a36Sopenharmony_ci /* Notify our peer that we are waiting for room to write. */ 52462306a36Sopenharmony_ci if (!send_waiting_write(sk, 1)) 52562306a36Sopenharmony_ci return -EHOSTUNREACH; 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_ci return 0; 52862306a36Sopenharmony_ci} 52962306a36Sopenharmony_ci 53062306a36Sopenharmony_cistatic int 53162306a36Sopenharmony_civmci_transport_notify_pkt_send_pre_enqueue( 53262306a36Sopenharmony_ci struct sock *sk, 53362306a36Sopenharmony_ci struct vmci_transport_send_notify_data *data) 53462306a36Sopenharmony_ci{ 53562306a36Sopenharmony_ci struct vsock_sock *vsk = vsock_sk(sk); 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) 53862306a36Sopenharmony_ci vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, 53962306a36Sopenharmony_ci &data->produce_tail, 54062306a36Sopenharmony_ci &data->consume_head); 54162306a36Sopenharmony_ci#endif 54262306a36Sopenharmony_ci 54362306a36Sopenharmony_ci return 0; 54462306a36Sopenharmony_ci} 54562306a36Sopenharmony_ci 54662306a36Sopenharmony_cistatic int 54762306a36Sopenharmony_civmci_transport_notify_pkt_send_post_enqueue( 54862306a36Sopenharmony_ci struct sock *sk, 54962306a36Sopenharmony_ci ssize_t written, 55062306a36Sopenharmony_ci struct vmci_transport_send_notify_data *data) 55162306a36Sopenharmony_ci{ 55262306a36Sopenharmony_ci int err = 0; 55362306a36Sopenharmony_ci struct vsock_sock *vsk; 55462306a36Sopenharmony_ci bool sent_wrote = false; 55562306a36Sopenharmony_ci int retries = 0; 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_ci vsk = vsock_sk(sk); 55862306a36Sopenharmony_ci 55962306a36Sopenharmony_ci#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) 56062306a36Sopenharmony_ci /* Detect a wrap-around to maintain queue generation. Note that this 56162306a36Sopenharmony_ci * is safe since we hold the socket lock across the two queue pair 56262306a36Sopenharmony_ci * operations. 56362306a36Sopenharmony_ci */ 56462306a36Sopenharmony_ci if (written >= vmci_trans(vsk)->produce_size - data->produce_tail) 56562306a36Sopenharmony_ci PKT_FIELD(vsk, produce_q_generation)++; 56662306a36Sopenharmony_ci 56762306a36Sopenharmony_ci#endif 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_ci if (vmci_transport_notify_waiting_read(vsk)) { 57062306a36Sopenharmony_ci /* Notify the peer that we have written, retrying the send on 57162306a36Sopenharmony_ci * failure up to our maximum value. See the XXX comment for the 57262306a36Sopenharmony_ci * corresponding piece of code in StreamRecvmsg() for potential 57362306a36Sopenharmony_ci * improvements. 57462306a36Sopenharmony_ci */ 57562306a36Sopenharmony_ci while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && 57662306a36Sopenharmony_ci !sent_wrote && 57762306a36Sopenharmony_ci retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { 57862306a36Sopenharmony_ci err = vmci_transport_send_wrote(sk); 57962306a36Sopenharmony_ci if (err >= 0) 58062306a36Sopenharmony_ci sent_wrote = true; 58162306a36Sopenharmony_ci 58262306a36Sopenharmony_ci retries++; 58362306a36Sopenharmony_ci } 58462306a36Sopenharmony_ci 58562306a36Sopenharmony_ci if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { 58662306a36Sopenharmony_ci pr_err("%p unable to send wrote notify to peer\n", sk); 58762306a36Sopenharmony_ci return err; 58862306a36Sopenharmony_ci } else { 58962306a36Sopenharmony_ci#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) 59062306a36Sopenharmony_ci PKT_FIELD(vsk, peer_waiting_read) = false; 59162306a36Sopenharmony_ci#endif 59262306a36Sopenharmony_ci } 59362306a36Sopenharmony_ci } 59462306a36Sopenharmony_ci return err; 59562306a36Sopenharmony_ci} 59662306a36Sopenharmony_ci 59762306a36Sopenharmony_cistatic void 59862306a36Sopenharmony_civmci_transport_notify_pkt_handle_pkt( 59962306a36Sopenharmony_ci struct sock *sk, 60062306a36Sopenharmony_ci struct vmci_transport_packet *pkt, 60162306a36Sopenharmony_ci bool bottom_half, 60262306a36Sopenharmony_ci struct sockaddr_vm *dst, 60362306a36Sopenharmony_ci struct sockaddr_vm *src, bool *pkt_processed) 60462306a36Sopenharmony_ci{ 60562306a36Sopenharmony_ci bool processed = false; 60662306a36Sopenharmony_ci 60762306a36Sopenharmony_ci switch (pkt->type) { 60862306a36Sopenharmony_ci case VMCI_TRANSPORT_PACKET_TYPE_WROTE: 60962306a36Sopenharmony_ci vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src); 61062306a36Sopenharmony_ci processed = true; 61162306a36Sopenharmony_ci break; 61262306a36Sopenharmony_ci case VMCI_TRANSPORT_PACKET_TYPE_READ: 61362306a36Sopenharmony_ci vmci_transport_handle_read(sk, pkt, bottom_half, dst, src); 61462306a36Sopenharmony_ci processed = true; 61562306a36Sopenharmony_ci break; 61662306a36Sopenharmony_ci case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE: 61762306a36Sopenharmony_ci vmci_transport_handle_waiting_write(sk, pkt, bottom_half, 61862306a36Sopenharmony_ci dst, src); 61962306a36Sopenharmony_ci processed = true; 62062306a36Sopenharmony_ci break; 62162306a36Sopenharmony_ci 62262306a36Sopenharmony_ci case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ: 62362306a36Sopenharmony_ci vmci_transport_handle_waiting_read(sk, pkt, bottom_half, 62462306a36Sopenharmony_ci dst, src); 62562306a36Sopenharmony_ci processed = true; 62662306a36Sopenharmony_ci break; 62762306a36Sopenharmony_ci } 62862306a36Sopenharmony_ci 62962306a36Sopenharmony_ci if (pkt_processed) 63062306a36Sopenharmony_ci *pkt_processed = processed; 63162306a36Sopenharmony_ci} 63262306a36Sopenharmony_ci 63362306a36Sopenharmony_cistatic void vmci_transport_notify_pkt_process_request(struct sock *sk) 63462306a36Sopenharmony_ci{ 63562306a36Sopenharmony_ci struct vsock_sock *vsk = vsock_sk(sk); 63662306a36Sopenharmony_ci 63762306a36Sopenharmony_ci PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; 63862306a36Sopenharmony_ci if (vmci_trans(vsk)->consume_size < 63962306a36Sopenharmony_ci PKT_FIELD(vsk, write_notify_min_window)) 64062306a36Sopenharmony_ci PKT_FIELD(vsk, write_notify_min_window) = 64162306a36Sopenharmony_ci vmci_trans(vsk)->consume_size; 64262306a36Sopenharmony_ci} 64362306a36Sopenharmony_ci 64462306a36Sopenharmony_cistatic void vmci_transport_notify_pkt_process_negotiate(struct sock *sk) 64562306a36Sopenharmony_ci{ 64662306a36Sopenharmony_ci struct vsock_sock *vsk = vsock_sk(sk); 64762306a36Sopenharmony_ci 64862306a36Sopenharmony_ci PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; 64962306a36Sopenharmony_ci if (vmci_trans(vsk)->consume_size < 65062306a36Sopenharmony_ci PKT_FIELD(vsk, write_notify_min_window)) 65162306a36Sopenharmony_ci PKT_FIELD(vsk, write_notify_min_window) = 65262306a36Sopenharmony_ci vmci_trans(vsk)->consume_size; 65362306a36Sopenharmony_ci} 65462306a36Sopenharmony_ci 65562306a36Sopenharmony_ci/* Socket control packet based operations. */ 65662306a36Sopenharmony_ciconst struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { 65762306a36Sopenharmony_ci .socket_init = vmci_transport_notify_pkt_socket_init, 65862306a36Sopenharmony_ci .socket_destruct = vmci_transport_notify_pkt_socket_destruct, 65962306a36Sopenharmony_ci .poll_in = vmci_transport_notify_pkt_poll_in, 66062306a36Sopenharmony_ci .poll_out = vmci_transport_notify_pkt_poll_out, 66162306a36Sopenharmony_ci .handle_notify_pkt = vmci_transport_notify_pkt_handle_pkt, 66262306a36Sopenharmony_ci .recv_init = vmci_transport_notify_pkt_recv_init, 66362306a36Sopenharmony_ci .recv_pre_block = vmci_transport_notify_pkt_recv_pre_block, 66462306a36Sopenharmony_ci .recv_pre_dequeue = vmci_transport_notify_pkt_recv_pre_dequeue, 66562306a36Sopenharmony_ci .recv_post_dequeue = vmci_transport_notify_pkt_recv_post_dequeue, 66662306a36Sopenharmony_ci .send_init = vmci_transport_notify_pkt_send_init, 66762306a36Sopenharmony_ci .send_pre_block = vmci_transport_notify_pkt_send_pre_block, 66862306a36Sopenharmony_ci .send_pre_enqueue = vmci_transport_notify_pkt_send_pre_enqueue, 66962306a36Sopenharmony_ci .send_post_enqueue = vmci_transport_notify_pkt_send_post_enqueue, 67062306a36Sopenharmony_ci .process_request = vmci_transport_notify_pkt_process_request, 67162306a36Sopenharmony_ci .process_negotiate = vmci_transport_notify_pkt_process_negotiate, 67262306a36Sopenharmony_ci}; 673