/*
 * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 2762306a36Sopenharmony_ci * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 2862306a36Sopenharmony_ci * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 2962306a36Sopenharmony_ci * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 3062306a36Sopenharmony_ci * SOFTWARE. 3162306a36Sopenharmony_ci * 3262306a36Sopenharmony_ci */ 3362306a36Sopenharmony_ci#include <linux/kernel.h> 3462306a36Sopenharmony_ci#include <linux/slab.h> 3562306a36Sopenharmony_ci#include <net/sock.h> 3662306a36Sopenharmony_ci#include <linux/in.h> 3762306a36Sopenharmony_ci#include <linux/export.h> 3862306a36Sopenharmony_ci#include <linux/sched/clock.h> 3962306a36Sopenharmony_ci#include <linux/time.h> 4062306a36Sopenharmony_ci#include <linux/rds.h> 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_ci#include "rds.h" 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_civoid rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, 4562306a36Sopenharmony_ci struct in6_addr *saddr) 4662306a36Sopenharmony_ci{ 4762306a36Sopenharmony_ci refcount_set(&inc->i_refcount, 1); 4862306a36Sopenharmony_ci INIT_LIST_HEAD(&inc->i_item); 4962306a36Sopenharmony_ci inc->i_conn = conn; 5062306a36Sopenharmony_ci inc->i_saddr = *saddr; 5162306a36Sopenharmony_ci inc->i_usercopy.rdma_cookie = 0; 5262306a36Sopenharmony_ci inc->i_usercopy.rx_tstamp = ktime_set(0, 0); 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_ci memset(inc->i_rx_lat_trace, 0, sizeof(inc->i_rx_lat_trace)); 5562306a36Sopenharmony_ci} 5662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(rds_inc_init); 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_civoid rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *cp, 5962306a36Sopenharmony_ci struct in6_addr *saddr) 6062306a36Sopenharmony_ci{ 6162306a36Sopenharmony_ci refcount_set(&inc->i_refcount, 1); 6262306a36Sopenharmony_ci INIT_LIST_HEAD(&inc->i_item); 6362306a36Sopenharmony_ci inc->i_conn = 
cp->cp_conn; 6462306a36Sopenharmony_ci inc->i_conn_path = cp; 6562306a36Sopenharmony_ci inc->i_saddr = *saddr; 6662306a36Sopenharmony_ci inc->i_usercopy.rdma_cookie = 0; 6762306a36Sopenharmony_ci inc->i_usercopy.rx_tstamp = ktime_set(0, 0); 6862306a36Sopenharmony_ci} 6962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(rds_inc_path_init); 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_cistatic void rds_inc_addref(struct rds_incoming *inc) 7262306a36Sopenharmony_ci{ 7362306a36Sopenharmony_ci rdsdebug("addref inc %p ref %d\n", inc, refcount_read(&inc->i_refcount)); 7462306a36Sopenharmony_ci refcount_inc(&inc->i_refcount); 7562306a36Sopenharmony_ci} 7662306a36Sopenharmony_ci 7762306a36Sopenharmony_civoid rds_inc_put(struct rds_incoming *inc) 7862306a36Sopenharmony_ci{ 7962306a36Sopenharmony_ci rdsdebug("put inc %p ref %d\n", inc, refcount_read(&inc->i_refcount)); 8062306a36Sopenharmony_ci if (refcount_dec_and_test(&inc->i_refcount)) { 8162306a36Sopenharmony_ci BUG_ON(!list_empty(&inc->i_item)); 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci inc->i_conn->c_trans->inc_free(inc); 8462306a36Sopenharmony_ci } 8562306a36Sopenharmony_ci} 8662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(rds_inc_put); 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_cistatic void rds_recv_rcvbuf_delta(struct rds_sock *rs, struct sock *sk, 8962306a36Sopenharmony_ci struct rds_cong_map *map, 9062306a36Sopenharmony_ci int delta, __be16 port) 9162306a36Sopenharmony_ci{ 9262306a36Sopenharmony_ci int now_congested; 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci if (delta == 0) 9562306a36Sopenharmony_ci return; 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci rs->rs_rcv_bytes += delta; 9862306a36Sopenharmony_ci if (delta > 0) 9962306a36Sopenharmony_ci rds_stats_add(s_recv_bytes_added_to_socket, delta); 10062306a36Sopenharmony_ci else 10162306a36Sopenharmony_ci rds_stats_add(s_recv_bytes_removed_from_socket, -delta); 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci /* loop transport doesn't 
send/recv congestion updates */ 10462306a36Sopenharmony_ci if (rs->rs_transport->t_type == RDS_TRANS_LOOP) 10562306a36Sopenharmony_ci return; 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci now_congested = rs->rs_rcv_bytes > rds_sk_rcvbuf(rs); 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_ci rdsdebug("rs %p (%pI6c:%u) recv bytes %d buf %d " 11062306a36Sopenharmony_ci "now_cong %d delta %d\n", 11162306a36Sopenharmony_ci rs, &rs->rs_bound_addr, 11262306a36Sopenharmony_ci ntohs(rs->rs_bound_port), rs->rs_rcv_bytes, 11362306a36Sopenharmony_ci rds_sk_rcvbuf(rs), now_congested, delta); 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_ci /* wasn't -> am congested */ 11662306a36Sopenharmony_ci if (!rs->rs_congested && now_congested) { 11762306a36Sopenharmony_ci rs->rs_congested = 1; 11862306a36Sopenharmony_ci rds_cong_set_bit(map, port); 11962306a36Sopenharmony_ci rds_cong_queue_updates(map); 12062306a36Sopenharmony_ci } 12162306a36Sopenharmony_ci /* was -> aren't congested */ 12262306a36Sopenharmony_ci /* Require more free space before reporting uncongested to prevent 12362306a36Sopenharmony_ci bouncing cong/uncong state too often */ 12462306a36Sopenharmony_ci else if (rs->rs_congested && (rs->rs_rcv_bytes < (rds_sk_rcvbuf(rs)/2))) { 12562306a36Sopenharmony_ci rs->rs_congested = 0; 12662306a36Sopenharmony_ci rds_cong_clear_bit(map, port); 12762306a36Sopenharmony_ci rds_cong_queue_updates(map); 12862306a36Sopenharmony_ci } 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ci /* do nothing if no change in cong state */ 13162306a36Sopenharmony_ci} 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_cistatic void rds_conn_peer_gen_update(struct rds_connection *conn, 13462306a36Sopenharmony_ci u32 peer_gen_num) 13562306a36Sopenharmony_ci{ 13662306a36Sopenharmony_ci int i; 13762306a36Sopenharmony_ci struct rds_message *rm, *tmp; 13862306a36Sopenharmony_ci unsigned long flags; 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_ci WARN_ON(conn->c_trans->t_type != 
RDS_TRANS_TCP); 14162306a36Sopenharmony_ci if (peer_gen_num != 0) { 14262306a36Sopenharmony_ci if (conn->c_peer_gen_num != 0 && 14362306a36Sopenharmony_ci peer_gen_num != conn->c_peer_gen_num) { 14462306a36Sopenharmony_ci for (i = 0; i < RDS_MPATH_WORKERS; i++) { 14562306a36Sopenharmony_ci struct rds_conn_path *cp; 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci cp = &conn->c_path[i]; 14862306a36Sopenharmony_ci spin_lock_irqsave(&cp->cp_lock, flags); 14962306a36Sopenharmony_ci cp->cp_next_tx_seq = 1; 15062306a36Sopenharmony_ci cp->cp_next_rx_seq = 0; 15162306a36Sopenharmony_ci list_for_each_entry_safe(rm, tmp, 15262306a36Sopenharmony_ci &cp->cp_retrans, 15362306a36Sopenharmony_ci m_conn_item) { 15462306a36Sopenharmony_ci set_bit(RDS_MSG_FLUSH, &rm->m_flags); 15562306a36Sopenharmony_ci } 15662306a36Sopenharmony_ci spin_unlock_irqrestore(&cp->cp_lock, flags); 15762306a36Sopenharmony_ci } 15862306a36Sopenharmony_ci } 15962306a36Sopenharmony_ci conn->c_peer_gen_num = peer_gen_num; 16062306a36Sopenharmony_ci } 16162306a36Sopenharmony_ci} 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_ci/* 16462306a36Sopenharmony_ci * Process all extension headers that come with this message. 
16562306a36Sopenharmony_ci */ 16662306a36Sopenharmony_cistatic void rds_recv_incoming_exthdrs(struct rds_incoming *inc, struct rds_sock *rs) 16762306a36Sopenharmony_ci{ 16862306a36Sopenharmony_ci struct rds_header *hdr = &inc->i_hdr; 16962306a36Sopenharmony_ci unsigned int pos = 0, type, len; 17062306a36Sopenharmony_ci union { 17162306a36Sopenharmony_ci struct rds_ext_header_version version; 17262306a36Sopenharmony_ci struct rds_ext_header_rdma rdma; 17362306a36Sopenharmony_ci struct rds_ext_header_rdma_dest rdma_dest; 17462306a36Sopenharmony_ci } buffer; 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci while (1) { 17762306a36Sopenharmony_ci len = sizeof(buffer); 17862306a36Sopenharmony_ci type = rds_message_next_extension(hdr, &pos, &buffer, &len); 17962306a36Sopenharmony_ci if (type == RDS_EXTHDR_NONE) 18062306a36Sopenharmony_ci break; 18162306a36Sopenharmony_ci /* Process extension header here */ 18262306a36Sopenharmony_ci switch (type) { 18362306a36Sopenharmony_ci case RDS_EXTHDR_RDMA: 18462306a36Sopenharmony_ci rds_rdma_unuse(rs, be32_to_cpu(buffer.rdma.h_rdma_rkey), 0); 18562306a36Sopenharmony_ci break; 18662306a36Sopenharmony_ci 18762306a36Sopenharmony_ci case RDS_EXTHDR_RDMA_DEST: 18862306a36Sopenharmony_ci /* We ignore the size for now. We could stash it 18962306a36Sopenharmony_ci * somewhere and use it for error checking. 
*/ 19062306a36Sopenharmony_ci inc->i_usercopy.rdma_cookie = rds_rdma_make_cookie( 19162306a36Sopenharmony_ci be32_to_cpu(buffer.rdma_dest.h_rdma_rkey), 19262306a36Sopenharmony_ci be32_to_cpu(buffer.rdma_dest.h_rdma_offset)); 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_ci break; 19562306a36Sopenharmony_ci } 19662306a36Sopenharmony_ci } 19762306a36Sopenharmony_ci} 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_cistatic void rds_recv_hs_exthdrs(struct rds_header *hdr, 20062306a36Sopenharmony_ci struct rds_connection *conn) 20162306a36Sopenharmony_ci{ 20262306a36Sopenharmony_ci unsigned int pos = 0, type, len; 20362306a36Sopenharmony_ci union { 20462306a36Sopenharmony_ci struct rds_ext_header_version version; 20562306a36Sopenharmony_ci u16 rds_npaths; 20662306a36Sopenharmony_ci u32 rds_gen_num; 20762306a36Sopenharmony_ci } buffer; 20862306a36Sopenharmony_ci u32 new_peer_gen_num = 0; 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci while (1) { 21162306a36Sopenharmony_ci len = sizeof(buffer); 21262306a36Sopenharmony_ci type = rds_message_next_extension(hdr, &pos, &buffer, &len); 21362306a36Sopenharmony_ci if (type == RDS_EXTHDR_NONE) 21462306a36Sopenharmony_ci break; 21562306a36Sopenharmony_ci /* Process extension header here */ 21662306a36Sopenharmony_ci switch (type) { 21762306a36Sopenharmony_ci case RDS_EXTHDR_NPATHS: 21862306a36Sopenharmony_ci conn->c_npaths = min_t(int, RDS_MPATH_WORKERS, 21962306a36Sopenharmony_ci be16_to_cpu(buffer.rds_npaths)); 22062306a36Sopenharmony_ci break; 22162306a36Sopenharmony_ci case RDS_EXTHDR_GEN_NUM: 22262306a36Sopenharmony_ci new_peer_gen_num = be32_to_cpu(buffer.rds_gen_num); 22362306a36Sopenharmony_ci break; 22462306a36Sopenharmony_ci default: 22562306a36Sopenharmony_ci pr_warn_ratelimited("ignoring unknown exthdr type " 22662306a36Sopenharmony_ci "0x%x\n", type); 22762306a36Sopenharmony_ci } 22862306a36Sopenharmony_ci } 22962306a36Sopenharmony_ci /* if RDS_EXTHDR_NPATHS was not found, default to a single-path */ 
23062306a36Sopenharmony_ci conn->c_npaths = max_t(int, conn->c_npaths, 1); 23162306a36Sopenharmony_ci conn->c_ping_triggered = 0; 23262306a36Sopenharmony_ci rds_conn_peer_gen_update(conn, new_peer_gen_num); 23362306a36Sopenharmony_ci} 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ci/* rds_start_mprds() will synchronously start multiple paths when appropriate. 23662306a36Sopenharmony_ci * The scheme is based on the following rules: 23762306a36Sopenharmony_ci * 23862306a36Sopenharmony_ci * 1. rds_sendmsg on first connect attempt sends the probe ping, with the 23962306a36Sopenharmony_ci * sender's npaths (s_npaths) 24062306a36Sopenharmony_ci * 2. rcvr of probe-ping knows the mprds_paths = min(s_npaths, r_npaths). It 24162306a36Sopenharmony_ci * sends back a probe-pong with r_npaths. After that, if rcvr is the 24262306a36Sopenharmony_ci * smaller ip addr, it starts rds_conn_path_connect_if_down on all 24362306a36Sopenharmony_ci * mprds_paths. 24462306a36Sopenharmony_ci * 3. sender gets woken up, and can move to rds_conn_path_connect_if_down. 24562306a36Sopenharmony_ci * If it is the smaller ipaddr, rds_conn_path_connect_if_down can be 24662306a36Sopenharmony_ci * called after reception of the probe-pong on all mprds_paths. 24762306a36Sopenharmony_ci * Otherwise (sender of probe-ping is not the smaller ip addr): just call 24862306a36Sopenharmony_ci * rds_conn_path_connect_if_down on the hashed path. (see rule 4) 24962306a36Sopenharmony_ci * 4. rds_connect_worker must only trigger a connection if laddr < faddr. 25062306a36Sopenharmony_ci * 5. sender may end up queuing the packet on the cp. will get sent out later. 25162306a36Sopenharmony_ci * when connection is completed. 
25262306a36Sopenharmony_ci */ 25362306a36Sopenharmony_cistatic void rds_start_mprds(struct rds_connection *conn) 25462306a36Sopenharmony_ci{ 25562306a36Sopenharmony_ci int i; 25662306a36Sopenharmony_ci struct rds_conn_path *cp; 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_ci if (conn->c_npaths > 1 && 25962306a36Sopenharmony_ci rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) < 0) { 26062306a36Sopenharmony_ci for (i = 0; i < conn->c_npaths; i++) { 26162306a36Sopenharmony_ci cp = &conn->c_path[i]; 26262306a36Sopenharmony_ci rds_conn_path_connect_if_down(cp); 26362306a36Sopenharmony_ci } 26462306a36Sopenharmony_ci } 26562306a36Sopenharmony_ci} 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ci/* 26862306a36Sopenharmony_ci * The transport must make sure that this is serialized against other 26962306a36Sopenharmony_ci * rx and conn reset on this specific conn. 27062306a36Sopenharmony_ci * 27162306a36Sopenharmony_ci * We currently assert that only one fragmented message will be sent 27262306a36Sopenharmony_ci * down a connection at a time. This lets us reassemble in the conn 27362306a36Sopenharmony_ci * instead of per-flow which means that we don't have to go digging through 27462306a36Sopenharmony_ci * flows to tear down partial reassembly progress on conn failure and 27562306a36Sopenharmony_ci * we save flow lookup and locking for each frag arrival. It does mean 27662306a36Sopenharmony_ci * that small messages will wait behind large ones. Fragmenting at all 27762306a36Sopenharmony_ci * is only to reduce the memory consumption of pre-posted buffers. 27862306a36Sopenharmony_ci * 27962306a36Sopenharmony_ci * The caller passes in saddr and daddr instead of us getting it from the 28062306a36Sopenharmony_ci * conn. This lets loopback, who only has one conn for both directions, 28162306a36Sopenharmony_ci * tell us which roles the addrs in the conn are playing for this message. 
28262306a36Sopenharmony_ci */ 28362306a36Sopenharmony_civoid rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr, 28462306a36Sopenharmony_ci struct in6_addr *daddr, 28562306a36Sopenharmony_ci struct rds_incoming *inc, gfp_t gfp) 28662306a36Sopenharmony_ci{ 28762306a36Sopenharmony_ci struct rds_sock *rs = NULL; 28862306a36Sopenharmony_ci struct sock *sk; 28962306a36Sopenharmony_ci unsigned long flags; 29062306a36Sopenharmony_ci struct rds_conn_path *cp; 29162306a36Sopenharmony_ci 29262306a36Sopenharmony_ci inc->i_conn = conn; 29362306a36Sopenharmony_ci inc->i_rx_jiffies = jiffies; 29462306a36Sopenharmony_ci if (conn->c_trans->t_mp_capable) 29562306a36Sopenharmony_ci cp = inc->i_conn_path; 29662306a36Sopenharmony_ci else 29762306a36Sopenharmony_ci cp = &conn->c_path[0]; 29862306a36Sopenharmony_ci 29962306a36Sopenharmony_ci rdsdebug("conn %p next %llu inc %p seq %llu len %u sport %u dport %u " 30062306a36Sopenharmony_ci "flags 0x%x rx_jiffies %lu\n", conn, 30162306a36Sopenharmony_ci (unsigned long long)cp->cp_next_rx_seq, 30262306a36Sopenharmony_ci inc, 30362306a36Sopenharmony_ci (unsigned long long)be64_to_cpu(inc->i_hdr.h_sequence), 30462306a36Sopenharmony_ci be32_to_cpu(inc->i_hdr.h_len), 30562306a36Sopenharmony_ci be16_to_cpu(inc->i_hdr.h_sport), 30662306a36Sopenharmony_ci be16_to_cpu(inc->i_hdr.h_dport), 30762306a36Sopenharmony_ci inc->i_hdr.h_flags, 30862306a36Sopenharmony_ci inc->i_rx_jiffies); 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_ci /* 31162306a36Sopenharmony_ci * Sequence numbers should only increase. Messages get their 31262306a36Sopenharmony_ci * sequence number as they're queued in a sending conn. They 31362306a36Sopenharmony_ci * can be dropped, though, if the sending socket is closed before 31462306a36Sopenharmony_ci * they hit the wire. So sequence numbers can skip forward 31562306a36Sopenharmony_ci * under normal operation. 
They can also drop back in the conn 31662306a36Sopenharmony_ci * failover case as previously sent messages are resent down the 31762306a36Sopenharmony_ci * new instance of a conn. We drop those, otherwise we have 31862306a36Sopenharmony_ci * to assume that the next valid seq does not come after a 31962306a36Sopenharmony_ci * hole in the fragment stream. 32062306a36Sopenharmony_ci * 32162306a36Sopenharmony_ci * The headers don't give us a way to realize if fragments of 32262306a36Sopenharmony_ci * a message have been dropped. We assume that frags that arrive 32362306a36Sopenharmony_ci * to a flow are part of the current message on the flow that is 32462306a36Sopenharmony_ci * being reassembled. This means that senders can't drop messages 32562306a36Sopenharmony_ci * from the sending conn until all their frags are sent. 32662306a36Sopenharmony_ci * 32762306a36Sopenharmony_ci * XXX we could spend more on the wire to get more robust failure 32862306a36Sopenharmony_ci * detection, arguably worth it to avoid data corruption. 
32962306a36Sopenharmony_ci */ 33062306a36Sopenharmony_ci if (be64_to_cpu(inc->i_hdr.h_sequence) < cp->cp_next_rx_seq && 33162306a36Sopenharmony_ci (inc->i_hdr.h_flags & RDS_FLAG_RETRANSMITTED)) { 33262306a36Sopenharmony_ci rds_stats_inc(s_recv_drop_old_seq); 33362306a36Sopenharmony_ci goto out; 33462306a36Sopenharmony_ci } 33562306a36Sopenharmony_ci cp->cp_next_rx_seq = be64_to_cpu(inc->i_hdr.h_sequence) + 1; 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) { 33862306a36Sopenharmony_ci if (inc->i_hdr.h_sport == 0) { 33962306a36Sopenharmony_ci rdsdebug("ignore ping with 0 sport from %pI6c\n", 34062306a36Sopenharmony_ci saddr); 34162306a36Sopenharmony_ci goto out; 34262306a36Sopenharmony_ci } 34362306a36Sopenharmony_ci rds_stats_inc(s_recv_ping); 34462306a36Sopenharmony_ci rds_send_pong(cp, inc->i_hdr.h_sport); 34562306a36Sopenharmony_ci /* if this is a handshake ping, start multipath if necessary */ 34662306a36Sopenharmony_ci if (RDS_HS_PROBE(be16_to_cpu(inc->i_hdr.h_sport), 34762306a36Sopenharmony_ci be16_to_cpu(inc->i_hdr.h_dport))) { 34862306a36Sopenharmony_ci rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn); 34962306a36Sopenharmony_ci rds_start_mprds(cp->cp_conn); 35062306a36Sopenharmony_ci } 35162306a36Sopenharmony_ci goto out; 35262306a36Sopenharmony_ci } 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci if (be16_to_cpu(inc->i_hdr.h_dport) == RDS_FLAG_PROBE_PORT && 35562306a36Sopenharmony_ci inc->i_hdr.h_sport == 0) { 35662306a36Sopenharmony_ci rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn); 35762306a36Sopenharmony_ci /* if this is a handshake pong, start multipath if necessary */ 35862306a36Sopenharmony_ci rds_start_mprds(cp->cp_conn); 35962306a36Sopenharmony_ci wake_up(&cp->cp_conn->c_hs_waitq); 36062306a36Sopenharmony_ci goto out; 36162306a36Sopenharmony_ci } 36262306a36Sopenharmony_ci 36362306a36Sopenharmony_ci rs = rds_find_bound(daddr, inc->i_hdr.h_dport, conn->c_bound_if); 
36462306a36Sopenharmony_ci if (!rs) { 36562306a36Sopenharmony_ci rds_stats_inc(s_recv_drop_no_sock); 36662306a36Sopenharmony_ci goto out; 36762306a36Sopenharmony_ci } 36862306a36Sopenharmony_ci 36962306a36Sopenharmony_ci /* Process extension headers */ 37062306a36Sopenharmony_ci rds_recv_incoming_exthdrs(inc, rs); 37162306a36Sopenharmony_ci 37262306a36Sopenharmony_ci /* We can be racing with rds_release() which marks the socket dead. */ 37362306a36Sopenharmony_ci sk = rds_rs_to_sk(rs); 37462306a36Sopenharmony_ci 37562306a36Sopenharmony_ci /* serialize with rds_release -> sock_orphan */ 37662306a36Sopenharmony_ci write_lock_irqsave(&rs->rs_recv_lock, flags); 37762306a36Sopenharmony_ci if (!sock_flag(sk, SOCK_DEAD)) { 37862306a36Sopenharmony_ci rdsdebug("adding inc %p to rs %p's recv queue\n", inc, rs); 37962306a36Sopenharmony_ci rds_stats_inc(s_recv_queued); 38062306a36Sopenharmony_ci rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong, 38162306a36Sopenharmony_ci be32_to_cpu(inc->i_hdr.h_len), 38262306a36Sopenharmony_ci inc->i_hdr.h_dport); 38362306a36Sopenharmony_ci if (sock_flag(sk, SOCK_RCVTSTAMP)) 38462306a36Sopenharmony_ci inc->i_usercopy.rx_tstamp = ktime_get_real(); 38562306a36Sopenharmony_ci rds_inc_addref(inc); 38662306a36Sopenharmony_ci inc->i_rx_lat_trace[RDS_MSG_RX_END] = local_clock(); 38762306a36Sopenharmony_ci list_add_tail(&inc->i_item, &rs->rs_recv_queue); 38862306a36Sopenharmony_ci __rds_wake_sk_sleep(sk); 38962306a36Sopenharmony_ci } else { 39062306a36Sopenharmony_ci rds_stats_inc(s_recv_drop_dead_sock); 39162306a36Sopenharmony_ci } 39262306a36Sopenharmony_ci write_unlock_irqrestore(&rs->rs_recv_lock, flags); 39362306a36Sopenharmony_ci 39462306a36Sopenharmony_ciout: 39562306a36Sopenharmony_ci if (rs) 39662306a36Sopenharmony_ci rds_sock_put(rs); 39762306a36Sopenharmony_ci} 39862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(rds_recv_incoming); 39962306a36Sopenharmony_ci 40062306a36Sopenharmony_ci/* 40162306a36Sopenharmony_ci * be very careful here. 
This is being called as the condition in 40262306a36Sopenharmony_ci * wait_event_*() needs to cope with being called many times. 40362306a36Sopenharmony_ci */ 40462306a36Sopenharmony_cistatic int rds_next_incoming(struct rds_sock *rs, struct rds_incoming **inc) 40562306a36Sopenharmony_ci{ 40662306a36Sopenharmony_ci unsigned long flags; 40762306a36Sopenharmony_ci 40862306a36Sopenharmony_ci if (!*inc) { 40962306a36Sopenharmony_ci read_lock_irqsave(&rs->rs_recv_lock, flags); 41062306a36Sopenharmony_ci if (!list_empty(&rs->rs_recv_queue)) { 41162306a36Sopenharmony_ci *inc = list_entry(rs->rs_recv_queue.next, 41262306a36Sopenharmony_ci struct rds_incoming, 41362306a36Sopenharmony_ci i_item); 41462306a36Sopenharmony_ci rds_inc_addref(*inc); 41562306a36Sopenharmony_ci } 41662306a36Sopenharmony_ci read_unlock_irqrestore(&rs->rs_recv_lock, flags); 41762306a36Sopenharmony_ci } 41862306a36Sopenharmony_ci 41962306a36Sopenharmony_ci return *inc != NULL; 42062306a36Sopenharmony_ci} 42162306a36Sopenharmony_ci 42262306a36Sopenharmony_cistatic int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc, 42362306a36Sopenharmony_ci int drop) 42462306a36Sopenharmony_ci{ 42562306a36Sopenharmony_ci struct sock *sk = rds_rs_to_sk(rs); 42662306a36Sopenharmony_ci int ret = 0; 42762306a36Sopenharmony_ci unsigned long flags; 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci write_lock_irqsave(&rs->rs_recv_lock, flags); 43062306a36Sopenharmony_ci if (!list_empty(&inc->i_item)) { 43162306a36Sopenharmony_ci ret = 1; 43262306a36Sopenharmony_ci if (drop) { 43362306a36Sopenharmony_ci /* XXX make sure this i_conn is reliable */ 43462306a36Sopenharmony_ci rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong, 43562306a36Sopenharmony_ci -be32_to_cpu(inc->i_hdr.h_len), 43662306a36Sopenharmony_ci inc->i_hdr.h_dport); 43762306a36Sopenharmony_ci list_del_init(&inc->i_item); 43862306a36Sopenharmony_ci rds_inc_put(inc); 43962306a36Sopenharmony_ci } 44062306a36Sopenharmony_ci } 
44162306a36Sopenharmony_ci write_unlock_irqrestore(&rs->rs_recv_lock, flags); 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci rdsdebug("inc %p rs %p still %d dropped %d\n", inc, rs, ret, drop); 44462306a36Sopenharmony_ci return ret; 44562306a36Sopenharmony_ci} 44662306a36Sopenharmony_ci 44762306a36Sopenharmony_ci/* 44862306a36Sopenharmony_ci * Pull errors off the error queue. 44962306a36Sopenharmony_ci * If msghdr is NULL, we will just purge the error queue. 45062306a36Sopenharmony_ci */ 45162306a36Sopenharmony_ciint rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr) 45262306a36Sopenharmony_ci{ 45362306a36Sopenharmony_ci struct rds_notifier *notifier; 45462306a36Sopenharmony_ci struct rds_rdma_notify cmsg; 45562306a36Sopenharmony_ci unsigned int count = 0, max_messages = ~0U; 45662306a36Sopenharmony_ci unsigned long flags; 45762306a36Sopenharmony_ci LIST_HEAD(copy); 45862306a36Sopenharmony_ci int err = 0; 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_ci memset(&cmsg, 0, sizeof(cmsg)); /* fill holes with zero */ 46162306a36Sopenharmony_ci 46262306a36Sopenharmony_ci /* put_cmsg copies to user space and thus may sleep. We can't do this 46362306a36Sopenharmony_ci * with rs_lock held, so first grab as many notifications as we can stuff 46462306a36Sopenharmony_ci * in the user provided cmsg buffer. We don't try to copy more, to avoid 46562306a36Sopenharmony_ci * losing notifications - except when the buffer is so small that it wouldn't 46662306a36Sopenharmony_ci * even hold a single notification. Then we give him as much of this single 46762306a36Sopenharmony_ci * msg as we can squeeze in, and set MSG_CTRUNC. 
46862306a36Sopenharmony_ci */ 46962306a36Sopenharmony_ci if (msghdr) { 47062306a36Sopenharmony_ci max_messages = msghdr->msg_controllen / CMSG_SPACE(sizeof(cmsg)); 47162306a36Sopenharmony_ci if (!max_messages) 47262306a36Sopenharmony_ci max_messages = 1; 47362306a36Sopenharmony_ci } 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_ci spin_lock_irqsave(&rs->rs_lock, flags); 47662306a36Sopenharmony_ci while (!list_empty(&rs->rs_notify_queue) && count < max_messages) { 47762306a36Sopenharmony_ci notifier = list_entry(rs->rs_notify_queue.next, 47862306a36Sopenharmony_ci struct rds_notifier, n_list); 47962306a36Sopenharmony_ci list_move(¬ifier->n_list, ©); 48062306a36Sopenharmony_ci count++; 48162306a36Sopenharmony_ci } 48262306a36Sopenharmony_ci spin_unlock_irqrestore(&rs->rs_lock, flags); 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_ci if (!count) 48562306a36Sopenharmony_ci return 0; 48662306a36Sopenharmony_ci 48762306a36Sopenharmony_ci while (!list_empty(©)) { 48862306a36Sopenharmony_ci notifier = list_entry(copy.next, struct rds_notifier, n_list); 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_ci if (msghdr) { 49162306a36Sopenharmony_ci cmsg.user_token = notifier->n_user_token; 49262306a36Sopenharmony_ci cmsg.status = notifier->n_status; 49362306a36Sopenharmony_ci 49462306a36Sopenharmony_ci err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_RDMA_STATUS, 49562306a36Sopenharmony_ci sizeof(cmsg), &cmsg); 49662306a36Sopenharmony_ci if (err) 49762306a36Sopenharmony_ci break; 49862306a36Sopenharmony_ci } 49962306a36Sopenharmony_ci 50062306a36Sopenharmony_ci list_del_init(¬ifier->n_list); 50162306a36Sopenharmony_ci kfree(notifier); 50262306a36Sopenharmony_ci } 50362306a36Sopenharmony_ci 50462306a36Sopenharmony_ci /* If we bailed out because of an error in put_cmsg, 50562306a36Sopenharmony_ci * we may be left with one or more notifications that we 50662306a36Sopenharmony_ci * didn't process. Return them to the head of the list. 
*/ 50762306a36Sopenharmony_ci if (!list_empty(©)) { 50862306a36Sopenharmony_ci spin_lock_irqsave(&rs->rs_lock, flags); 50962306a36Sopenharmony_ci list_splice(©, &rs->rs_notify_queue); 51062306a36Sopenharmony_ci spin_unlock_irqrestore(&rs->rs_lock, flags); 51162306a36Sopenharmony_ci } 51262306a36Sopenharmony_ci 51362306a36Sopenharmony_ci return err; 51462306a36Sopenharmony_ci} 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci/* 51762306a36Sopenharmony_ci * Queue a congestion notification 51862306a36Sopenharmony_ci */ 51962306a36Sopenharmony_cistatic int rds_notify_cong(struct rds_sock *rs, struct msghdr *msghdr) 52062306a36Sopenharmony_ci{ 52162306a36Sopenharmony_ci uint64_t notify = rs->rs_cong_notify; 52262306a36Sopenharmony_ci unsigned long flags; 52362306a36Sopenharmony_ci int err; 52462306a36Sopenharmony_ci 52562306a36Sopenharmony_ci err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_CONG_UPDATE, 52662306a36Sopenharmony_ci sizeof(notify), ¬ify); 52762306a36Sopenharmony_ci if (err) 52862306a36Sopenharmony_ci return err; 52962306a36Sopenharmony_ci 53062306a36Sopenharmony_ci spin_lock_irqsave(&rs->rs_lock, flags); 53162306a36Sopenharmony_ci rs->rs_cong_notify &= ~notify; 53262306a36Sopenharmony_ci spin_unlock_irqrestore(&rs->rs_lock, flags); 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_ci return 0; 53562306a36Sopenharmony_ci} 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci/* 53862306a36Sopenharmony_ci * Receive any control messages. 
53962306a36Sopenharmony_ci */ 54062306a36Sopenharmony_cistatic int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg, 54162306a36Sopenharmony_ci struct rds_sock *rs) 54262306a36Sopenharmony_ci{ 54362306a36Sopenharmony_ci int ret = 0; 54462306a36Sopenharmony_ci 54562306a36Sopenharmony_ci if (inc->i_usercopy.rdma_cookie) { 54662306a36Sopenharmony_ci ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST, 54762306a36Sopenharmony_ci sizeof(inc->i_usercopy.rdma_cookie), 54862306a36Sopenharmony_ci &inc->i_usercopy.rdma_cookie); 54962306a36Sopenharmony_ci if (ret) 55062306a36Sopenharmony_ci goto out; 55162306a36Sopenharmony_ci } 55262306a36Sopenharmony_ci 55362306a36Sopenharmony_ci if ((inc->i_usercopy.rx_tstamp != 0) && 55462306a36Sopenharmony_ci sock_flag(rds_rs_to_sk(rs), SOCK_RCVTSTAMP)) { 55562306a36Sopenharmony_ci struct __kernel_old_timeval tv = 55662306a36Sopenharmony_ci ns_to_kernel_old_timeval(inc->i_usercopy.rx_tstamp); 55762306a36Sopenharmony_ci 55862306a36Sopenharmony_ci if (!sock_flag(rds_rs_to_sk(rs), SOCK_TSTAMP_NEW)) { 55962306a36Sopenharmony_ci ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, 56062306a36Sopenharmony_ci sizeof(tv), &tv); 56162306a36Sopenharmony_ci } else { 56262306a36Sopenharmony_ci struct __kernel_sock_timeval sk_tv; 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_ci sk_tv.tv_sec = tv.tv_sec; 56562306a36Sopenharmony_ci sk_tv.tv_usec = tv.tv_usec; 56662306a36Sopenharmony_ci 56762306a36Sopenharmony_ci ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW, 56862306a36Sopenharmony_ci sizeof(sk_tv), &sk_tv); 56962306a36Sopenharmony_ci } 57062306a36Sopenharmony_ci 57162306a36Sopenharmony_ci if (ret) 57262306a36Sopenharmony_ci goto out; 57362306a36Sopenharmony_ci } 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_ci if (rs->rs_rx_traces) { 57662306a36Sopenharmony_ci struct rds_cmsg_rx_trace t; 57762306a36Sopenharmony_ci int i, j; 57862306a36Sopenharmony_ci 57962306a36Sopenharmony_ci memset(&t, 0, sizeof(t)); 58062306a36Sopenharmony_ci 
inc->i_rx_lat_trace[RDS_MSG_RX_CMSG] = local_clock(); 58162306a36Sopenharmony_ci t.rx_traces = rs->rs_rx_traces; 58262306a36Sopenharmony_ci for (i = 0; i < rs->rs_rx_traces; i++) { 58362306a36Sopenharmony_ci j = rs->rs_rx_trace[i]; 58462306a36Sopenharmony_ci t.rx_trace_pos[i] = j; 58562306a36Sopenharmony_ci t.rx_trace[i] = inc->i_rx_lat_trace[j + 1] - 58662306a36Sopenharmony_ci inc->i_rx_lat_trace[j]; 58762306a36Sopenharmony_ci } 58862306a36Sopenharmony_ci 58962306a36Sopenharmony_ci ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RXPATH_LATENCY, 59062306a36Sopenharmony_ci sizeof(t), &t); 59162306a36Sopenharmony_ci if (ret) 59262306a36Sopenharmony_ci goto out; 59362306a36Sopenharmony_ci } 59462306a36Sopenharmony_ci 59562306a36Sopenharmony_ciout: 59662306a36Sopenharmony_ci return ret; 59762306a36Sopenharmony_ci} 59862306a36Sopenharmony_ci 59962306a36Sopenharmony_cistatic bool rds_recvmsg_zcookie(struct rds_sock *rs, struct msghdr *msg) 60062306a36Sopenharmony_ci{ 60162306a36Sopenharmony_ci struct rds_msg_zcopy_queue *q = &rs->rs_zcookie_queue; 60262306a36Sopenharmony_ci struct rds_msg_zcopy_info *info = NULL; 60362306a36Sopenharmony_ci struct rds_zcopy_cookies *done; 60462306a36Sopenharmony_ci unsigned long flags; 60562306a36Sopenharmony_ci 60662306a36Sopenharmony_ci if (!msg->msg_control) 60762306a36Sopenharmony_ci return false; 60862306a36Sopenharmony_ci 60962306a36Sopenharmony_ci if (!sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY) || 61062306a36Sopenharmony_ci msg->msg_controllen < CMSG_SPACE(sizeof(*done))) 61162306a36Sopenharmony_ci return false; 61262306a36Sopenharmony_ci 61362306a36Sopenharmony_ci spin_lock_irqsave(&q->lock, flags); 61462306a36Sopenharmony_ci if (!list_empty(&q->zcookie_head)) { 61562306a36Sopenharmony_ci info = list_entry(q->zcookie_head.next, 61662306a36Sopenharmony_ci struct rds_msg_zcopy_info, rs_zcookie_next); 61762306a36Sopenharmony_ci list_del(&info->rs_zcookie_next); 61862306a36Sopenharmony_ci } 61962306a36Sopenharmony_ci 
spin_unlock_irqrestore(&q->lock, flags); 62062306a36Sopenharmony_ci if (!info) 62162306a36Sopenharmony_ci return false; 62262306a36Sopenharmony_ci done = &info->zcookies; 62362306a36Sopenharmony_ci if (put_cmsg(msg, SOL_RDS, RDS_CMSG_ZCOPY_COMPLETION, sizeof(*done), 62462306a36Sopenharmony_ci done)) { 62562306a36Sopenharmony_ci spin_lock_irqsave(&q->lock, flags); 62662306a36Sopenharmony_ci list_add(&info->rs_zcookie_next, &q->zcookie_head); 62762306a36Sopenharmony_ci spin_unlock_irqrestore(&q->lock, flags); 62862306a36Sopenharmony_ci return false; 62962306a36Sopenharmony_ci } 63062306a36Sopenharmony_ci kfree(info); 63162306a36Sopenharmony_ci return true; 63262306a36Sopenharmony_ci} 63362306a36Sopenharmony_ci 63462306a36Sopenharmony_ciint rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, 63562306a36Sopenharmony_ci int msg_flags) 63662306a36Sopenharmony_ci{ 63762306a36Sopenharmony_ci struct sock *sk = sock->sk; 63862306a36Sopenharmony_ci struct rds_sock *rs = rds_sk_to_rs(sk); 63962306a36Sopenharmony_ci long timeo; 64062306a36Sopenharmony_ci int ret = 0, nonblock = msg_flags & MSG_DONTWAIT; 64162306a36Sopenharmony_ci DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); 64262306a36Sopenharmony_ci DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); 64362306a36Sopenharmony_ci struct rds_incoming *inc = NULL; 64462306a36Sopenharmony_ci 64562306a36Sopenharmony_ci /* udp_recvmsg()->sock_recvtimeo() gets away without locking too.. 
*/ 64662306a36Sopenharmony_ci timeo = sock_rcvtimeo(sk, nonblock); 64762306a36Sopenharmony_ci 64862306a36Sopenharmony_ci rdsdebug("size %zu flags 0x%x timeo %ld\n", size, msg_flags, timeo); 64962306a36Sopenharmony_ci 65062306a36Sopenharmony_ci if (msg_flags & MSG_OOB) 65162306a36Sopenharmony_ci goto out; 65262306a36Sopenharmony_ci if (msg_flags & MSG_ERRQUEUE) 65362306a36Sopenharmony_ci return sock_recv_errqueue(sk, msg, size, SOL_IP, IP_RECVERR); 65462306a36Sopenharmony_ci 65562306a36Sopenharmony_ci while (1) { 65662306a36Sopenharmony_ci /* If there are pending notifications, do those - and nothing else */ 65762306a36Sopenharmony_ci if (!list_empty(&rs->rs_notify_queue)) { 65862306a36Sopenharmony_ci ret = rds_notify_queue_get(rs, msg); 65962306a36Sopenharmony_ci break; 66062306a36Sopenharmony_ci } 66162306a36Sopenharmony_ci 66262306a36Sopenharmony_ci if (rs->rs_cong_notify) { 66362306a36Sopenharmony_ci ret = rds_notify_cong(rs, msg); 66462306a36Sopenharmony_ci break; 66562306a36Sopenharmony_ci } 66662306a36Sopenharmony_ci 66762306a36Sopenharmony_ci if (!rds_next_incoming(rs, &inc)) { 66862306a36Sopenharmony_ci if (nonblock) { 66962306a36Sopenharmony_ci bool reaped = rds_recvmsg_zcookie(rs, msg); 67062306a36Sopenharmony_ci 67162306a36Sopenharmony_ci ret = reaped ? 
0 : -EAGAIN; 67262306a36Sopenharmony_ci break; 67362306a36Sopenharmony_ci } 67462306a36Sopenharmony_ci 67562306a36Sopenharmony_ci timeo = wait_event_interruptible_timeout(*sk_sleep(sk), 67662306a36Sopenharmony_ci (!list_empty(&rs->rs_notify_queue) || 67762306a36Sopenharmony_ci rs->rs_cong_notify || 67862306a36Sopenharmony_ci rds_next_incoming(rs, &inc)), timeo); 67962306a36Sopenharmony_ci rdsdebug("recvmsg woke inc %p timeo %ld\n", inc, 68062306a36Sopenharmony_ci timeo); 68162306a36Sopenharmony_ci if (timeo > 0 || timeo == MAX_SCHEDULE_TIMEOUT) 68262306a36Sopenharmony_ci continue; 68362306a36Sopenharmony_ci 68462306a36Sopenharmony_ci ret = timeo; 68562306a36Sopenharmony_ci if (ret == 0) 68662306a36Sopenharmony_ci ret = -ETIMEDOUT; 68762306a36Sopenharmony_ci break; 68862306a36Sopenharmony_ci } 68962306a36Sopenharmony_ci 69062306a36Sopenharmony_ci rdsdebug("copying inc %p from %pI6c:%u to user\n", inc, 69162306a36Sopenharmony_ci &inc->i_conn->c_faddr, 69262306a36Sopenharmony_ci ntohs(inc->i_hdr.h_sport)); 69362306a36Sopenharmony_ci ret = inc->i_conn->c_trans->inc_copy_to_user(inc, &msg->msg_iter); 69462306a36Sopenharmony_ci if (ret < 0) 69562306a36Sopenharmony_ci break; 69662306a36Sopenharmony_ci 69762306a36Sopenharmony_ci /* 69862306a36Sopenharmony_ci * if the message we just copied isn't at the head of the 69962306a36Sopenharmony_ci * recv queue then someone else raced us to return it, try 70062306a36Sopenharmony_ci * to get the next message. 
70162306a36Sopenharmony_ci */ 70262306a36Sopenharmony_ci if (!rds_still_queued(rs, inc, !(msg_flags & MSG_PEEK))) { 70362306a36Sopenharmony_ci rds_inc_put(inc); 70462306a36Sopenharmony_ci inc = NULL; 70562306a36Sopenharmony_ci rds_stats_inc(s_recv_deliver_raced); 70662306a36Sopenharmony_ci iov_iter_revert(&msg->msg_iter, ret); 70762306a36Sopenharmony_ci continue; 70862306a36Sopenharmony_ci } 70962306a36Sopenharmony_ci 71062306a36Sopenharmony_ci if (ret < be32_to_cpu(inc->i_hdr.h_len)) { 71162306a36Sopenharmony_ci if (msg_flags & MSG_TRUNC) 71262306a36Sopenharmony_ci ret = be32_to_cpu(inc->i_hdr.h_len); 71362306a36Sopenharmony_ci msg->msg_flags |= MSG_TRUNC; 71462306a36Sopenharmony_ci } 71562306a36Sopenharmony_ci 71662306a36Sopenharmony_ci if (rds_cmsg_recv(inc, msg, rs)) { 71762306a36Sopenharmony_ci ret = -EFAULT; 71862306a36Sopenharmony_ci break; 71962306a36Sopenharmony_ci } 72062306a36Sopenharmony_ci rds_recvmsg_zcookie(rs, msg); 72162306a36Sopenharmony_ci 72262306a36Sopenharmony_ci rds_stats_inc(s_recv_delivered); 72362306a36Sopenharmony_ci 72462306a36Sopenharmony_ci if (msg->msg_name) { 72562306a36Sopenharmony_ci if (ipv6_addr_v4mapped(&inc->i_saddr)) { 72662306a36Sopenharmony_ci sin->sin_family = AF_INET; 72762306a36Sopenharmony_ci sin->sin_port = inc->i_hdr.h_sport; 72862306a36Sopenharmony_ci sin->sin_addr.s_addr = 72962306a36Sopenharmony_ci inc->i_saddr.s6_addr32[3]; 73062306a36Sopenharmony_ci memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); 73162306a36Sopenharmony_ci msg->msg_namelen = sizeof(*sin); 73262306a36Sopenharmony_ci } else { 73362306a36Sopenharmony_ci sin6->sin6_family = AF_INET6; 73462306a36Sopenharmony_ci sin6->sin6_port = inc->i_hdr.h_sport; 73562306a36Sopenharmony_ci sin6->sin6_addr = inc->i_saddr; 73662306a36Sopenharmony_ci sin6->sin6_flowinfo = 0; 73762306a36Sopenharmony_ci sin6->sin6_scope_id = rs->rs_bound_scope_id; 73862306a36Sopenharmony_ci msg->msg_namelen = sizeof(*sin6); 73962306a36Sopenharmony_ci } 74062306a36Sopenharmony_ci } 
74162306a36Sopenharmony_ci break; 74262306a36Sopenharmony_ci } 74362306a36Sopenharmony_ci 74462306a36Sopenharmony_ci if (inc) 74562306a36Sopenharmony_ci rds_inc_put(inc); 74662306a36Sopenharmony_ci 74762306a36Sopenharmony_ciout: 74862306a36Sopenharmony_ci return ret; 74962306a36Sopenharmony_ci} 75062306a36Sopenharmony_ci 75162306a36Sopenharmony_ci/* 75262306a36Sopenharmony_ci * The socket is being shut down and we're asked to drop messages that were 75362306a36Sopenharmony_ci * queued for recvmsg. The caller has unbound the socket so the receive path 75462306a36Sopenharmony_ci * won't queue any more incoming fragments or messages on the socket. 75562306a36Sopenharmony_ci */ 75662306a36Sopenharmony_civoid rds_clear_recv_queue(struct rds_sock *rs) 75762306a36Sopenharmony_ci{ 75862306a36Sopenharmony_ci struct sock *sk = rds_rs_to_sk(rs); 75962306a36Sopenharmony_ci struct rds_incoming *inc, *tmp; 76062306a36Sopenharmony_ci unsigned long flags; 76162306a36Sopenharmony_ci 76262306a36Sopenharmony_ci write_lock_irqsave(&rs->rs_recv_lock, flags); 76362306a36Sopenharmony_ci list_for_each_entry_safe(inc, tmp, &rs->rs_recv_queue, i_item) { 76462306a36Sopenharmony_ci rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong, 76562306a36Sopenharmony_ci -be32_to_cpu(inc->i_hdr.h_len), 76662306a36Sopenharmony_ci inc->i_hdr.h_dport); 76762306a36Sopenharmony_ci list_del_init(&inc->i_item); 76862306a36Sopenharmony_ci rds_inc_put(inc); 76962306a36Sopenharmony_ci } 77062306a36Sopenharmony_ci write_unlock_irqrestore(&rs->rs_recv_lock, flags); 77162306a36Sopenharmony_ci} 77262306a36Sopenharmony_ci 77362306a36Sopenharmony_ci/* 77462306a36Sopenharmony_ci * inc->i_saddr isn't used here because it is only set in the receive 77562306a36Sopenharmony_ci * path. 
77662306a36Sopenharmony_ci */ 77762306a36Sopenharmony_civoid rds_inc_info_copy(struct rds_incoming *inc, 77862306a36Sopenharmony_ci struct rds_info_iterator *iter, 77962306a36Sopenharmony_ci __be32 saddr, __be32 daddr, int flip) 78062306a36Sopenharmony_ci{ 78162306a36Sopenharmony_ci struct rds_info_message minfo; 78262306a36Sopenharmony_ci 78362306a36Sopenharmony_ci minfo.seq = be64_to_cpu(inc->i_hdr.h_sequence); 78462306a36Sopenharmony_ci minfo.len = be32_to_cpu(inc->i_hdr.h_len); 78562306a36Sopenharmony_ci minfo.tos = inc->i_conn->c_tos; 78662306a36Sopenharmony_ci 78762306a36Sopenharmony_ci if (flip) { 78862306a36Sopenharmony_ci minfo.laddr = daddr; 78962306a36Sopenharmony_ci minfo.faddr = saddr; 79062306a36Sopenharmony_ci minfo.lport = inc->i_hdr.h_dport; 79162306a36Sopenharmony_ci minfo.fport = inc->i_hdr.h_sport; 79262306a36Sopenharmony_ci } else { 79362306a36Sopenharmony_ci minfo.laddr = saddr; 79462306a36Sopenharmony_ci minfo.faddr = daddr; 79562306a36Sopenharmony_ci minfo.lport = inc->i_hdr.h_sport; 79662306a36Sopenharmony_ci minfo.fport = inc->i_hdr.h_dport; 79762306a36Sopenharmony_ci } 79862306a36Sopenharmony_ci 79962306a36Sopenharmony_ci minfo.flags = 0; 80062306a36Sopenharmony_ci 80162306a36Sopenharmony_ci rds_info_copy(iter, &minfo, sizeof(minfo)); 80262306a36Sopenharmony_ci} 80362306a36Sopenharmony_ci 80462306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 80562306a36Sopenharmony_civoid rds6_inc_info_copy(struct rds_incoming *inc, 80662306a36Sopenharmony_ci struct rds_info_iterator *iter, 80762306a36Sopenharmony_ci struct in6_addr *saddr, struct in6_addr *daddr, 80862306a36Sopenharmony_ci int flip) 80962306a36Sopenharmony_ci{ 81062306a36Sopenharmony_ci struct rds6_info_message minfo6; 81162306a36Sopenharmony_ci 81262306a36Sopenharmony_ci minfo6.seq = be64_to_cpu(inc->i_hdr.h_sequence); 81362306a36Sopenharmony_ci minfo6.len = be32_to_cpu(inc->i_hdr.h_len); 81462306a36Sopenharmony_ci minfo6.tos = inc->i_conn->c_tos; 81562306a36Sopenharmony_ci 
81662306a36Sopenharmony_ci if (flip) { 81762306a36Sopenharmony_ci minfo6.laddr = *daddr; 81862306a36Sopenharmony_ci minfo6.faddr = *saddr; 81962306a36Sopenharmony_ci minfo6.lport = inc->i_hdr.h_dport; 82062306a36Sopenharmony_ci minfo6.fport = inc->i_hdr.h_sport; 82162306a36Sopenharmony_ci } else { 82262306a36Sopenharmony_ci minfo6.laddr = *saddr; 82362306a36Sopenharmony_ci minfo6.faddr = *daddr; 82462306a36Sopenharmony_ci minfo6.lport = inc->i_hdr.h_sport; 82562306a36Sopenharmony_ci minfo6.fport = inc->i_hdr.h_dport; 82662306a36Sopenharmony_ci } 82762306a36Sopenharmony_ci 82862306a36Sopenharmony_ci minfo6.flags = 0; 82962306a36Sopenharmony_ci 83062306a36Sopenharmony_ci rds_info_copy(iter, &minfo6, sizeof(minfo6)); 83162306a36Sopenharmony_ci} 83262306a36Sopenharmony_ci#endif 833