162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (C) 2017, Microsoft Corporation. 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Author(s): Long Li <longli@microsoft.com> 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci#include <linux/module.h> 862306a36Sopenharmony_ci#include <linux/highmem.h> 962306a36Sopenharmony_ci#include "smbdirect.h" 1062306a36Sopenharmony_ci#include "cifs_debug.h" 1162306a36Sopenharmony_ci#include "cifsproto.h" 1262306a36Sopenharmony_ci#include "smb2proto.h" 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_cistatic struct smbd_response *get_empty_queue_buffer( 1562306a36Sopenharmony_ci struct smbd_connection *info); 1662306a36Sopenharmony_cistatic struct smbd_response *get_receive_buffer( 1762306a36Sopenharmony_ci struct smbd_connection *info); 1862306a36Sopenharmony_cistatic void put_receive_buffer( 1962306a36Sopenharmony_ci struct smbd_connection *info, 2062306a36Sopenharmony_ci struct smbd_response *response); 2162306a36Sopenharmony_cistatic int allocate_receive_buffers(struct smbd_connection *info, int num_buf); 2262306a36Sopenharmony_cistatic void destroy_receive_buffers(struct smbd_connection *info); 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_cistatic void put_empty_packet( 2562306a36Sopenharmony_ci struct smbd_connection *info, struct smbd_response *response); 2662306a36Sopenharmony_cistatic void enqueue_reassembly( 2762306a36Sopenharmony_ci struct smbd_connection *info, 2862306a36Sopenharmony_ci struct smbd_response *response, int data_length); 2962306a36Sopenharmony_cistatic struct smbd_response *_get_first_reassembly( 3062306a36Sopenharmony_ci struct smbd_connection *info); 3162306a36Sopenharmony_ci 3262306a36Sopenharmony_cistatic int smbd_post_recv( 3362306a36Sopenharmony_ci struct smbd_connection *info, 3462306a36Sopenharmony_ci struct smbd_response *response); 3562306a36Sopenharmony_ci 3662306a36Sopenharmony_cistatic int 
smbd_post_send_empty(struct smbd_connection *info); 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_cistatic void destroy_mr_list(struct smbd_connection *info); 3962306a36Sopenharmony_cistatic int allocate_mr_list(struct smbd_connection *info); 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_cistruct smb_extract_to_rdma { 4262306a36Sopenharmony_ci struct ib_sge *sge; 4362306a36Sopenharmony_ci unsigned int nr_sge; 4462306a36Sopenharmony_ci unsigned int max_sge; 4562306a36Sopenharmony_ci struct ib_device *device; 4662306a36Sopenharmony_ci u32 local_dma_lkey; 4762306a36Sopenharmony_ci enum dma_data_direction direction; 4862306a36Sopenharmony_ci}; 4962306a36Sopenharmony_cistatic ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len, 5062306a36Sopenharmony_ci struct smb_extract_to_rdma *rdma); 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ci/* SMBD version number */ 5362306a36Sopenharmony_ci#define SMBD_V1 0x0100 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_ci/* Port numbers for SMBD transport */ 5662306a36Sopenharmony_ci#define SMB_PORT 445 5762306a36Sopenharmony_ci#define SMBD_PORT 5445 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_ci/* Address lookup and resolve timeout in ms */ 6062306a36Sopenharmony_ci#define RDMA_RESOLVE_TIMEOUT 5000 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ci/* SMBD negotiation timeout in seconds */ 6362306a36Sopenharmony_ci#define SMBD_NEGOTIATE_TIMEOUT 120 6462306a36Sopenharmony_ci 6562306a36Sopenharmony_ci/* SMBD minimum receive size and fragmented sized defined in [MS-SMBD] */ 6662306a36Sopenharmony_ci#define SMBD_MIN_RECEIVE_SIZE 128 6762306a36Sopenharmony_ci#define SMBD_MIN_FRAGMENTED_SIZE 131072 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci/* 7062306a36Sopenharmony_ci * Default maximum number of RDMA read/write outstanding on this connection 7162306a36Sopenharmony_ci * This value is possibly decreased during QP creation on hardware limit 7262306a36Sopenharmony_ci */ 7362306a36Sopenharmony_ci#define 
SMBD_CM_RESPONDER_RESOURCES 32 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_ci/* Maximum number of retries on data transfer operations */ 7662306a36Sopenharmony_ci#define SMBD_CM_RETRY 6 7762306a36Sopenharmony_ci/* No need to retry on Receiver Not Ready since SMBD manages credits */ 7862306a36Sopenharmony_ci#define SMBD_CM_RNR_RETRY 0 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_ci/* 8162306a36Sopenharmony_ci * User configurable initial values per SMBD transport connection 8262306a36Sopenharmony_ci * as defined in [MS-SMBD] 3.1.1.1 8362306a36Sopenharmony_ci * Those may change after a SMBD negotiation 8462306a36Sopenharmony_ci */ 8562306a36Sopenharmony_ci/* The local peer's maximum number of credits to grant to the peer */ 8662306a36Sopenharmony_ciint smbd_receive_credit_max = 255; 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_ci/* The remote peer's credit request of local peer */ 8962306a36Sopenharmony_ciint smbd_send_credit_target = 255; 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_ci/* The maximum single message size can be sent to remote peer */ 9262306a36Sopenharmony_ciint smbd_max_send_size = 1364; 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci/* The maximum fragmented upper-layer payload receive size supported */ 9562306a36Sopenharmony_ciint smbd_max_fragmented_recv_size = 1024 * 1024; 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci/* The maximum single-message size which can be received */ 9862306a36Sopenharmony_ciint smbd_max_receive_size = 1364; 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_ci/* The timeout to initiate send of a keepalive message on idle */ 10162306a36Sopenharmony_ciint smbd_keep_alive_interval = 120; 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci/* 10462306a36Sopenharmony_ci * User configurable initial values for RDMA transport 10562306a36Sopenharmony_ci * The actual values used may be lower and are limited to hardware capabilities 10662306a36Sopenharmony_ci */ 10762306a36Sopenharmony_ci/* Default 
maximum number of pages in a single RDMA write/read */ 10862306a36Sopenharmony_ciint smbd_max_frmr_depth = 2048; 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_ci/* If payload is less than this byte, use RDMA send/recv not read/write */ 11162306a36Sopenharmony_ciint rdma_readwrite_threshold = 4096; 11262306a36Sopenharmony_ci 11362306a36Sopenharmony_ci/* Transport logging functions 11462306a36Sopenharmony_ci * Logging are defined as classes. They can be OR'ed to define the actual 11562306a36Sopenharmony_ci * logging level via module parameter smbd_logging_class 11662306a36Sopenharmony_ci * e.g. cifs.smbd_logging_class=0xa0 will log all log_rdma_recv() and 11762306a36Sopenharmony_ci * log_rdma_event() 11862306a36Sopenharmony_ci */ 11962306a36Sopenharmony_ci#define LOG_OUTGOING 0x1 12062306a36Sopenharmony_ci#define LOG_INCOMING 0x2 12162306a36Sopenharmony_ci#define LOG_READ 0x4 12262306a36Sopenharmony_ci#define LOG_WRITE 0x8 12362306a36Sopenharmony_ci#define LOG_RDMA_SEND 0x10 12462306a36Sopenharmony_ci#define LOG_RDMA_RECV 0x20 12562306a36Sopenharmony_ci#define LOG_KEEP_ALIVE 0x40 12662306a36Sopenharmony_ci#define LOG_RDMA_EVENT 0x80 12762306a36Sopenharmony_ci#define LOG_RDMA_MR 0x100 12862306a36Sopenharmony_cistatic unsigned int smbd_logging_class; 12962306a36Sopenharmony_cimodule_param(smbd_logging_class, uint, 0644); 13062306a36Sopenharmony_ciMODULE_PARM_DESC(smbd_logging_class, 13162306a36Sopenharmony_ci "Logging class for SMBD transport 0x0 to 0x100"); 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci#define ERR 0x0 13462306a36Sopenharmony_ci#define INFO 0x1 13562306a36Sopenharmony_cistatic unsigned int smbd_logging_level = ERR; 13662306a36Sopenharmony_cimodule_param(smbd_logging_level, uint, 0644); 13762306a36Sopenharmony_ciMODULE_PARM_DESC(smbd_logging_level, 13862306a36Sopenharmony_ci "Logging level for SMBD transport, 0 (default): error, 1: info"); 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_ci#define log_rdma(level, class, fmt, args...) 
\ 14162306a36Sopenharmony_cido { \ 14262306a36Sopenharmony_ci if (level <= smbd_logging_level || class & smbd_logging_class) \ 14362306a36Sopenharmony_ci cifs_dbg(VFS, "%s:%d " fmt, __func__, __LINE__, ##args);\ 14462306a36Sopenharmony_ci} while (0) 14562306a36Sopenharmony_ci 14662306a36Sopenharmony_ci#define log_outgoing(level, fmt, args...) \ 14762306a36Sopenharmony_ci log_rdma(level, LOG_OUTGOING, fmt, ##args) 14862306a36Sopenharmony_ci#define log_incoming(level, fmt, args...) \ 14962306a36Sopenharmony_ci log_rdma(level, LOG_INCOMING, fmt, ##args) 15062306a36Sopenharmony_ci#define log_read(level, fmt, args...) log_rdma(level, LOG_READ, fmt, ##args) 15162306a36Sopenharmony_ci#define log_write(level, fmt, args...) log_rdma(level, LOG_WRITE, fmt, ##args) 15262306a36Sopenharmony_ci#define log_rdma_send(level, fmt, args...) \ 15362306a36Sopenharmony_ci log_rdma(level, LOG_RDMA_SEND, fmt, ##args) 15462306a36Sopenharmony_ci#define log_rdma_recv(level, fmt, args...) \ 15562306a36Sopenharmony_ci log_rdma(level, LOG_RDMA_RECV, fmt, ##args) 15662306a36Sopenharmony_ci#define log_keep_alive(level, fmt, args...) \ 15762306a36Sopenharmony_ci log_rdma(level, LOG_KEEP_ALIVE, fmt, ##args) 15862306a36Sopenharmony_ci#define log_rdma_event(level, fmt, args...) \ 15962306a36Sopenharmony_ci log_rdma(level, LOG_RDMA_EVENT, fmt, ##args) 16062306a36Sopenharmony_ci#define log_rdma_mr(level, fmt, args...) 
\ 16162306a36Sopenharmony_ci log_rdma(level, LOG_RDMA_MR, fmt, ##args) 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_cistatic void smbd_disconnect_rdma_work(struct work_struct *work) 16462306a36Sopenharmony_ci{ 16562306a36Sopenharmony_ci struct smbd_connection *info = 16662306a36Sopenharmony_ci container_of(work, struct smbd_connection, disconnect_work); 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci if (info->transport_status == SMBD_CONNECTED) { 16962306a36Sopenharmony_ci info->transport_status = SMBD_DISCONNECTING; 17062306a36Sopenharmony_ci rdma_disconnect(info->id); 17162306a36Sopenharmony_ci } 17262306a36Sopenharmony_ci} 17362306a36Sopenharmony_ci 17462306a36Sopenharmony_cistatic void smbd_disconnect_rdma_connection(struct smbd_connection *info) 17562306a36Sopenharmony_ci{ 17662306a36Sopenharmony_ci queue_work(info->workqueue, &info->disconnect_work); 17762306a36Sopenharmony_ci} 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_ci/* Upcall from RDMA CM */ 18062306a36Sopenharmony_cistatic int smbd_conn_upcall( 18162306a36Sopenharmony_ci struct rdma_cm_id *id, struct rdma_cm_event *event) 18262306a36Sopenharmony_ci{ 18362306a36Sopenharmony_ci struct smbd_connection *info = id->context; 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci log_rdma_event(INFO, "event=%d status=%d\n", 18662306a36Sopenharmony_ci event->event, event->status); 18762306a36Sopenharmony_ci 18862306a36Sopenharmony_ci switch (event->event) { 18962306a36Sopenharmony_ci case RDMA_CM_EVENT_ADDR_RESOLVED: 19062306a36Sopenharmony_ci case RDMA_CM_EVENT_ROUTE_RESOLVED: 19162306a36Sopenharmony_ci info->ri_rc = 0; 19262306a36Sopenharmony_ci complete(&info->ri_done); 19362306a36Sopenharmony_ci break; 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci case RDMA_CM_EVENT_ADDR_ERROR: 19662306a36Sopenharmony_ci info->ri_rc = -EHOSTUNREACH; 19762306a36Sopenharmony_ci complete(&info->ri_done); 19862306a36Sopenharmony_ci break; 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci case 
RDMA_CM_EVENT_ROUTE_ERROR: 20162306a36Sopenharmony_ci info->ri_rc = -ENETUNREACH; 20262306a36Sopenharmony_ci complete(&info->ri_done); 20362306a36Sopenharmony_ci break; 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci case RDMA_CM_EVENT_ESTABLISHED: 20662306a36Sopenharmony_ci log_rdma_event(INFO, "connected event=%d\n", event->event); 20762306a36Sopenharmony_ci info->transport_status = SMBD_CONNECTED; 20862306a36Sopenharmony_ci wake_up_interruptible(&info->conn_wait); 20962306a36Sopenharmony_ci break; 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_ci case RDMA_CM_EVENT_CONNECT_ERROR: 21262306a36Sopenharmony_ci case RDMA_CM_EVENT_UNREACHABLE: 21362306a36Sopenharmony_ci case RDMA_CM_EVENT_REJECTED: 21462306a36Sopenharmony_ci log_rdma_event(INFO, "connecting failed event=%d\n", event->event); 21562306a36Sopenharmony_ci info->transport_status = SMBD_DISCONNECTED; 21662306a36Sopenharmony_ci wake_up_interruptible(&info->conn_wait); 21762306a36Sopenharmony_ci break; 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci case RDMA_CM_EVENT_DEVICE_REMOVAL: 22062306a36Sopenharmony_ci case RDMA_CM_EVENT_DISCONNECTED: 22162306a36Sopenharmony_ci /* This happenes when we fail the negotiation */ 22262306a36Sopenharmony_ci if (info->transport_status == SMBD_NEGOTIATE_FAILED) { 22362306a36Sopenharmony_ci info->transport_status = SMBD_DISCONNECTED; 22462306a36Sopenharmony_ci wake_up(&info->conn_wait); 22562306a36Sopenharmony_ci break; 22662306a36Sopenharmony_ci } 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci info->transport_status = SMBD_DISCONNECTED; 22962306a36Sopenharmony_ci wake_up_interruptible(&info->disconn_wait); 23062306a36Sopenharmony_ci wake_up_interruptible(&info->wait_reassembly_queue); 23162306a36Sopenharmony_ci wake_up_interruptible_all(&info->wait_send_queue); 23262306a36Sopenharmony_ci break; 23362306a36Sopenharmony_ci 23462306a36Sopenharmony_ci default: 23562306a36Sopenharmony_ci break; 23662306a36Sopenharmony_ci } 23762306a36Sopenharmony_ci 
23862306a36Sopenharmony_ci return 0; 23962306a36Sopenharmony_ci} 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_ci/* Upcall from RDMA QP */ 24262306a36Sopenharmony_cistatic void 24362306a36Sopenharmony_cismbd_qp_async_error_upcall(struct ib_event *event, void *context) 24462306a36Sopenharmony_ci{ 24562306a36Sopenharmony_ci struct smbd_connection *info = context; 24662306a36Sopenharmony_ci 24762306a36Sopenharmony_ci log_rdma_event(ERR, "%s on device %s info %p\n", 24862306a36Sopenharmony_ci ib_event_msg(event->event), event->device->name, info); 24962306a36Sopenharmony_ci 25062306a36Sopenharmony_ci switch (event->event) { 25162306a36Sopenharmony_ci case IB_EVENT_CQ_ERR: 25262306a36Sopenharmony_ci case IB_EVENT_QP_FATAL: 25362306a36Sopenharmony_ci smbd_disconnect_rdma_connection(info); 25462306a36Sopenharmony_ci break; 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_ci default: 25762306a36Sopenharmony_ci break; 25862306a36Sopenharmony_ci } 25962306a36Sopenharmony_ci} 26062306a36Sopenharmony_ci 26162306a36Sopenharmony_cistatic inline void *smbd_request_payload(struct smbd_request *request) 26262306a36Sopenharmony_ci{ 26362306a36Sopenharmony_ci return (void *)request->packet; 26462306a36Sopenharmony_ci} 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_cistatic inline void *smbd_response_payload(struct smbd_response *response) 26762306a36Sopenharmony_ci{ 26862306a36Sopenharmony_ci return (void *)response->packet; 26962306a36Sopenharmony_ci} 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_ci/* Called when a RDMA send is done */ 27262306a36Sopenharmony_cistatic void send_done(struct ib_cq *cq, struct ib_wc *wc) 27362306a36Sopenharmony_ci{ 27462306a36Sopenharmony_ci int i; 27562306a36Sopenharmony_ci struct smbd_request *request = 27662306a36Sopenharmony_ci container_of(wc->wr_cqe, struct smbd_request, cqe); 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci log_rdma_send(INFO, "smbd_request 0x%p completed wc->status=%d\n", 27962306a36Sopenharmony_ci request, 
wc->status); 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_ci if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { 28262306a36Sopenharmony_ci log_rdma_send(ERR, "wc->status=%d wc->opcode=%d\n", 28362306a36Sopenharmony_ci wc->status, wc->opcode); 28462306a36Sopenharmony_ci smbd_disconnect_rdma_connection(request->info); 28562306a36Sopenharmony_ci } 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_ci for (i = 0; i < request->num_sge; i++) 28862306a36Sopenharmony_ci ib_dma_unmap_single(request->info->id->device, 28962306a36Sopenharmony_ci request->sge[i].addr, 29062306a36Sopenharmony_ci request->sge[i].length, 29162306a36Sopenharmony_ci DMA_TO_DEVICE); 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_ci if (atomic_dec_and_test(&request->info->send_pending)) 29462306a36Sopenharmony_ci wake_up(&request->info->wait_send_pending); 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_ci wake_up(&request->info->wait_post_send); 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_ci mempool_free(request, request->info->request_mempool); 29962306a36Sopenharmony_ci} 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_cistatic void dump_smbd_negotiate_resp(struct smbd_negotiate_resp *resp) 30262306a36Sopenharmony_ci{ 30362306a36Sopenharmony_ci log_rdma_event(INFO, "resp message min_version %u max_version %u negotiated_version %u credits_requested %u credits_granted %u status %u max_readwrite_size %u preferred_send_size %u max_receive_size %u max_fragmented_size %u\n", 30462306a36Sopenharmony_ci resp->min_version, resp->max_version, 30562306a36Sopenharmony_ci resp->negotiated_version, resp->credits_requested, 30662306a36Sopenharmony_ci resp->credits_granted, resp->status, 30762306a36Sopenharmony_ci resp->max_readwrite_size, resp->preferred_send_size, 30862306a36Sopenharmony_ci resp->max_receive_size, resp->max_fragmented_size); 30962306a36Sopenharmony_ci} 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_ci/* 31262306a36Sopenharmony_ci * Process a negotiation 
response message, according to [MS-SMBD]3.1.5.7 31362306a36Sopenharmony_ci * response, packet_length: the negotiation response message 31462306a36Sopenharmony_ci * return value: true if negotiation is a success, false if failed 31562306a36Sopenharmony_ci */ 31662306a36Sopenharmony_cistatic bool process_negotiation_response( 31762306a36Sopenharmony_ci struct smbd_response *response, int packet_length) 31862306a36Sopenharmony_ci{ 31962306a36Sopenharmony_ci struct smbd_connection *info = response->info; 32062306a36Sopenharmony_ci struct smbd_negotiate_resp *packet = smbd_response_payload(response); 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci if (packet_length < sizeof(struct smbd_negotiate_resp)) { 32362306a36Sopenharmony_ci log_rdma_event(ERR, 32462306a36Sopenharmony_ci "error: packet_length=%d\n", packet_length); 32562306a36Sopenharmony_ci return false; 32662306a36Sopenharmony_ci } 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_ci if (le16_to_cpu(packet->negotiated_version) != SMBD_V1) { 32962306a36Sopenharmony_ci log_rdma_event(ERR, "error: negotiated_version=%x\n", 33062306a36Sopenharmony_ci le16_to_cpu(packet->negotiated_version)); 33162306a36Sopenharmony_ci return false; 33262306a36Sopenharmony_ci } 33362306a36Sopenharmony_ci info->protocol = le16_to_cpu(packet->negotiated_version); 33462306a36Sopenharmony_ci 33562306a36Sopenharmony_ci if (packet->credits_requested == 0) { 33662306a36Sopenharmony_ci log_rdma_event(ERR, "error: credits_requested==0\n"); 33762306a36Sopenharmony_ci return false; 33862306a36Sopenharmony_ci } 33962306a36Sopenharmony_ci info->receive_credit_target = le16_to_cpu(packet->credits_requested); 34062306a36Sopenharmony_ci 34162306a36Sopenharmony_ci if (packet->credits_granted == 0) { 34262306a36Sopenharmony_ci log_rdma_event(ERR, "error: credits_granted==0\n"); 34362306a36Sopenharmony_ci return false; 34462306a36Sopenharmony_ci } 34562306a36Sopenharmony_ci atomic_set(&info->send_credits, le16_to_cpu(packet->credits_granted)); 
34662306a36Sopenharmony_ci 34762306a36Sopenharmony_ci atomic_set(&info->receive_credits, 0); 34862306a36Sopenharmony_ci 34962306a36Sopenharmony_ci if (le32_to_cpu(packet->preferred_send_size) > info->max_receive_size) { 35062306a36Sopenharmony_ci log_rdma_event(ERR, "error: preferred_send_size=%d\n", 35162306a36Sopenharmony_ci le32_to_cpu(packet->preferred_send_size)); 35262306a36Sopenharmony_ci return false; 35362306a36Sopenharmony_ci } 35462306a36Sopenharmony_ci info->max_receive_size = le32_to_cpu(packet->preferred_send_size); 35562306a36Sopenharmony_ci 35662306a36Sopenharmony_ci if (le32_to_cpu(packet->max_receive_size) < SMBD_MIN_RECEIVE_SIZE) { 35762306a36Sopenharmony_ci log_rdma_event(ERR, "error: max_receive_size=%d\n", 35862306a36Sopenharmony_ci le32_to_cpu(packet->max_receive_size)); 35962306a36Sopenharmony_ci return false; 36062306a36Sopenharmony_ci } 36162306a36Sopenharmony_ci info->max_send_size = min_t(int, info->max_send_size, 36262306a36Sopenharmony_ci le32_to_cpu(packet->max_receive_size)); 36362306a36Sopenharmony_ci 36462306a36Sopenharmony_ci if (le32_to_cpu(packet->max_fragmented_size) < 36562306a36Sopenharmony_ci SMBD_MIN_FRAGMENTED_SIZE) { 36662306a36Sopenharmony_ci log_rdma_event(ERR, "error: max_fragmented_size=%d\n", 36762306a36Sopenharmony_ci le32_to_cpu(packet->max_fragmented_size)); 36862306a36Sopenharmony_ci return false; 36962306a36Sopenharmony_ci } 37062306a36Sopenharmony_ci info->max_fragmented_send_size = 37162306a36Sopenharmony_ci le32_to_cpu(packet->max_fragmented_size); 37262306a36Sopenharmony_ci info->rdma_readwrite_threshold = 37362306a36Sopenharmony_ci rdma_readwrite_threshold > info->max_fragmented_send_size ? 
37462306a36Sopenharmony_ci info->max_fragmented_send_size : 37562306a36Sopenharmony_ci rdma_readwrite_threshold; 37662306a36Sopenharmony_ci 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_ci info->max_readwrite_size = min_t(u32, 37962306a36Sopenharmony_ci le32_to_cpu(packet->max_readwrite_size), 38062306a36Sopenharmony_ci info->max_frmr_depth * PAGE_SIZE); 38162306a36Sopenharmony_ci info->max_frmr_depth = info->max_readwrite_size / PAGE_SIZE; 38262306a36Sopenharmony_ci 38362306a36Sopenharmony_ci return true; 38462306a36Sopenharmony_ci} 38562306a36Sopenharmony_ci 38662306a36Sopenharmony_cistatic void smbd_post_send_credits(struct work_struct *work) 38762306a36Sopenharmony_ci{ 38862306a36Sopenharmony_ci int ret = 0; 38962306a36Sopenharmony_ci int use_receive_queue = 1; 39062306a36Sopenharmony_ci int rc; 39162306a36Sopenharmony_ci struct smbd_response *response; 39262306a36Sopenharmony_ci struct smbd_connection *info = 39362306a36Sopenharmony_ci container_of(work, struct smbd_connection, 39462306a36Sopenharmony_ci post_send_credits_work); 39562306a36Sopenharmony_ci 39662306a36Sopenharmony_ci if (info->transport_status != SMBD_CONNECTED) { 39762306a36Sopenharmony_ci wake_up(&info->wait_receive_queues); 39862306a36Sopenharmony_ci return; 39962306a36Sopenharmony_ci } 40062306a36Sopenharmony_ci 40162306a36Sopenharmony_ci if (info->receive_credit_target > 40262306a36Sopenharmony_ci atomic_read(&info->receive_credits)) { 40362306a36Sopenharmony_ci while (true) { 40462306a36Sopenharmony_ci if (use_receive_queue) 40562306a36Sopenharmony_ci response = get_receive_buffer(info); 40662306a36Sopenharmony_ci else 40762306a36Sopenharmony_ci response = get_empty_queue_buffer(info); 40862306a36Sopenharmony_ci if (!response) { 40962306a36Sopenharmony_ci /* now switch to emtpy packet queue */ 41062306a36Sopenharmony_ci if (use_receive_queue) { 41162306a36Sopenharmony_ci use_receive_queue = 0; 41262306a36Sopenharmony_ci continue; 41362306a36Sopenharmony_ci } else 
41462306a36Sopenharmony_ci break; 41562306a36Sopenharmony_ci } 41662306a36Sopenharmony_ci 41762306a36Sopenharmony_ci response->type = SMBD_TRANSFER_DATA; 41862306a36Sopenharmony_ci response->first_segment = false; 41962306a36Sopenharmony_ci rc = smbd_post_recv(info, response); 42062306a36Sopenharmony_ci if (rc) { 42162306a36Sopenharmony_ci log_rdma_recv(ERR, 42262306a36Sopenharmony_ci "post_recv failed rc=%d\n", rc); 42362306a36Sopenharmony_ci put_receive_buffer(info, response); 42462306a36Sopenharmony_ci break; 42562306a36Sopenharmony_ci } 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_ci ret++; 42862306a36Sopenharmony_ci } 42962306a36Sopenharmony_ci } 43062306a36Sopenharmony_ci 43162306a36Sopenharmony_ci spin_lock(&info->lock_new_credits_offered); 43262306a36Sopenharmony_ci info->new_credits_offered += ret; 43362306a36Sopenharmony_ci spin_unlock(&info->lock_new_credits_offered); 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_ci /* Promptly send an immediate packet as defined in [MS-SMBD] 3.1.1.1 */ 43662306a36Sopenharmony_ci info->send_immediate = true; 43762306a36Sopenharmony_ci if (atomic_read(&info->receive_credits) < 43862306a36Sopenharmony_ci info->receive_credit_target - 1) { 43962306a36Sopenharmony_ci if (info->keep_alive_requested == KEEP_ALIVE_PENDING || 44062306a36Sopenharmony_ci info->send_immediate) { 44162306a36Sopenharmony_ci log_keep_alive(INFO, "send an empty message\n"); 44262306a36Sopenharmony_ci smbd_post_send_empty(info); 44362306a36Sopenharmony_ci } 44462306a36Sopenharmony_ci } 44562306a36Sopenharmony_ci} 44662306a36Sopenharmony_ci 44762306a36Sopenharmony_ci/* Called from softirq, when recv is done */ 44862306a36Sopenharmony_cistatic void recv_done(struct ib_cq *cq, struct ib_wc *wc) 44962306a36Sopenharmony_ci{ 45062306a36Sopenharmony_ci struct smbd_data_transfer *data_transfer; 45162306a36Sopenharmony_ci struct smbd_response *response = 45262306a36Sopenharmony_ci container_of(wc->wr_cqe, struct smbd_response, cqe); 
45362306a36Sopenharmony_ci struct smbd_connection *info = response->info; 45462306a36Sopenharmony_ci int data_length = 0; 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_ci log_rdma_recv(INFO, "response=0x%p type=%d wc status=%d wc opcode %d byte_len=%d pkey_index=%u\n", 45762306a36Sopenharmony_ci response, response->type, wc->status, wc->opcode, 45862306a36Sopenharmony_ci wc->byte_len, wc->pkey_index); 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_ci if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { 46162306a36Sopenharmony_ci log_rdma_recv(INFO, "wc->status=%d opcode=%d\n", 46262306a36Sopenharmony_ci wc->status, wc->opcode); 46362306a36Sopenharmony_ci smbd_disconnect_rdma_connection(info); 46462306a36Sopenharmony_ci goto error; 46562306a36Sopenharmony_ci } 46662306a36Sopenharmony_ci 46762306a36Sopenharmony_ci ib_dma_sync_single_for_cpu( 46862306a36Sopenharmony_ci wc->qp->device, 46962306a36Sopenharmony_ci response->sge.addr, 47062306a36Sopenharmony_ci response->sge.length, 47162306a36Sopenharmony_ci DMA_FROM_DEVICE); 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_ci switch (response->type) { 47462306a36Sopenharmony_ci /* SMBD negotiation response */ 47562306a36Sopenharmony_ci case SMBD_NEGOTIATE_RESP: 47662306a36Sopenharmony_ci dump_smbd_negotiate_resp(smbd_response_payload(response)); 47762306a36Sopenharmony_ci info->full_packet_received = true; 47862306a36Sopenharmony_ci info->negotiate_done = 47962306a36Sopenharmony_ci process_negotiation_response(response, wc->byte_len); 48062306a36Sopenharmony_ci complete(&info->negotiate_completion); 48162306a36Sopenharmony_ci break; 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_ci /* SMBD data transfer packet */ 48462306a36Sopenharmony_ci case SMBD_TRANSFER_DATA: 48562306a36Sopenharmony_ci data_transfer = smbd_response_payload(response); 48662306a36Sopenharmony_ci data_length = le32_to_cpu(data_transfer->data_length); 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_ci /* 
48962306a36Sopenharmony_ci * If this is a packet with data playload place the data in 49062306a36Sopenharmony_ci * reassembly queue and wake up the reading thread 49162306a36Sopenharmony_ci */ 49262306a36Sopenharmony_ci if (data_length) { 49362306a36Sopenharmony_ci if (info->full_packet_received) 49462306a36Sopenharmony_ci response->first_segment = true; 49562306a36Sopenharmony_ci 49662306a36Sopenharmony_ci if (le32_to_cpu(data_transfer->remaining_data_length)) 49762306a36Sopenharmony_ci info->full_packet_received = false; 49862306a36Sopenharmony_ci else 49962306a36Sopenharmony_ci info->full_packet_received = true; 50062306a36Sopenharmony_ci 50162306a36Sopenharmony_ci enqueue_reassembly( 50262306a36Sopenharmony_ci info, 50362306a36Sopenharmony_ci response, 50462306a36Sopenharmony_ci data_length); 50562306a36Sopenharmony_ci } else 50662306a36Sopenharmony_ci put_empty_packet(info, response); 50762306a36Sopenharmony_ci 50862306a36Sopenharmony_ci if (data_length) 50962306a36Sopenharmony_ci wake_up_interruptible(&info->wait_reassembly_queue); 51062306a36Sopenharmony_ci 51162306a36Sopenharmony_ci atomic_dec(&info->receive_credits); 51262306a36Sopenharmony_ci info->receive_credit_target = 51362306a36Sopenharmony_ci le16_to_cpu(data_transfer->credits_requested); 51462306a36Sopenharmony_ci if (le16_to_cpu(data_transfer->credits_granted)) { 51562306a36Sopenharmony_ci atomic_add(le16_to_cpu(data_transfer->credits_granted), 51662306a36Sopenharmony_ci &info->send_credits); 51762306a36Sopenharmony_ci /* 51862306a36Sopenharmony_ci * We have new send credits granted from remote peer 51962306a36Sopenharmony_ci * If any sender is waiting for credits, unblock it 52062306a36Sopenharmony_ci */ 52162306a36Sopenharmony_ci wake_up_interruptible(&info->wait_send_queue); 52262306a36Sopenharmony_ci } 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_ci log_incoming(INFO, "data flags %d data_offset %d data_length %d remaining_data_length %d\n", 52562306a36Sopenharmony_ci 
le16_to_cpu(data_transfer->flags), 52662306a36Sopenharmony_ci le32_to_cpu(data_transfer->data_offset), 52762306a36Sopenharmony_ci le32_to_cpu(data_transfer->data_length), 52862306a36Sopenharmony_ci le32_to_cpu(data_transfer->remaining_data_length)); 52962306a36Sopenharmony_ci 53062306a36Sopenharmony_ci /* Send a KEEP_ALIVE response right away if requested */ 53162306a36Sopenharmony_ci info->keep_alive_requested = KEEP_ALIVE_NONE; 53262306a36Sopenharmony_ci if (le16_to_cpu(data_transfer->flags) & 53362306a36Sopenharmony_ci SMB_DIRECT_RESPONSE_REQUESTED) { 53462306a36Sopenharmony_ci info->keep_alive_requested = KEEP_ALIVE_PENDING; 53562306a36Sopenharmony_ci } 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci return; 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_ci default: 54062306a36Sopenharmony_ci log_rdma_recv(ERR, 54162306a36Sopenharmony_ci "unexpected response type=%d\n", response->type); 54262306a36Sopenharmony_ci } 54362306a36Sopenharmony_ci 54462306a36Sopenharmony_cierror: 54562306a36Sopenharmony_ci put_receive_buffer(info, response); 54662306a36Sopenharmony_ci} 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_cistatic struct rdma_cm_id *smbd_create_id( 54962306a36Sopenharmony_ci struct smbd_connection *info, 55062306a36Sopenharmony_ci struct sockaddr *dstaddr, int port) 55162306a36Sopenharmony_ci{ 55262306a36Sopenharmony_ci struct rdma_cm_id *id; 55362306a36Sopenharmony_ci int rc; 55462306a36Sopenharmony_ci __be16 *sport; 55562306a36Sopenharmony_ci 55662306a36Sopenharmony_ci id = rdma_create_id(&init_net, smbd_conn_upcall, info, 55762306a36Sopenharmony_ci RDMA_PS_TCP, IB_QPT_RC); 55862306a36Sopenharmony_ci if (IS_ERR(id)) { 55962306a36Sopenharmony_ci rc = PTR_ERR(id); 56062306a36Sopenharmony_ci log_rdma_event(ERR, "rdma_create_id() failed %i\n", rc); 56162306a36Sopenharmony_ci return id; 56262306a36Sopenharmony_ci } 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_ci if (dstaddr->sa_family == AF_INET6) 56562306a36Sopenharmony_ci sport = 
&((struct sockaddr_in6 *)dstaddr)->sin6_port; 56662306a36Sopenharmony_ci else 56762306a36Sopenharmony_ci sport = &((struct sockaddr_in *)dstaddr)->sin_port; 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_ci *sport = htons(port); 57062306a36Sopenharmony_ci 57162306a36Sopenharmony_ci init_completion(&info->ri_done); 57262306a36Sopenharmony_ci info->ri_rc = -ETIMEDOUT; 57362306a36Sopenharmony_ci 57462306a36Sopenharmony_ci rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)dstaddr, 57562306a36Sopenharmony_ci RDMA_RESOLVE_TIMEOUT); 57662306a36Sopenharmony_ci if (rc) { 57762306a36Sopenharmony_ci log_rdma_event(ERR, "rdma_resolve_addr() failed %i\n", rc); 57862306a36Sopenharmony_ci goto out; 57962306a36Sopenharmony_ci } 58062306a36Sopenharmony_ci rc = wait_for_completion_interruptible_timeout( 58162306a36Sopenharmony_ci &info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT)); 58262306a36Sopenharmony_ci /* e.g. if interrupted returns -ERESTARTSYS */ 58362306a36Sopenharmony_ci if (rc < 0) { 58462306a36Sopenharmony_ci log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc); 58562306a36Sopenharmony_ci goto out; 58662306a36Sopenharmony_ci } 58762306a36Sopenharmony_ci rc = info->ri_rc; 58862306a36Sopenharmony_ci if (rc) { 58962306a36Sopenharmony_ci log_rdma_event(ERR, "rdma_resolve_addr() completed %i\n", rc); 59062306a36Sopenharmony_ci goto out; 59162306a36Sopenharmony_ci } 59262306a36Sopenharmony_ci 59362306a36Sopenharmony_ci info->ri_rc = -ETIMEDOUT; 59462306a36Sopenharmony_ci rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); 59562306a36Sopenharmony_ci if (rc) { 59662306a36Sopenharmony_ci log_rdma_event(ERR, "rdma_resolve_route() failed %i\n", rc); 59762306a36Sopenharmony_ci goto out; 59862306a36Sopenharmony_ci } 59962306a36Sopenharmony_ci rc = wait_for_completion_interruptible_timeout( 60062306a36Sopenharmony_ci &info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT)); 60162306a36Sopenharmony_ci /* e.g. 
if interrupted returns -ERESTARTSYS */ 60262306a36Sopenharmony_ci if (rc < 0) { 60362306a36Sopenharmony_ci log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc); 60462306a36Sopenharmony_ci goto out; 60562306a36Sopenharmony_ci } 60662306a36Sopenharmony_ci rc = info->ri_rc; 60762306a36Sopenharmony_ci if (rc) { 60862306a36Sopenharmony_ci log_rdma_event(ERR, "rdma_resolve_route() completed %i\n", rc); 60962306a36Sopenharmony_ci goto out; 61062306a36Sopenharmony_ci } 61162306a36Sopenharmony_ci 61262306a36Sopenharmony_ci return id; 61362306a36Sopenharmony_ci 61462306a36Sopenharmony_ciout: 61562306a36Sopenharmony_ci rdma_destroy_id(id); 61662306a36Sopenharmony_ci return ERR_PTR(rc); 61762306a36Sopenharmony_ci} 61862306a36Sopenharmony_ci 61962306a36Sopenharmony_ci/* 62062306a36Sopenharmony_ci * Test if FRWR (Fast Registration Work Requests) is supported on the device 62162306a36Sopenharmony_ci * This implementation requries FRWR on RDMA read/write 62262306a36Sopenharmony_ci * return value: true if it is supported 62362306a36Sopenharmony_ci */ 62462306a36Sopenharmony_cistatic bool frwr_is_supported(struct ib_device_attr *attrs) 62562306a36Sopenharmony_ci{ 62662306a36Sopenharmony_ci if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) 62762306a36Sopenharmony_ci return false; 62862306a36Sopenharmony_ci if (attrs->max_fast_reg_page_list_len == 0) 62962306a36Sopenharmony_ci return false; 63062306a36Sopenharmony_ci return true; 63162306a36Sopenharmony_ci} 63262306a36Sopenharmony_ci 63362306a36Sopenharmony_cistatic int smbd_ia_open( 63462306a36Sopenharmony_ci struct smbd_connection *info, 63562306a36Sopenharmony_ci struct sockaddr *dstaddr, int port) 63662306a36Sopenharmony_ci{ 63762306a36Sopenharmony_ci int rc; 63862306a36Sopenharmony_ci 63962306a36Sopenharmony_ci info->id = smbd_create_id(info, dstaddr, port); 64062306a36Sopenharmony_ci if (IS_ERR(info->id)) { 64162306a36Sopenharmony_ci rc = PTR_ERR(info->id); 64262306a36Sopenharmony_ci goto out1; 
64362306a36Sopenharmony_ci } 64462306a36Sopenharmony_ci 64562306a36Sopenharmony_ci if (!frwr_is_supported(&info->id->device->attrs)) { 64662306a36Sopenharmony_ci log_rdma_event(ERR, "Fast Registration Work Requests (FRWR) is not supported\n"); 64762306a36Sopenharmony_ci log_rdma_event(ERR, "Device capability flags = %llx max_fast_reg_page_list_len = %u\n", 64862306a36Sopenharmony_ci info->id->device->attrs.device_cap_flags, 64962306a36Sopenharmony_ci info->id->device->attrs.max_fast_reg_page_list_len); 65062306a36Sopenharmony_ci rc = -EPROTONOSUPPORT; 65162306a36Sopenharmony_ci goto out2; 65262306a36Sopenharmony_ci } 65362306a36Sopenharmony_ci info->max_frmr_depth = min_t(int, 65462306a36Sopenharmony_ci smbd_max_frmr_depth, 65562306a36Sopenharmony_ci info->id->device->attrs.max_fast_reg_page_list_len); 65662306a36Sopenharmony_ci info->mr_type = IB_MR_TYPE_MEM_REG; 65762306a36Sopenharmony_ci if (info->id->device->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) 65862306a36Sopenharmony_ci info->mr_type = IB_MR_TYPE_SG_GAPS; 65962306a36Sopenharmony_ci 66062306a36Sopenharmony_ci info->pd = ib_alloc_pd(info->id->device, 0); 66162306a36Sopenharmony_ci if (IS_ERR(info->pd)) { 66262306a36Sopenharmony_ci rc = PTR_ERR(info->pd); 66362306a36Sopenharmony_ci log_rdma_event(ERR, "ib_alloc_pd() returned %d\n", rc); 66462306a36Sopenharmony_ci goto out2; 66562306a36Sopenharmony_ci } 66662306a36Sopenharmony_ci 66762306a36Sopenharmony_ci return 0; 66862306a36Sopenharmony_ci 66962306a36Sopenharmony_ciout2: 67062306a36Sopenharmony_ci rdma_destroy_id(info->id); 67162306a36Sopenharmony_ci info->id = NULL; 67262306a36Sopenharmony_ci 67362306a36Sopenharmony_ciout1: 67462306a36Sopenharmony_ci return rc; 67562306a36Sopenharmony_ci} 67662306a36Sopenharmony_ci 67762306a36Sopenharmony_ci/* 67862306a36Sopenharmony_ci * Send a negotiation request message to the peer 67962306a36Sopenharmony_ci * The negotiation procedure is in [MS-SMBD] 3.1.5.2 and 3.1.5.3 68062306a36Sopenharmony_ci * After negotiation, 
the transport is connected and ready for 68162306a36Sopenharmony_ci * carrying upper layer SMB payload 68262306a36Sopenharmony_ci */ 68362306a36Sopenharmony_cistatic int smbd_post_send_negotiate_req(struct smbd_connection *info) 68462306a36Sopenharmony_ci{ 68562306a36Sopenharmony_ci struct ib_send_wr send_wr; 68662306a36Sopenharmony_ci int rc = -ENOMEM; 68762306a36Sopenharmony_ci struct smbd_request *request; 68862306a36Sopenharmony_ci struct smbd_negotiate_req *packet; 68962306a36Sopenharmony_ci 69062306a36Sopenharmony_ci request = mempool_alloc(info->request_mempool, GFP_KERNEL); 69162306a36Sopenharmony_ci if (!request) 69262306a36Sopenharmony_ci return rc; 69362306a36Sopenharmony_ci 69462306a36Sopenharmony_ci request->info = info; 69562306a36Sopenharmony_ci 69662306a36Sopenharmony_ci packet = smbd_request_payload(request); 69762306a36Sopenharmony_ci packet->min_version = cpu_to_le16(SMBD_V1); 69862306a36Sopenharmony_ci packet->max_version = cpu_to_le16(SMBD_V1); 69962306a36Sopenharmony_ci packet->reserved = 0; 70062306a36Sopenharmony_ci packet->credits_requested = cpu_to_le16(info->send_credit_target); 70162306a36Sopenharmony_ci packet->preferred_send_size = cpu_to_le32(info->max_send_size); 70262306a36Sopenharmony_ci packet->max_receive_size = cpu_to_le32(info->max_receive_size); 70362306a36Sopenharmony_ci packet->max_fragmented_size = 70462306a36Sopenharmony_ci cpu_to_le32(info->max_fragmented_recv_size); 70562306a36Sopenharmony_ci 70662306a36Sopenharmony_ci request->num_sge = 1; 70762306a36Sopenharmony_ci request->sge[0].addr = ib_dma_map_single( 70862306a36Sopenharmony_ci info->id->device, (void *)packet, 70962306a36Sopenharmony_ci sizeof(*packet), DMA_TO_DEVICE); 71062306a36Sopenharmony_ci if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) { 71162306a36Sopenharmony_ci rc = -EIO; 71262306a36Sopenharmony_ci goto dma_mapping_failed; 71362306a36Sopenharmony_ci } 71462306a36Sopenharmony_ci 71562306a36Sopenharmony_ci request->sge[0].length = 
sizeof(*packet); 71662306a36Sopenharmony_ci request->sge[0].lkey = info->pd->local_dma_lkey; 71762306a36Sopenharmony_ci 71862306a36Sopenharmony_ci ib_dma_sync_single_for_device( 71962306a36Sopenharmony_ci info->id->device, request->sge[0].addr, 72062306a36Sopenharmony_ci request->sge[0].length, DMA_TO_DEVICE); 72162306a36Sopenharmony_ci 72262306a36Sopenharmony_ci request->cqe.done = send_done; 72362306a36Sopenharmony_ci 72462306a36Sopenharmony_ci send_wr.next = NULL; 72562306a36Sopenharmony_ci send_wr.wr_cqe = &request->cqe; 72662306a36Sopenharmony_ci send_wr.sg_list = request->sge; 72762306a36Sopenharmony_ci send_wr.num_sge = request->num_sge; 72862306a36Sopenharmony_ci send_wr.opcode = IB_WR_SEND; 72962306a36Sopenharmony_ci send_wr.send_flags = IB_SEND_SIGNALED; 73062306a36Sopenharmony_ci 73162306a36Sopenharmony_ci log_rdma_send(INFO, "sge addr=0x%llx length=%u lkey=0x%x\n", 73262306a36Sopenharmony_ci request->sge[0].addr, 73362306a36Sopenharmony_ci request->sge[0].length, request->sge[0].lkey); 73462306a36Sopenharmony_ci 73562306a36Sopenharmony_ci atomic_inc(&info->send_pending); 73662306a36Sopenharmony_ci rc = ib_post_send(info->id->qp, &send_wr, NULL); 73762306a36Sopenharmony_ci if (!rc) 73862306a36Sopenharmony_ci return 0; 73962306a36Sopenharmony_ci 74062306a36Sopenharmony_ci /* if we reach here, post send failed */ 74162306a36Sopenharmony_ci log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); 74262306a36Sopenharmony_ci atomic_dec(&info->send_pending); 74362306a36Sopenharmony_ci ib_dma_unmap_single(info->id->device, request->sge[0].addr, 74462306a36Sopenharmony_ci request->sge[0].length, DMA_TO_DEVICE); 74562306a36Sopenharmony_ci 74662306a36Sopenharmony_ci smbd_disconnect_rdma_connection(info); 74762306a36Sopenharmony_ci 74862306a36Sopenharmony_cidma_mapping_failed: 74962306a36Sopenharmony_ci mempool_free(request, info->request_mempool); 75062306a36Sopenharmony_ci return rc; 75162306a36Sopenharmony_ci} 75262306a36Sopenharmony_ci 
75362306a36Sopenharmony_ci/* 75462306a36Sopenharmony_ci * Extend the credits to remote peer 75562306a36Sopenharmony_ci * This implements [MS-SMBD] 3.1.5.9 75662306a36Sopenharmony_ci * The idea is that we should extend credits to remote peer as quickly as 75762306a36Sopenharmony_ci * it's allowed, to maintain data flow. We allocate as much receive 75862306a36Sopenharmony_ci * buffer as possible, and extend the receive credits to remote peer 75962306a36Sopenharmony_ci * return value: the new credtis being granted. 76062306a36Sopenharmony_ci */ 76162306a36Sopenharmony_cistatic int manage_credits_prior_sending(struct smbd_connection *info) 76262306a36Sopenharmony_ci{ 76362306a36Sopenharmony_ci int new_credits; 76462306a36Sopenharmony_ci 76562306a36Sopenharmony_ci spin_lock(&info->lock_new_credits_offered); 76662306a36Sopenharmony_ci new_credits = info->new_credits_offered; 76762306a36Sopenharmony_ci info->new_credits_offered = 0; 76862306a36Sopenharmony_ci spin_unlock(&info->lock_new_credits_offered); 76962306a36Sopenharmony_ci 77062306a36Sopenharmony_ci return new_credits; 77162306a36Sopenharmony_ci} 77262306a36Sopenharmony_ci 77362306a36Sopenharmony_ci/* 77462306a36Sopenharmony_ci * Check if we need to send a KEEP_ALIVE message 77562306a36Sopenharmony_ci * The idle connection timer triggers a KEEP_ALIVE message when expires 77662306a36Sopenharmony_ci * SMB_DIRECT_RESPONSE_REQUESTED is set in the message flag to have peer send 77762306a36Sopenharmony_ci * back a response. 
77862306a36Sopenharmony_ci * return value: 77962306a36Sopenharmony_ci * 1 if SMB_DIRECT_RESPONSE_REQUESTED needs to be set 78062306a36Sopenharmony_ci * 0: otherwise 78162306a36Sopenharmony_ci */ 78262306a36Sopenharmony_cistatic int manage_keep_alive_before_sending(struct smbd_connection *info) 78362306a36Sopenharmony_ci{ 78462306a36Sopenharmony_ci if (info->keep_alive_requested == KEEP_ALIVE_PENDING) { 78562306a36Sopenharmony_ci info->keep_alive_requested = KEEP_ALIVE_SENT; 78662306a36Sopenharmony_ci return 1; 78762306a36Sopenharmony_ci } 78862306a36Sopenharmony_ci return 0; 78962306a36Sopenharmony_ci} 79062306a36Sopenharmony_ci 79162306a36Sopenharmony_ci/* Post the send request */ 79262306a36Sopenharmony_cistatic int smbd_post_send(struct smbd_connection *info, 79362306a36Sopenharmony_ci struct smbd_request *request) 79462306a36Sopenharmony_ci{ 79562306a36Sopenharmony_ci struct ib_send_wr send_wr; 79662306a36Sopenharmony_ci int rc, i; 79762306a36Sopenharmony_ci 79862306a36Sopenharmony_ci for (i = 0; i < request->num_sge; i++) { 79962306a36Sopenharmony_ci log_rdma_send(INFO, 80062306a36Sopenharmony_ci "rdma_request sge[%d] addr=0x%llx length=%u\n", 80162306a36Sopenharmony_ci i, request->sge[i].addr, request->sge[i].length); 80262306a36Sopenharmony_ci ib_dma_sync_single_for_device( 80362306a36Sopenharmony_ci info->id->device, 80462306a36Sopenharmony_ci request->sge[i].addr, 80562306a36Sopenharmony_ci request->sge[i].length, 80662306a36Sopenharmony_ci DMA_TO_DEVICE); 80762306a36Sopenharmony_ci } 80862306a36Sopenharmony_ci 80962306a36Sopenharmony_ci request->cqe.done = send_done; 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_ci send_wr.next = NULL; 81262306a36Sopenharmony_ci send_wr.wr_cqe = &request->cqe; 81362306a36Sopenharmony_ci send_wr.sg_list = request->sge; 81462306a36Sopenharmony_ci send_wr.num_sge = request->num_sge; 81562306a36Sopenharmony_ci send_wr.opcode = IB_WR_SEND; 81662306a36Sopenharmony_ci send_wr.send_flags = IB_SEND_SIGNALED; 
81762306a36Sopenharmony_ci 81862306a36Sopenharmony_ci rc = ib_post_send(info->id->qp, &send_wr, NULL); 81962306a36Sopenharmony_ci if (rc) { 82062306a36Sopenharmony_ci log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); 82162306a36Sopenharmony_ci smbd_disconnect_rdma_connection(info); 82262306a36Sopenharmony_ci rc = -EAGAIN; 82362306a36Sopenharmony_ci } else 82462306a36Sopenharmony_ci /* Reset timer for idle connection after packet is sent */ 82562306a36Sopenharmony_ci mod_delayed_work(info->workqueue, &info->idle_timer_work, 82662306a36Sopenharmony_ci info->keep_alive_interval*HZ); 82762306a36Sopenharmony_ci 82862306a36Sopenharmony_ci return rc; 82962306a36Sopenharmony_ci} 83062306a36Sopenharmony_ci 83162306a36Sopenharmony_cistatic int smbd_post_send_iter(struct smbd_connection *info, 83262306a36Sopenharmony_ci struct iov_iter *iter, 83362306a36Sopenharmony_ci int *_remaining_data_length) 83462306a36Sopenharmony_ci{ 83562306a36Sopenharmony_ci int i, rc; 83662306a36Sopenharmony_ci int header_length; 83762306a36Sopenharmony_ci int data_length; 83862306a36Sopenharmony_ci struct smbd_request *request; 83962306a36Sopenharmony_ci struct smbd_data_transfer *packet; 84062306a36Sopenharmony_ci int new_credits = 0; 84162306a36Sopenharmony_ci 84262306a36Sopenharmony_ciwait_credit: 84362306a36Sopenharmony_ci /* Wait for send credits. 
A SMBD packet needs one credit */ 84462306a36Sopenharmony_ci rc = wait_event_interruptible(info->wait_send_queue, 84562306a36Sopenharmony_ci atomic_read(&info->send_credits) > 0 || 84662306a36Sopenharmony_ci info->transport_status != SMBD_CONNECTED); 84762306a36Sopenharmony_ci if (rc) 84862306a36Sopenharmony_ci goto err_wait_credit; 84962306a36Sopenharmony_ci 85062306a36Sopenharmony_ci if (info->transport_status != SMBD_CONNECTED) { 85162306a36Sopenharmony_ci log_outgoing(ERR, "disconnected not sending on wait_credit\n"); 85262306a36Sopenharmony_ci rc = -EAGAIN; 85362306a36Sopenharmony_ci goto err_wait_credit; 85462306a36Sopenharmony_ci } 85562306a36Sopenharmony_ci if (unlikely(atomic_dec_return(&info->send_credits) < 0)) { 85662306a36Sopenharmony_ci atomic_inc(&info->send_credits); 85762306a36Sopenharmony_ci goto wait_credit; 85862306a36Sopenharmony_ci } 85962306a36Sopenharmony_ci 86062306a36Sopenharmony_ciwait_send_queue: 86162306a36Sopenharmony_ci wait_event(info->wait_post_send, 86262306a36Sopenharmony_ci atomic_read(&info->send_pending) < info->send_credit_target || 86362306a36Sopenharmony_ci info->transport_status != SMBD_CONNECTED); 86462306a36Sopenharmony_ci 86562306a36Sopenharmony_ci if (info->transport_status != SMBD_CONNECTED) { 86662306a36Sopenharmony_ci log_outgoing(ERR, "disconnected not sending on wait_send_queue\n"); 86762306a36Sopenharmony_ci rc = -EAGAIN; 86862306a36Sopenharmony_ci goto err_wait_send_queue; 86962306a36Sopenharmony_ci } 87062306a36Sopenharmony_ci 87162306a36Sopenharmony_ci if (unlikely(atomic_inc_return(&info->send_pending) > 87262306a36Sopenharmony_ci info->send_credit_target)) { 87362306a36Sopenharmony_ci atomic_dec(&info->send_pending); 87462306a36Sopenharmony_ci goto wait_send_queue; 87562306a36Sopenharmony_ci } 87662306a36Sopenharmony_ci 87762306a36Sopenharmony_ci request = mempool_alloc(info->request_mempool, GFP_KERNEL); 87862306a36Sopenharmony_ci if (!request) { 87962306a36Sopenharmony_ci rc = -ENOMEM; 
88062306a36Sopenharmony_ci goto err_alloc; 88162306a36Sopenharmony_ci } 88262306a36Sopenharmony_ci 88362306a36Sopenharmony_ci request->info = info; 88462306a36Sopenharmony_ci memset(request->sge, 0, sizeof(request->sge)); 88562306a36Sopenharmony_ci 88662306a36Sopenharmony_ci /* Fill in the data payload to find out how much data we can add */ 88762306a36Sopenharmony_ci if (iter) { 88862306a36Sopenharmony_ci struct smb_extract_to_rdma extract = { 88962306a36Sopenharmony_ci .nr_sge = 1, 89062306a36Sopenharmony_ci .max_sge = SMBDIRECT_MAX_SEND_SGE, 89162306a36Sopenharmony_ci .sge = request->sge, 89262306a36Sopenharmony_ci .device = info->id->device, 89362306a36Sopenharmony_ci .local_dma_lkey = info->pd->local_dma_lkey, 89462306a36Sopenharmony_ci .direction = DMA_TO_DEVICE, 89562306a36Sopenharmony_ci }; 89662306a36Sopenharmony_ci 89762306a36Sopenharmony_ci rc = smb_extract_iter_to_rdma(iter, *_remaining_data_length, 89862306a36Sopenharmony_ci &extract); 89962306a36Sopenharmony_ci if (rc < 0) 90062306a36Sopenharmony_ci goto err_dma; 90162306a36Sopenharmony_ci data_length = rc; 90262306a36Sopenharmony_ci request->num_sge = extract.nr_sge; 90362306a36Sopenharmony_ci *_remaining_data_length -= data_length; 90462306a36Sopenharmony_ci } else { 90562306a36Sopenharmony_ci data_length = 0; 90662306a36Sopenharmony_ci request->num_sge = 1; 90762306a36Sopenharmony_ci } 90862306a36Sopenharmony_ci 90962306a36Sopenharmony_ci /* Fill in the packet header */ 91062306a36Sopenharmony_ci packet = smbd_request_payload(request); 91162306a36Sopenharmony_ci packet->credits_requested = cpu_to_le16(info->send_credit_target); 91262306a36Sopenharmony_ci 91362306a36Sopenharmony_ci new_credits = manage_credits_prior_sending(info); 91462306a36Sopenharmony_ci atomic_add(new_credits, &info->receive_credits); 91562306a36Sopenharmony_ci packet->credits_granted = cpu_to_le16(new_credits); 91662306a36Sopenharmony_ci 91762306a36Sopenharmony_ci info->send_immediate = false; 91862306a36Sopenharmony_ci 
91962306a36Sopenharmony_ci packet->flags = 0; 92062306a36Sopenharmony_ci if (manage_keep_alive_before_sending(info)) 92162306a36Sopenharmony_ci packet->flags |= cpu_to_le16(SMB_DIRECT_RESPONSE_REQUESTED); 92262306a36Sopenharmony_ci 92362306a36Sopenharmony_ci packet->reserved = 0; 92462306a36Sopenharmony_ci if (!data_length) 92562306a36Sopenharmony_ci packet->data_offset = 0; 92662306a36Sopenharmony_ci else 92762306a36Sopenharmony_ci packet->data_offset = cpu_to_le32(24); 92862306a36Sopenharmony_ci packet->data_length = cpu_to_le32(data_length); 92962306a36Sopenharmony_ci packet->remaining_data_length = cpu_to_le32(*_remaining_data_length); 93062306a36Sopenharmony_ci packet->padding = 0; 93162306a36Sopenharmony_ci 93262306a36Sopenharmony_ci log_outgoing(INFO, "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n", 93362306a36Sopenharmony_ci le16_to_cpu(packet->credits_requested), 93462306a36Sopenharmony_ci le16_to_cpu(packet->credits_granted), 93562306a36Sopenharmony_ci le32_to_cpu(packet->data_offset), 93662306a36Sopenharmony_ci le32_to_cpu(packet->data_length), 93762306a36Sopenharmony_ci le32_to_cpu(packet->remaining_data_length)); 93862306a36Sopenharmony_ci 93962306a36Sopenharmony_ci /* Map the packet to DMA */ 94062306a36Sopenharmony_ci header_length = sizeof(struct smbd_data_transfer); 94162306a36Sopenharmony_ci /* If this is a packet without payload, don't send padding */ 94262306a36Sopenharmony_ci if (!data_length) 94362306a36Sopenharmony_ci header_length = offsetof(struct smbd_data_transfer, padding); 94462306a36Sopenharmony_ci 94562306a36Sopenharmony_ci request->sge[0].addr = ib_dma_map_single(info->id->device, 94662306a36Sopenharmony_ci (void *)packet, 94762306a36Sopenharmony_ci header_length, 94862306a36Sopenharmony_ci DMA_TO_DEVICE); 94962306a36Sopenharmony_ci if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) { 95062306a36Sopenharmony_ci rc = -EIO; 95162306a36Sopenharmony_ci request->sge[0].addr 
= 0; 95262306a36Sopenharmony_ci goto err_dma; 95362306a36Sopenharmony_ci } 95462306a36Sopenharmony_ci 95562306a36Sopenharmony_ci request->sge[0].length = header_length; 95662306a36Sopenharmony_ci request->sge[0].lkey = info->pd->local_dma_lkey; 95762306a36Sopenharmony_ci 95862306a36Sopenharmony_ci rc = smbd_post_send(info, request); 95962306a36Sopenharmony_ci if (!rc) 96062306a36Sopenharmony_ci return 0; 96162306a36Sopenharmony_ci 96262306a36Sopenharmony_cierr_dma: 96362306a36Sopenharmony_ci for (i = 0; i < request->num_sge; i++) 96462306a36Sopenharmony_ci if (request->sge[i].addr) 96562306a36Sopenharmony_ci ib_dma_unmap_single(info->id->device, 96662306a36Sopenharmony_ci request->sge[i].addr, 96762306a36Sopenharmony_ci request->sge[i].length, 96862306a36Sopenharmony_ci DMA_TO_DEVICE); 96962306a36Sopenharmony_ci mempool_free(request, info->request_mempool); 97062306a36Sopenharmony_ci 97162306a36Sopenharmony_ci /* roll back receive credits and credits to be offered */ 97262306a36Sopenharmony_ci spin_lock(&info->lock_new_credits_offered); 97362306a36Sopenharmony_ci info->new_credits_offered += new_credits; 97462306a36Sopenharmony_ci spin_unlock(&info->lock_new_credits_offered); 97562306a36Sopenharmony_ci atomic_sub(new_credits, &info->receive_credits); 97662306a36Sopenharmony_ci 97762306a36Sopenharmony_cierr_alloc: 97862306a36Sopenharmony_ci if (atomic_dec_and_test(&info->send_pending)) 97962306a36Sopenharmony_ci wake_up(&info->wait_send_pending); 98062306a36Sopenharmony_ci 98162306a36Sopenharmony_cierr_wait_send_queue: 98262306a36Sopenharmony_ci /* roll back send credits and pending */ 98362306a36Sopenharmony_ci atomic_inc(&info->send_credits); 98462306a36Sopenharmony_ci 98562306a36Sopenharmony_cierr_wait_credit: 98662306a36Sopenharmony_ci return rc; 98762306a36Sopenharmony_ci} 98862306a36Sopenharmony_ci 98962306a36Sopenharmony_ci/* 99062306a36Sopenharmony_ci * Send an empty message 99162306a36Sopenharmony_ci * Empty message is used to extend credits to peer to for 
keep live 99262306a36Sopenharmony_ci * while there is no upper layer payload to send at the time 99362306a36Sopenharmony_ci */ 99462306a36Sopenharmony_cistatic int smbd_post_send_empty(struct smbd_connection *info) 99562306a36Sopenharmony_ci{ 99662306a36Sopenharmony_ci int remaining_data_length = 0; 99762306a36Sopenharmony_ci 99862306a36Sopenharmony_ci info->count_send_empty++; 99962306a36Sopenharmony_ci return smbd_post_send_iter(info, NULL, &remaining_data_length); 100062306a36Sopenharmony_ci} 100162306a36Sopenharmony_ci 100262306a36Sopenharmony_ci/* 100362306a36Sopenharmony_ci * Post a receive request to the transport 100462306a36Sopenharmony_ci * The remote peer can only send data when a receive request is posted 100562306a36Sopenharmony_ci * The interaction is controlled by send/receive credit system 100662306a36Sopenharmony_ci */ 100762306a36Sopenharmony_cistatic int smbd_post_recv( 100862306a36Sopenharmony_ci struct smbd_connection *info, struct smbd_response *response) 100962306a36Sopenharmony_ci{ 101062306a36Sopenharmony_ci struct ib_recv_wr recv_wr; 101162306a36Sopenharmony_ci int rc = -EIO; 101262306a36Sopenharmony_ci 101362306a36Sopenharmony_ci response->sge.addr = ib_dma_map_single( 101462306a36Sopenharmony_ci info->id->device, response->packet, 101562306a36Sopenharmony_ci info->max_receive_size, DMA_FROM_DEVICE); 101662306a36Sopenharmony_ci if (ib_dma_mapping_error(info->id->device, response->sge.addr)) 101762306a36Sopenharmony_ci return rc; 101862306a36Sopenharmony_ci 101962306a36Sopenharmony_ci response->sge.length = info->max_receive_size; 102062306a36Sopenharmony_ci response->sge.lkey = info->pd->local_dma_lkey; 102162306a36Sopenharmony_ci 102262306a36Sopenharmony_ci response->cqe.done = recv_done; 102362306a36Sopenharmony_ci 102462306a36Sopenharmony_ci recv_wr.wr_cqe = &response->cqe; 102562306a36Sopenharmony_ci recv_wr.next = NULL; 102662306a36Sopenharmony_ci recv_wr.sg_list = &response->sge; 102762306a36Sopenharmony_ci recv_wr.num_sge = 1; 
102862306a36Sopenharmony_ci 102962306a36Sopenharmony_ci rc = ib_post_recv(info->id->qp, &recv_wr, NULL); 103062306a36Sopenharmony_ci if (rc) { 103162306a36Sopenharmony_ci ib_dma_unmap_single(info->id->device, response->sge.addr, 103262306a36Sopenharmony_ci response->sge.length, DMA_FROM_DEVICE); 103362306a36Sopenharmony_ci smbd_disconnect_rdma_connection(info); 103462306a36Sopenharmony_ci log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc); 103562306a36Sopenharmony_ci } 103662306a36Sopenharmony_ci 103762306a36Sopenharmony_ci return rc; 103862306a36Sopenharmony_ci} 103962306a36Sopenharmony_ci 104062306a36Sopenharmony_ci/* Perform SMBD negotiate according to [MS-SMBD] 3.1.5.2 */ 104162306a36Sopenharmony_cistatic int smbd_negotiate(struct smbd_connection *info) 104262306a36Sopenharmony_ci{ 104362306a36Sopenharmony_ci int rc; 104462306a36Sopenharmony_ci struct smbd_response *response = get_receive_buffer(info); 104562306a36Sopenharmony_ci 104662306a36Sopenharmony_ci response->type = SMBD_NEGOTIATE_RESP; 104762306a36Sopenharmony_ci rc = smbd_post_recv(info, response); 104862306a36Sopenharmony_ci log_rdma_event(INFO, "smbd_post_recv rc=%d iov.addr=0x%llx iov.length=%u iov.lkey=0x%x\n", 104962306a36Sopenharmony_ci rc, response->sge.addr, 105062306a36Sopenharmony_ci response->sge.length, response->sge.lkey); 105162306a36Sopenharmony_ci if (rc) 105262306a36Sopenharmony_ci return rc; 105362306a36Sopenharmony_ci 105462306a36Sopenharmony_ci init_completion(&info->negotiate_completion); 105562306a36Sopenharmony_ci info->negotiate_done = false; 105662306a36Sopenharmony_ci rc = smbd_post_send_negotiate_req(info); 105762306a36Sopenharmony_ci if (rc) 105862306a36Sopenharmony_ci return rc; 105962306a36Sopenharmony_ci 106062306a36Sopenharmony_ci rc = wait_for_completion_interruptible_timeout( 106162306a36Sopenharmony_ci &info->negotiate_completion, SMBD_NEGOTIATE_TIMEOUT * HZ); 106262306a36Sopenharmony_ci log_rdma_event(INFO, "wait_for_completion_timeout rc=%d\n", rc); 
106362306a36Sopenharmony_ci 106462306a36Sopenharmony_ci if (info->negotiate_done) 106562306a36Sopenharmony_ci return 0; 106662306a36Sopenharmony_ci 106762306a36Sopenharmony_ci if (rc == 0) 106862306a36Sopenharmony_ci rc = -ETIMEDOUT; 106962306a36Sopenharmony_ci else if (rc == -ERESTARTSYS) 107062306a36Sopenharmony_ci rc = -EINTR; 107162306a36Sopenharmony_ci else 107262306a36Sopenharmony_ci rc = -ENOTCONN; 107362306a36Sopenharmony_ci 107462306a36Sopenharmony_ci return rc; 107562306a36Sopenharmony_ci} 107662306a36Sopenharmony_ci 107762306a36Sopenharmony_cistatic void put_empty_packet( 107862306a36Sopenharmony_ci struct smbd_connection *info, struct smbd_response *response) 107962306a36Sopenharmony_ci{ 108062306a36Sopenharmony_ci spin_lock(&info->empty_packet_queue_lock); 108162306a36Sopenharmony_ci list_add_tail(&response->list, &info->empty_packet_queue); 108262306a36Sopenharmony_ci info->count_empty_packet_queue++; 108362306a36Sopenharmony_ci spin_unlock(&info->empty_packet_queue_lock); 108462306a36Sopenharmony_ci 108562306a36Sopenharmony_ci queue_work(info->workqueue, &info->post_send_credits_work); 108662306a36Sopenharmony_ci} 108762306a36Sopenharmony_ci 108862306a36Sopenharmony_ci/* 108962306a36Sopenharmony_ci * Implement Connection.FragmentReassemblyBuffer defined in [MS-SMBD] 3.1.1.1 109062306a36Sopenharmony_ci * This is a queue for reassembling upper layer payload and present to upper 109162306a36Sopenharmony_ci * layer. All the inncoming payload go to the reassembly queue, regardless of 109262306a36Sopenharmony_ci * if reassembly is required. The uuper layer code reads from the queue for all 109362306a36Sopenharmony_ci * incoming payloads. 
109462306a36Sopenharmony_ci * Put a received packet to the reassembly queue 109562306a36Sopenharmony_ci * response: the packet received 109662306a36Sopenharmony_ci * data_length: the size of payload in this packet 109762306a36Sopenharmony_ci */ 109862306a36Sopenharmony_cistatic void enqueue_reassembly( 109962306a36Sopenharmony_ci struct smbd_connection *info, 110062306a36Sopenharmony_ci struct smbd_response *response, 110162306a36Sopenharmony_ci int data_length) 110262306a36Sopenharmony_ci{ 110362306a36Sopenharmony_ci spin_lock(&info->reassembly_queue_lock); 110462306a36Sopenharmony_ci list_add_tail(&response->list, &info->reassembly_queue); 110562306a36Sopenharmony_ci info->reassembly_queue_length++; 110662306a36Sopenharmony_ci /* 110762306a36Sopenharmony_ci * Make sure reassembly_data_length is updated after list and 110862306a36Sopenharmony_ci * reassembly_queue_length are updated. On the dequeue side 110962306a36Sopenharmony_ci * reassembly_data_length is checked without a lock to determine 111062306a36Sopenharmony_ci * if reassembly_queue_length and list is up to date 111162306a36Sopenharmony_ci */ 111262306a36Sopenharmony_ci virt_wmb(); 111362306a36Sopenharmony_ci info->reassembly_data_length += data_length; 111462306a36Sopenharmony_ci spin_unlock(&info->reassembly_queue_lock); 111562306a36Sopenharmony_ci info->count_reassembly_queue++; 111662306a36Sopenharmony_ci info->count_enqueue_reassembly_queue++; 111762306a36Sopenharmony_ci} 111862306a36Sopenharmony_ci 111962306a36Sopenharmony_ci/* 112062306a36Sopenharmony_ci * Get the first entry at the front of reassembly queue 112162306a36Sopenharmony_ci * Caller is responsible for locking 112262306a36Sopenharmony_ci * return value: the first entry if any, NULL if queue is empty 112362306a36Sopenharmony_ci */ 112462306a36Sopenharmony_cistatic struct smbd_response *_get_first_reassembly(struct smbd_connection *info) 112562306a36Sopenharmony_ci{ 112662306a36Sopenharmony_ci struct smbd_response *ret = NULL; 
112762306a36Sopenharmony_ci 112862306a36Sopenharmony_ci if (!list_empty(&info->reassembly_queue)) { 112962306a36Sopenharmony_ci ret = list_first_entry( 113062306a36Sopenharmony_ci &info->reassembly_queue, 113162306a36Sopenharmony_ci struct smbd_response, list); 113262306a36Sopenharmony_ci } 113362306a36Sopenharmony_ci return ret; 113462306a36Sopenharmony_ci} 113562306a36Sopenharmony_ci 113662306a36Sopenharmony_cistatic struct smbd_response *get_empty_queue_buffer( 113762306a36Sopenharmony_ci struct smbd_connection *info) 113862306a36Sopenharmony_ci{ 113962306a36Sopenharmony_ci struct smbd_response *ret = NULL; 114062306a36Sopenharmony_ci unsigned long flags; 114162306a36Sopenharmony_ci 114262306a36Sopenharmony_ci spin_lock_irqsave(&info->empty_packet_queue_lock, flags); 114362306a36Sopenharmony_ci if (!list_empty(&info->empty_packet_queue)) { 114462306a36Sopenharmony_ci ret = list_first_entry( 114562306a36Sopenharmony_ci &info->empty_packet_queue, 114662306a36Sopenharmony_ci struct smbd_response, list); 114762306a36Sopenharmony_ci list_del(&ret->list); 114862306a36Sopenharmony_ci info->count_empty_packet_queue--; 114962306a36Sopenharmony_ci } 115062306a36Sopenharmony_ci spin_unlock_irqrestore(&info->empty_packet_queue_lock, flags); 115162306a36Sopenharmony_ci 115262306a36Sopenharmony_ci return ret; 115362306a36Sopenharmony_ci} 115462306a36Sopenharmony_ci 115562306a36Sopenharmony_ci/* 115662306a36Sopenharmony_ci * Get a receive buffer 115762306a36Sopenharmony_ci * For each remote send, we need to post a receive. The receive buffers are 115862306a36Sopenharmony_ci * pre-allocated in advance. 
115962306a36Sopenharmony_ci * return value: the receive buffer, NULL if none is available 116062306a36Sopenharmony_ci */ 116162306a36Sopenharmony_cistatic struct smbd_response *get_receive_buffer(struct smbd_connection *info) 116262306a36Sopenharmony_ci{ 116362306a36Sopenharmony_ci struct smbd_response *ret = NULL; 116462306a36Sopenharmony_ci unsigned long flags; 116562306a36Sopenharmony_ci 116662306a36Sopenharmony_ci spin_lock_irqsave(&info->receive_queue_lock, flags); 116762306a36Sopenharmony_ci if (!list_empty(&info->receive_queue)) { 116862306a36Sopenharmony_ci ret = list_first_entry( 116962306a36Sopenharmony_ci &info->receive_queue, 117062306a36Sopenharmony_ci struct smbd_response, list); 117162306a36Sopenharmony_ci list_del(&ret->list); 117262306a36Sopenharmony_ci info->count_receive_queue--; 117362306a36Sopenharmony_ci info->count_get_receive_buffer++; 117462306a36Sopenharmony_ci } 117562306a36Sopenharmony_ci spin_unlock_irqrestore(&info->receive_queue_lock, flags); 117662306a36Sopenharmony_ci 117762306a36Sopenharmony_ci return ret; 117862306a36Sopenharmony_ci} 117962306a36Sopenharmony_ci 118062306a36Sopenharmony_ci/* 118162306a36Sopenharmony_ci * Return a receive buffer 118262306a36Sopenharmony_ci * Upon returning of a receive buffer, we can post new receive and extend 118362306a36Sopenharmony_ci * more receive credits to remote peer. This is done immediately after a 118462306a36Sopenharmony_ci * receive buffer is returned. 
118562306a36Sopenharmony_ci */ 118662306a36Sopenharmony_cistatic void put_receive_buffer( 118762306a36Sopenharmony_ci struct smbd_connection *info, struct smbd_response *response) 118862306a36Sopenharmony_ci{ 118962306a36Sopenharmony_ci unsigned long flags; 119062306a36Sopenharmony_ci 119162306a36Sopenharmony_ci ib_dma_unmap_single(info->id->device, response->sge.addr, 119262306a36Sopenharmony_ci response->sge.length, DMA_FROM_DEVICE); 119362306a36Sopenharmony_ci 119462306a36Sopenharmony_ci spin_lock_irqsave(&info->receive_queue_lock, flags); 119562306a36Sopenharmony_ci list_add_tail(&response->list, &info->receive_queue); 119662306a36Sopenharmony_ci info->count_receive_queue++; 119762306a36Sopenharmony_ci info->count_put_receive_buffer++; 119862306a36Sopenharmony_ci spin_unlock_irqrestore(&info->receive_queue_lock, flags); 119962306a36Sopenharmony_ci 120062306a36Sopenharmony_ci queue_work(info->workqueue, &info->post_send_credits_work); 120162306a36Sopenharmony_ci} 120262306a36Sopenharmony_ci 120362306a36Sopenharmony_ci/* Preallocate all receive buffer on transport establishment */ 120462306a36Sopenharmony_cistatic int allocate_receive_buffers(struct smbd_connection *info, int num_buf) 120562306a36Sopenharmony_ci{ 120662306a36Sopenharmony_ci int i; 120762306a36Sopenharmony_ci struct smbd_response *response; 120862306a36Sopenharmony_ci 120962306a36Sopenharmony_ci INIT_LIST_HEAD(&info->reassembly_queue); 121062306a36Sopenharmony_ci spin_lock_init(&info->reassembly_queue_lock); 121162306a36Sopenharmony_ci info->reassembly_data_length = 0; 121262306a36Sopenharmony_ci info->reassembly_queue_length = 0; 121362306a36Sopenharmony_ci 121462306a36Sopenharmony_ci INIT_LIST_HEAD(&info->receive_queue); 121562306a36Sopenharmony_ci spin_lock_init(&info->receive_queue_lock); 121662306a36Sopenharmony_ci info->count_receive_queue = 0; 121762306a36Sopenharmony_ci 121862306a36Sopenharmony_ci INIT_LIST_HEAD(&info->empty_packet_queue); 121962306a36Sopenharmony_ci 
spin_lock_init(&info->empty_packet_queue_lock); 122062306a36Sopenharmony_ci info->count_empty_packet_queue = 0; 122162306a36Sopenharmony_ci 122262306a36Sopenharmony_ci init_waitqueue_head(&info->wait_receive_queues); 122362306a36Sopenharmony_ci 122462306a36Sopenharmony_ci for (i = 0; i < num_buf; i++) { 122562306a36Sopenharmony_ci response = mempool_alloc(info->response_mempool, GFP_KERNEL); 122662306a36Sopenharmony_ci if (!response) 122762306a36Sopenharmony_ci goto allocate_failed; 122862306a36Sopenharmony_ci 122962306a36Sopenharmony_ci response->info = info; 123062306a36Sopenharmony_ci list_add_tail(&response->list, &info->receive_queue); 123162306a36Sopenharmony_ci info->count_receive_queue++; 123262306a36Sopenharmony_ci } 123362306a36Sopenharmony_ci 123462306a36Sopenharmony_ci return 0; 123562306a36Sopenharmony_ci 123662306a36Sopenharmony_ciallocate_failed: 123762306a36Sopenharmony_ci while (!list_empty(&info->receive_queue)) { 123862306a36Sopenharmony_ci response = list_first_entry( 123962306a36Sopenharmony_ci &info->receive_queue, 124062306a36Sopenharmony_ci struct smbd_response, list); 124162306a36Sopenharmony_ci list_del(&response->list); 124262306a36Sopenharmony_ci info->count_receive_queue--; 124362306a36Sopenharmony_ci 124462306a36Sopenharmony_ci mempool_free(response, info->response_mempool); 124562306a36Sopenharmony_ci } 124662306a36Sopenharmony_ci return -ENOMEM; 124762306a36Sopenharmony_ci} 124862306a36Sopenharmony_ci 124962306a36Sopenharmony_cistatic void destroy_receive_buffers(struct smbd_connection *info) 125062306a36Sopenharmony_ci{ 125162306a36Sopenharmony_ci struct smbd_response *response; 125262306a36Sopenharmony_ci 125362306a36Sopenharmony_ci while ((response = get_receive_buffer(info))) 125462306a36Sopenharmony_ci mempool_free(response, info->response_mempool); 125562306a36Sopenharmony_ci 125662306a36Sopenharmony_ci while ((response = get_empty_queue_buffer(info))) 125762306a36Sopenharmony_ci mempool_free(response, info->response_mempool); 
125862306a36Sopenharmony_ci} 125962306a36Sopenharmony_ci 126062306a36Sopenharmony_ci/* Implement idle connection timer [MS-SMBD] 3.1.6.2 */ 126162306a36Sopenharmony_cistatic void idle_connection_timer(struct work_struct *work) 126262306a36Sopenharmony_ci{ 126362306a36Sopenharmony_ci struct smbd_connection *info = container_of( 126462306a36Sopenharmony_ci work, struct smbd_connection, 126562306a36Sopenharmony_ci idle_timer_work.work); 126662306a36Sopenharmony_ci 126762306a36Sopenharmony_ci if (info->keep_alive_requested != KEEP_ALIVE_NONE) { 126862306a36Sopenharmony_ci log_keep_alive(ERR, 126962306a36Sopenharmony_ci "error status info->keep_alive_requested=%d\n", 127062306a36Sopenharmony_ci info->keep_alive_requested); 127162306a36Sopenharmony_ci smbd_disconnect_rdma_connection(info); 127262306a36Sopenharmony_ci return; 127362306a36Sopenharmony_ci } 127462306a36Sopenharmony_ci 127562306a36Sopenharmony_ci log_keep_alive(INFO, "about to send an empty idle message\n"); 127662306a36Sopenharmony_ci smbd_post_send_empty(info); 127762306a36Sopenharmony_ci 127862306a36Sopenharmony_ci /* Setup the next idle timeout work */ 127962306a36Sopenharmony_ci queue_delayed_work(info->workqueue, &info->idle_timer_work, 128062306a36Sopenharmony_ci info->keep_alive_interval*HZ); 128162306a36Sopenharmony_ci} 128262306a36Sopenharmony_ci 128362306a36Sopenharmony_ci/* 128462306a36Sopenharmony_ci * Destroy the transport and related RDMA and memory resources 128562306a36Sopenharmony_ci * Need to go through all the pending counters and make sure on one is using 128662306a36Sopenharmony_ci * the transport while it is destroyed 128762306a36Sopenharmony_ci */ 128862306a36Sopenharmony_civoid smbd_destroy(struct TCP_Server_Info *server) 128962306a36Sopenharmony_ci{ 129062306a36Sopenharmony_ci struct smbd_connection *info = server->smbd_conn; 129162306a36Sopenharmony_ci struct smbd_response *response; 129262306a36Sopenharmony_ci unsigned long flags; 129362306a36Sopenharmony_ci 
129462306a36Sopenharmony_ci if (!info) { 129562306a36Sopenharmony_ci log_rdma_event(INFO, "rdma session already destroyed\n"); 129662306a36Sopenharmony_ci return; 129762306a36Sopenharmony_ci } 129862306a36Sopenharmony_ci 129962306a36Sopenharmony_ci log_rdma_event(INFO, "destroying rdma session\n"); 130062306a36Sopenharmony_ci if (info->transport_status != SMBD_DISCONNECTED) { 130162306a36Sopenharmony_ci rdma_disconnect(server->smbd_conn->id); 130262306a36Sopenharmony_ci log_rdma_event(INFO, "wait for transport being disconnected\n"); 130362306a36Sopenharmony_ci wait_event_interruptible( 130462306a36Sopenharmony_ci info->disconn_wait, 130562306a36Sopenharmony_ci info->transport_status == SMBD_DISCONNECTED); 130662306a36Sopenharmony_ci } 130762306a36Sopenharmony_ci 130862306a36Sopenharmony_ci log_rdma_event(INFO, "destroying qp\n"); 130962306a36Sopenharmony_ci ib_drain_qp(info->id->qp); 131062306a36Sopenharmony_ci rdma_destroy_qp(info->id); 131162306a36Sopenharmony_ci 131262306a36Sopenharmony_ci log_rdma_event(INFO, "cancelling idle timer\n"); 131362306a36Sopenharmony_ci cancel_delayed_work_sync(&info->idle_timer_work); 131462306a36Sopenharmony_ci 131562306a36Sopenharmony_ci log_rdma_event(INFO, "wait for all send posted to IB to finish\n"); 131662306a36Sopenharmony_ci wait_event(info->wait_send_pending, 131762306a36Sopenharmony_ci atomic_read(&info->send_pending) == 0); 131862306a36Sopenharmony_ci 131962306a36Sopenharmony_ci /* It's not possible for upper layer to get to reassembly */ 132062306a36Sopenharmony_ci log_rdma_event(INFO, "drain the reassembly queue\n"); 132162306a36Sopenharmony_ci do { 132262306a36Sopenharmony_ci spin_lock_irqsave(&info->reassembly_queue_lock, flags); 132362306a36Sopenharmony_ci response = _get_first_reassembly(info); 132462306a36Sopenharmony_ci if (response) { 132562306a36Sopenharmony_ci list_del(&response->list); 132662306a36Sopenharmony_ci spin_unlock_irqrestore( 132762306a36Sopenharmony_ci &info->reassembly_queue_lock, flags); 
132862306a36Sopenharmony_ci put_receive_buffer(info, response); 132962306a36Sopenharmony_ci } else 133062306a36Sopenharmony_ci spin_unlock_irqrestore( 133162306a36Sopenharmony_ci &info->reassembly_queue_lock, flags); 133262306a36Sopenharmony_ci } while (response); 133362306a36Sopenharmony_ci info->reassembly_data_length = 0; 133462306a36Sopenharmony_ci 133562306a36Sopenharmony_ci log_rdma_event(INFO, "free receive buffers\n"); 133662306a36Sopenharmony_ci wait_event(info->wait_receive_queues, 133762306a36Sopenharmony_ci info->count_receive_queue + info->count_empty_packet_queue 133862306a36Sopenharmony_ci == info->receive_credit_max); 133962306a36Sopenharmony_ci destroy_receive_buffers(info); 134062306a36Sopenharmony_ci 134162306a36Sopenharmony_ci /* 134262306a36Sopenharmony_ci * For performance reasons, memory registration and deregistration 134362306a36Sopenharmony_ci * are not locked by srv_mutex. It is possible some processes are 134462306a36Sopenharmony_ci * blocked on transport srv_mutex while holding memory registration. 134562306a36Sopenharmony_ci * Release the transport srv_mutex to allow them to hit the failure 134662306a36Sopenharmony_ci * path when sending data, and then release memory registartions. 
134762306a36Sopenharmony_ci */ 134862306a36Sopenharmony_ci log_rdma_event(INFO, "freeing mr list\n"); 134962306a36Sopenharmony_ci wake_up_interruptible_all(&info->wait_mr); 135062306a36Sopenharmony_ci while (atomic_read(&info->mr_used_count)) { 135162306a36Sopenharmony_ci cifs_server_unlock(server); 135262306a36Sopenharmony_ci msleep(1000); 135362306a36Sopenharmony_ci cifs_server_lock(server); 135462306a36Sopenharmony_ci } 135562306a36Sopenharmony_ci destroy_mr_list(info); 135662306a36Sopenharmony_ci 135762306a36Sopenharmony_ci ib_free_cq(info->send_cq); 135862306a36Sopenharmony_ci ib_free_cq(info->recv_cq); 135962306a36Sopenharmony_ci ib_dealloc_pd(info->pd); 136062306a36Sopenharmony_ci rdma_destroy_id(info->id); 136162306a36Sopenharmony_ci 136262306a36Sopenharmony_ci /* free mempools */ 136362306a36Sopenharmony_ci mempool_destroy(info->request_mempool); 136462306a36Sopenharmony_ci kmem_cache_destroy(info->request_cache); 136562306a36Sopenharmony_ci 136662306a36Sopenharmony_ci mempool_destroy(info->response_mempool); 136762306a36Sopenharmony_ci kmem_cache_destroy(info->response_cache); 136862306a36Sopenharmony_ci 136962306a36Sopenharmony_ci info->transport_status = SMBD_DESTROYED; 137062306a36Sopenharmony_ci 137162306a36Sopenharmony_ci destroy_workqueue(info->workqueue); 137262306a36Sopenharmony_ci log_rdma_event(INFO, "rdma session destroyed\n"); 137362306a36Sopenharmony_ci kfree(info); 137462306a36Sopenharmony_ci server->smbd_conn = NULL; 137562306a36Sopenharmony_ci} 137662306a36Sopenharmony_ci 137762306a36Sopenharmony_ci/* 137862306a36Sopenharmony_ci * Reconnect this SMBD connection, called from upper layer 137962306a36Sopenharmony_ci * return value: 0 on success, or actual error code 138062306a36Sopenharmony_ci */ 138162306a36Sopenharmony_ciint smbd_reconnect(struct TCP_Server_Info *server) 138262306a36Sopenharmony_ci{ 138362306a36Sopenharmony_ci log_rdma_event(INFO, "reconnecting rdma session\n"); 138462306a36Sopenharmony_ci 138562306a36Sopenharmony_ci if 
(!server->smbd_conn) { 138662306a36Sopenharmony_ci log_rdma_event(INFO, "rdma session already destroyed\n"); 138762306a36Sopenharmony_ci goto create_conn; 138862306a36Sopenharmony_ci } 138962306a36Sopenharmony_ci 139062306a36Sopenharmony_ci /* 139162306a36Sopenharmony_ci * This is possible if transport is disconnected and we haven't received 139262306a36Sopenharmony_ci * notification from RDMA, but upper layer has detected timeout 139362306a36Sopenharmony_ci */ 139462306a36Sopenharmony_ci if (server->smbd_conn->transport_status == SMBD_CONNECTED) { 139562306a36Sopenharmony_ci log_rdma_event(INFO, "disconnecting transport\n"); 139662306a36Sopenharmony_ci smbd_destroy(server); 139762306a36Sopenharmony_ci } 139862306a36Sopenharmony_ci 139962306a36Sopenharmony_cicreate_conn: 140062306a36Sopenharmony_ci log_rdma_event(INFO, "creating rdma session\n"); 140162306a36Sopenharmony_ci server->smbd_conn = smbd_get_connection( 140262306a36Sopenharmony_ci server, (struct sockaddr *) &server->dstaddr); 140362306a36Sopenharmony_ci 140462306a36Sopenharmony_ci if (server->smbd_conn) { 140562306a36Sopenharmony_ci cifs_dbg(VFS, "RDMA transport re-established\n"); 140662306a36Sopenharmony_ci trace_smb3_smbd_connect_done(server->hostname, server->conn_id, &server->dstaddr); 140762306a36Sopenharmony_ci return 0; 140862306a36Sopenharmony_ci } 140962306a36Sopenharmony_ci trace_smb3_smbd_connect_err(server->hostname, server->conn_id, &server->dstaddr); 141062306a36Sopenharmony_ci return -ENOENT; 141162306a36Sopenharmony_ci} 141262306a36Sopenharmony_ci 141362306a36Sopenharmony_cistatic void destroy_caches_and_workqueue(struct smbd_connection *info) 141462306a36Sopenharmony_ci{ 141562306a36Sopenharmony_ci destroy_receive_buffers(info); 141662306a36Sopenharmony_ci destroy_workqueue(info->workqueue); 141762306a36Sopenharmony_ci mempool_destroy(info->response_mempool); 141862306a36Sopenharmony_ci kmem_cache_destroy(info->response_cache); 141962306a36Sopenharmony_ci 
mempool_destroy(info->request_mempool); 142062306a36Sopenharmony_ci kmem_cache_destroy(info->request_cache); 142162306a36Sopenharmony_ci} 142262306a36Sopenharmony_ci 142362306a36Sopenharmony_ci#define MAX_NAME_LEN 80 142462306a36Sopenharmony_cistatic int allocate_caches_and_workqueue(struct smbd_connection *info) 142562306a36Sopenharmony_ci{ 142662306a36Sopenharmony_ci char name[MAX_NAME_LEN]; 142762306a36Sopenharmony_ci int rc; 142862306a36Sopenharmony_ci 142962306a36Sopenharmony_ci scnprintf(name, MAX_NAME_LEN, "smbd_request_%p", info); 143062306a36Sopenharmony_ci info->request_cache = 143162306a36Sopenharmony_ci kmem_cache_create( 143262306a36Sopenharmony_ci name, 143362306a36Sopenharmony_ci sizeof(struct smbd_request) + 143462306a36Sopenharmony_ci sizeof(struct smbd_data_transfer), 143562306a36Sopenharmony_ci 0, SLAB_HWCACHE_ALIGN, NULL); 143662306a36Sopenharmony_ci if (!info->request_cache) 143762306a36Sopenharmony_ci return -ENOMEM; 143862306a36Sopenharmony_ci 143962306a36Sopenharmony_ci info->request_mempool = 144062306a36Sopenharmony_ci mempool_create(info->send_credit_target, mempool_alloc_slab, 144162306a36Sopenharmony_ci mempool_free_slab, info->request_cache); 144262306a36Sopenharmony_ci if (!info->request_mempool) 144362306a36Sopenharmony_ci goto out1; 144462306a36Sopenharmony_ci 144562306a36Sopenharmony_ci scnprintf(name, MAX_NAME_LEN, "smbd_response_%p", info); 144662306a36Sopenharmony_ci info->response_cache = 144762306a36Sopenharmony_ci kmem_cache_create( 144862306a36Sopenharmony_ci name, 144962306a36Sopenharmony_ci sizeof(struct smbd_response) + 145062306a36Sopenharmony_ci info->max_receive_size, 145162306a36Sopenharmony_ci 0, SLAB_HWCACHE_ALIGN, NULL); 145262306a36Sopenharmony_ci if (!info->response_cache) 145362306a36Sopenharmony_ci goto out2; 145462306a36Sopenharmony_ci 145562306a36Sopenharmony_ci info->response_mempool = 145662306a36Sopenharmony_ci mempool_create(info->receive_credit_max, mempool_alloc_slab, 145762306a36Sopenharmony_ci 
mempool_free_slab, info->response_cache); 145862306a36Sopenharmony_ci if (!info->response_mempool) 145962306a36Sopenharmony_ci goto out3; 146062306a36Sopenharmony_ci 146162306a36Sopenharmony_ci scnprintf(name, MAX_NAME_LEN, "smbd_%p", info); 146262306a36Sopenharmony_ci info->workqueue = create_workqueue(name); 146362306a36Sopenharmony_ci if (!info->workqueue) 146462306a36Sopenharmony_ci goto out4; 146562306a36Sopenharmony_ci 146662306a36Sopenharmony_ci rc = allocate_receive_buffers(info, info->receive_credit_max); 146762306a36Sopenharmony_ci if (rc) { 146862306a36Sopenharmony_ci log_rdma_event(ERR, "failed to allocate receive buffers\n"); 146962306a36Sopenharmony_ci goto out5; 147062306a36Sopenharmony_ci } 147162306a36Sopenharmony_ci 147262306a36Sopenharmony_ci return 0; 147362306a36Sopenharmony_ci 147462306a36Sopenharmony_ciout5: 147562306a36Sopenharmony_ci destroy_workqueue(info->workqueue); 147662306a36Sopenharmony_ciout4: 147762306a36Sopenharmony_ci mempool_destroy(info->response_mempool); 147862306a36Sopenharmony_ciout3: 147962306a36Sopenharmony_ci kmem_cache_destroy(info->response_cache); 148062306a36Sopenharmony_ciout2: 148162306a36Sopenharmony_ci mempool_destroy(info->request_mempool); 148262306a36Sopenharmony_ciout1: 148362306a36Sopenharmony_ci kmem_cache_destroy(info->request_cache); 148462306a36Sopenharmony_ci return -ENOMEM; 148562306a36Sopenharmony_ci} 148662306a36Sopenharmony_ci 148762306a36Sopenharmony_ci/* Create a SMBD connection, called by upper layer */ 148862306a36Sopenharmony_cistatic struct smbd_connection *_smbd_get_connection( 148962306a36Sopenharmony_ci struct TCP_Server_Info *server, struct sockaddr *dstaddr, int port) 149062306a36Sopenharmony_ci{ 149162306a36Sopenharmony_ci int rc; 149262306a36Sopenharmony_ci struct smbd_connection *info; 149362306a36Sopenharmony_ci struct rdma_conn_param conn_param; 149462306a36Sopenharmony_ci struct ib_qp_init_attr qp_attr; 149562306a36Sopenharmony_ci struct sockaddr_in *addr_in = (struct sockaddr_in *) 
dstaddr; 149662306a36Sopenharmony_ci struct ib_port_immutable port_immutable; 149762306a36Sopenharmony_ci u32 ird_ord_hdr[2]; 149862306a36Sopenharmony_ci 149962306a36Sopenharmony_ci info = kzalloc(sizeof(struct smbd_connection), GFP_KERNEL); 150062306a36Sopenharmony_ci if (!info) 150162306a36Sopenharmony_ci return NULL; 150262306a36Sopenharmony_ci 150362306a36Sopenharmony_ci info->transport_status = SMBD_CONNECTING; 150462306a36Sopenharmony_ci rc = smbd_ia_open(info, dstaddr, port); 150562306a36Sopenharmony_ci if (rc) { 150662306a36Sopenharmony_ci log_rdma_event(INFO, "smbd_ia_open rc=%d\n", rc); 150762306a36Sopenharmony_ci goto create_id_failed; 150862306a36Sopenharmony_ci } 150962306a36Sopenharmony_ci 151062306a36Sopenharmony_ci if (smbd_send_credit_target > info->id->device->attrs.max_cqe || 151162306a36Sopenharmony_ci smbd_send_credit_target > info->id->device->attrs.max_qp_wr) { 151262306a36Sopenharmony_ci log_rdma_event(ERR, "consider lowering send_credit_target = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", 151362306a36Sopenharmony_ci smbd_send_credit_target, 151462306a36Sopenharmony_ci info->id->device->attrs.max_cqe, 151562306a36Sopenharmony_ci info->id->device->attrs.max_qp_wr); 151662306a36Sopenharmony_ci goto config_failed; 151762306a36Sopenharmony_ci } 151862306a36Sopenharmony_ci 151962306a36Sopenharmony_ci if (smbd_receive_credit_max > info->id->device->attrs.max_cqe || 152062306a36Sopenharmony_ci smbd_receive_credit_max > info->id->device->attrs.max_qp_wr) { 152162306a36Sopenharmony_ci log_rdma_event(ERR, "consider lowering receive_credit_max = %d. 
Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", 152262306a36Sopenharmony_ci smbd_receive_credit_max, 152362306a36Sopenharmony_ci info->id->device->attrs.max_cqe, 152462306a36Sopenharmony_ci info->id->device->attrs.max_qp_wr); 152562306a36Sopenharmony_ci goto config_failed; 152662306a36Sopenharmony_ci } 152762306a36Sopenharmony_ci 152862306a36Sopenharmony_ci info->receive_credit_max = smbd_receive_credit_max; 152962306a36Sopenharmony_ci info->send_credit_target = smbd_send_credit_target; 153062306a36Sopenharmony_ci info->max_send_size = smbd_max_send_size; 153162306a36Sopenharmony_ci info->max_fragmented_recv_size = smbd_max_fragmented_recv_size; 153262306a36Sopenharmony_ci info->max_receive_size = smbd_max_receive_size; 153362306a36Sopenharmony_ci info->keep_alive_interval = smbd_keep_alive_interval; 153462306a36Sopenharmony_ci 153562306a36Sopenharmony_ci if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SEND_SGE || 153662306a36Sopenharmony_ci info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_RECV_SGE) { 153762306a36Sopenharmony_ci log_rdma_event(ERR, 153862306a36Sopenharmony_ci "device %.*s max_send_sge/max_recv_sge = %d/%d too small\n", 153962306a36Sopenharmony_ci IB_DEVICE_NAME_MAX, 154062306a36Sopenharmony_ci info->id->device->name, 154162306a36Sopenharmony_ci info->id->device->attrs.max_send_sge, 154262306a36Sopenharmony_ci info->id->device->attrs.max_recv_sge); 154362306a36Sopenharmony_ci goto config_failed; 154462306a36Sopenharmony_ci } 154562306a36Sopenharmony_ci 154662306a36Sopenharmony_ci info->send_cq = NULL; 154762306a36Sopenharmony_ci info->recv_cq = NULL; 154862306a36Sopenharmony_ci info->send_cq = 154962306a36Sopenharmony_ci ib_alloc_cq_any(info->id->device, info, 155062306a36Sopenharmony_ci info->send_credit_target, IB_POLL_SOFTIRQ); 155162306a36Sopenharmony_ci if (IS_ERR(info->send_cq)) { 155262306a36Sopenharmony_ci info->send_cq = NULL; 155362306a36Sopenharmony_ci goto alloc_cq_failed; 155462306a36Sopenharmony_ci } 
155562306a36Sopenharmony_ci 155662306a36Sopenharmony_ci info->recv_cq = 155762306a36Sopenharmony_ci ib_alloc_cq_any(info->id->device, info, 155862306a36Sopenharmony_ci info->receive_credit_max, IB_POLL_SOFTIRQ); 155962306a36Sopenharmony_ci if (IS_ERR(info->recv_cq)) { 156062306a36Sopenharmony_ci info->recv_cq = NULL; 156162306a36Sopenharmony_ci goto alloc_cq_failed; 156262306a36Sopenharmony_ci } 156362306a36Sopenharmony_ci 156462306a36Sopenharmony_ci memset(&qp_attr, 0, sizeof(qp_attr)); 156562306a36Sopenharmony_ci qp_attr.event_handler = smbd_qp_async_error_upcall; 156662306a36Sopenharmony_ci qp_attr.qp_context = info; 156762306a36Sopenharmony_ci qp_attr.cap.max_send_wr = info->send_credit_target; 156862306a36Sopenharmony_ci qp_attr.cap.max_recv_wr = info->receive_credit_max; 156962306a36Sopenharmony_ci qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SEND_SGE; 157062306a36Sopenharmony_ci qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_RECV_SGE; 157162306a36Sopenharmony_ci qp_attr.cap.max_inline_data = 0; 157262306a36Sopenharmony_ci qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 157362306a36Sopenharmony_ci qp_attr.qp_type = IB_QPT_RC; 157462306a36Sopenharmony_ci qp_attr.send_cq = info->send_cq; 157562306a36Sopenharmony_ci qp_attr.recv_cq = info->recv_cq; 157662306a36Sopenharmony_ci qp_attr.port_num = ~0; 157762306a36Sopenharmony_ci 157862306a36Sopenharmony_ci rc = rdma_create_qp(info->id, info->pd, &qp_attr); 157962306a36Sopenharmony_ci if (rc) { 158062306a36Sopenharmony_ci log_rdma_event(ERR, "rdma_create_qp failed %i\n", rc); 158162306a36Sopenharmony_ci goto create_qp_failed; 158262306a36Sopenharmony_ci } 158362306a36Sopenharmony_ci 158462306a36Sopenharmony_ci memset(&conn_param, 0, sizeof(conn_param)); 158562306a36Sopenharmony_ci conn_param.initiator_depth = 0; 158662306a36Sopenharmony_ci 158762306a36Sopenharmony_ci conn_param.responder_resources = 158862306a36Sopenharmony_ci info->id->device->attrs.max_qp_rd_atom 158962306a36Sopenharmony_ci < SMBD_CM_RESPONDER_RESOURCES ? 
159062306a36Sopenharmony_ci info->id->device->attrs.max_qp_rd_atom : 159162306a36Sopenharmony_ci SMBD_CM_RESPONDER_RESOURCES; 159262306a36Sopenharmony_ci info->responder_resources = conn_param.responder_resources; 159362306a36Sopenharmony_ci log_rdma_mr(INFO, "responder_resources=%d\n", 159462306a36Sopenharmony_ci info->responder_resources); 159562306a36Sopenharmony_ci 159662306a36Sopenharmony_ci /* Need to send IRD/ORD in private data for iWARP */ 159762306a36Sopenharmony_ci info->id->device->ops.get_port_immutable( 159862306a36Sopenharmony_ci info->id->device, info->id->port_num, &port_immutable); 159962306a36Sopenharmony_ci if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) { 160062306a36Sopenharmony_ci ird_ord_hdr[0] = info->responder_resources; 160162306a36Sopenharmony_ci ird_ord_hdr[1] = 1; 160262306a36Sopenharmony_ci conn_param.private_data = ird_ord_hdr; 160362306a36Sopenharmony_ci conn_param.private_data_len = sizeof(ird_ord_hdr); 160462306a36Sopenharmony_ci } else { 160562306a36Sopenharmony_ci conn_param.private_data = NULL; 160662306a36Sopenharmony_ci conn_param.private_data_len = 0; 160762306a36Sopenharmony_ci } 160862306a36Sopenharmony_ci 160962306a36Sopenharmony_ci conn_param.retry_count = SMBD_CM_RETRY; 161062306a36Sopenharmony_ci conn_param.rnr_retry_count = SMBD_CM_RNR_RETRY; 161162306a36Sopenharmony_ci conn_param.flow_control = 0; 161262306a36Sopenharmony_ci 161362306a36Sopenharmony_ci log_rdma_event(INFO, "connecting to IP %pI4 port %d\n", 161462306a36Sopenharmony_ci &addr_in->sin_addr, port); 161562306a36Sopenharmony_ci 161662306a36Sopenharmony_ci init_waitqueue_head(&info->conn_wait); 161762306a36Sopenharmony_ci init_waitqueue_head(&info->disconn_wait); 161862306a36Sopenharmony_ci init_waitqueue_head(&info->wait_reassembly_queue); 161962306a36Sopenharmony_ci rc = rdma_connect(info->id, &conn_param); 162062306a36Sopenharmony_ci if (rc) { 162162306a36Sopenharmony_ci log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc); 
162262306a36Sopenharmony_ci goto rdma_connect_failed; 162362306a36Sopenharmony_ci } 162462306a36Sopenharmony_ci 162562306a36Sopenharmony_ci wait_event_interruptible( 162662306a36Sopenharmony_ci info->conn_wait, info->transport_status != SMBD_CONNECTING); 162762306a36Sopenharmony_ci 162862306a36Sopenharmony_ci if (info->transport_status != SMBD_CONNECTED) { 162962306a36Sopenharmony_ci log_rdma_event(ERR, "rdma_connect failed port=%d\n", port); 163062306a36Sopenharmony_ci goto rdma_connect_failed; 163162306a36Sopenharmony_ci } 163262306a36Sopenharmony_ci 163362306a36Sopenharmony_ci log_rdma_event(INFO, "rdma_connect connected\n"); 163462306a36Sopenharmony_ci 163562306a36Sopenharmony_ci rc = allocate_caches_and_workqueue(info); 163662306a36Sopenharmony_ci if (rc) { 163762306a36Sopenharmony_ci log_rdma_event(ERR, "cache allocation failed\n"); 163862306a36Sopenharmony_ci goto allocate_cache_failed; 163962306a36Sopenharmony_ci } 164062306a36Sopenharmony_ci 164162306a36Sopenharmony_ci init_waitqueue_head(&info->wait_send_queue); 164262306a36Sopenharmony_ci INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer); 164362306a36Sopenharmony_ci queue_delayed_work(info->workqueue, &info->idle_timer_work, 164462306a36Sopenharmony_ci info->keep_alive_interval*HZ); 164562306a36Sopenharmony_ci 164662306a36Sopenharmony_ci init_waitqueue_head(&info->wait_send_pending); 164762306a36Sopenharmony_ci atomic_set(&info->send_pending, 0); 164862306a36Sopenharmony_ci 164962306a36Sopenharmony_ci init_waitqueue_head(&info->wait_post_send); 165062306a36Sopenharmony_ci 165162306a36Sopenharmony_ci INIT_WORK(&info->disconnect_work, smbd_disconnect_rdma_work); 165262306a36Sopenharmony_ci INIT_WORK(&info->post_send_credits_work, smbd_post_send_credits); 165362306a36Sopenharmony_ci info->new_credits_offered = 0; 165462306a36Sopenharmony_ci spin_lock_init(&info->lock_new_credits_offered); 165562306a36Sopenharmony_ci 165662306a36Sopenharmony_ci rc = smbd_negotiate(info); 
165762306a36Sopenharmony_ci if (rc) { 165862306a36Sopenharmony_ci log_rdma_event(ERR, "smbd_negotiate rc=%d\n", rc); 165962306a36Sopenharmony_ci goto negotiation_failed; 166062306a36Sopenharmony_ci } 166162306a36Sopenharmony_ci 166262306a36Sopenharmony_ci rc = allocate_mr_list(info); 166362306a36Sopenharmony_ci if (rc) { 166462306a36Sopenharmony_ci log_rdma_mr(ERR, "memory registration allocation failed\n"); 166562306a36Sopenharmony_ci goto allocate_mr_failed; 166662306a36Sopenharmony_ci } 166762306a36Sopenharmony_ci 166862306a36Sopenharmony_ci return info; 166962306a36Sopenharmony_ci 167062306a36Sopenharmony_ciallocate_mr_failed: 167162306a36Sopenharmony_ci /* At this point, need to a full transport shutdown */ 167262306a36Sopenharmony_ci server->smbd_conn = info; 167362306a36Sopenharmony_ci smbd_destroy(server); 167462306a36Sopenharmony_ci return NULL; 167562306a36Sopenharmony_ci 167662306a36Sopenharmony_cinegotiation_failed: 167762306a36Sopenharmony_ci cancel_delayed_work_sync(&info->idle_timer_work); 167862306a36Sopenharmony_ci destroy_caches_and_workqueue(info); 167962306a36Sopenharmony_ci info->transport_status = SMBD_NEGOTIATE_FAILED; 168062306a36Sopenharmony_ci init_waitqueue_head(&info->conn_wait); 168162306a36Sopenharmony_ci rdma_disconnect(info->id); 168262306a36Sopenharmony_ci wait_event(info->conn_wait, 168362306a36Sopenharmony_ci info->transport_status == SMBD_DISCONNECTED); 168462306a36Sopenharmony_ci 168562306a36Sopenharmony_ciallocate_cache_failed: 168662306a36Sopenharmony_cirdma_connect_failed: 168762306a36Sopenharmony_ci rdma_destroy_qp(info->id); 168862306a36Sopenharmony_ci 168962306a36Sopenharmony_cicreate_qp_failed: 169062306a36Sopenharmony_cialloc_cq_failed: 169162306a36Sopenharmony_ci if (info->send_cq) 169262306a36Sopenharmony_ci ib_free_cq(info->send_cq); 169362306a36Sopenharmony_ci if (info->recv_cq) 169462306a36Sopenharmony_ci ib_free_cq(info->recv_cq); 169562306a36Sopenharmony_ci 169662306a36Sopenharmony_ciconfig_failed: 
169762306a36Sopenharmony_ci ib_dealloc_pd(info->pd); 169862306a36Sopenharmony_ci rdma_destroy_id(info->id); 169962306a36Sopenharmony_ci 170062306a36Sopenharmony_cicreate_id_failed: 170162306a36Sopenharmony_ci kfree(info); 170262306a36Sopenharmony_ci return NULL; 170362306a36Sopenharmony_ci} 170462306a36Sopenharmony_ci 170562306a36Sopenharmony_cistruct smbd_connection *smbd_get_connection( 170662306a36Sopenharmony_ci struct TCP_Server_Info *server, struct sockaddr *dstaddr) 170762306a36Sopenharmony_ci{ 170862306a36Sopenharmony_ci struct smbd_connection *ret; 170962306a36Sopenharmony_ci int port = SMBD_PORT; 171062306a36Sopenharmony_ci 171162306a36Sopenharmony_citry_again: 171262306a36Sopenharmony_ci ret = _smbd_get_connection(server, dstaddr, port); 171362306a36Sopenharmony_ci 171462306a36Sopenharmony_ci /* Try SMB_PORT if SMBD_PORT doesn't work */ 171562306a36Sopenharmony_ci if (!ret && port == SMBD_PORT) { 171662306a36Sopenharmony_ci port = SMB_PORT; 171762306a36Sopenharmony_ci goto try_again; 171862306a36Sopenharmony_ci } 171962306a36Sopenharmony_ci return ret; 172062306a36Sopenharmony_ci} 172162306a36Sopenharmony_ci 172262306a36Sopenharmony_ci/* 172362306a36Sopenharmony_ci * Receive data from receive reassembly queue 172462306a36Sopenharmony_ci * All the incoming data packets are placed in reassembly queue 172562306a36Sopenharmony_ci * buf: the buffer to read data into 172662306a36Sopenharmony_ci * size: the length of data to read 172762306a36Sopenharmony_ci * return value: actual data read 172862306a36Sopenharmony_ci * Note: this implementation copies the data from reassebmly queue to receive 172962306a36Sopenharmony_ci * buffers used by upper layer. This is not the optimal code path. A better way 173062306a36Sopenharmony_ci * to do it is to not have upper layer allocate its receive buffers but rather 173162306a36Sopenharmony_ci * borrow the buffer from reassembly queue, and return it after data is 173262306a36Sopenharmony_ci * consumed. 
But this will require more changes to upper layer code, and also 173362306a36Sopenharmony_ci * need to consider packet boundaries while they still being reassembled. 173462306a36Sopenharmony_ci */ 173562306a36Sopenharmony_cistatic int smbd_recv_buf(struct smbd_connection *info, char *buf, 173662306a36Sopenharmony_ci unsigned int size) 173762306a36Sopenharmony_ci{ 173862306a36Sopenharmony_ci struct smbd_response *response; 173962306a36Sopenharmony_ci struct smbd_data_transfer *data_transfer; 174062306a36Sopenharmony_ci int to_copy, to_read, data_read, offset; 174162306a36Sopenharmony_ci u32 data_length, remaining_data_length, data_offset; 174262306a36Sopenharmony_ci int rc; 174362306a36Sopenharmony_ci 174462306a36Sopenharmony_ciagain: 174562306a36Sopenharmony_ci /* 174662306a36Sopenharmony_ci * No need to hold the reassembly queue lock all the time as we are 174762306a36Sopenharmony_ci * the only one reading from the front of the queue. The transport 174862306a36Sopenharmony_ci * may add more entries to the back of the queue at the same time 174962306a36Sopenharmony_ci */ 175062306a36Sopenharmony_ci log_read(INFO, "size=%d info->reassembly_data_length=%d\n", size, 175162306a36Sopenharmony_ci info->reassembly_data_length); 175262306a36Sopenharmony_ci if (info->reassembly_data_length >= size) { 175362306a36Sopenharmony_ci int queue_length; 175462306a36Sopenharmony_ci int queue_removed = 0; 175562306a36Sopenharmony_ci 175662306a36Sopenharmony_ci /* 175762306a36Sopenharmony_ci * Need to make sure reassembly_data_length is read before 175862306a36Sopenharmony_ci * reading reassembly_queue_length and calling 175962306a36Sopenharmony_ci * _get_first_reassembly. 
This call is lock free 176062306a36Sopenharmony_ci * as we never read at the end of the queue which are being 176162306a36Sopenharmony_ci * updated in SOFTIRQ as more data is received 176262306a36Sopenharmony_ci */ 176362306a36Sopenharmony_ci virt_rmb(); 176462306a36Sopenharmony_ci queue_length = info->reassembly_queue_length; 176562306a36Sopenharmony_ci data_read = 0; 176662306a36Sopenharmony_ci to_read = size; 176762306a36Sopenharmony_ci offset = info->first_entry_offset; 176862306a36Sopenharmony_ci while (data_read < size) { 176962306a36Sopenharmony_ci response = _get_first_reassembly(info); 177062306a36Sopenharmony_ci data_transfer = smbd_response_payload(response); 177162306a36Sopenharmony_ci data_length = le32_to_cpu(data_transfer->data_length); 177262306a36Sopenharmony_ci remaining_data_length = 177362306a36Sopenharmony_ci le32_to_cpu( 177462306a36Sopenharmony_ci data_transfer->remaining_data_length); 177562306a36Sopenharmony_ci data_offset = le32_to_cpu(data_transfer->data_offset); 177662306a36Sopenharmony_ci 177762306a36Sopenharmony_ci /* 177862306a36Sopenharmony_ci * The upper layer expects RFC1002 length at the 177962306a36Sopenharmony_ci * beginning of the payload. Return it to indicate 178062306a36Sopenharmony_ci * the total length of the packet. This minimize the 178162306a36Sopenharmony_ci * change to upper layer packet processing logic. 
This 178262306a36Sopenharmony_ci * will be eventually remove when an intermediate 178362306a36Sopenharmony_ci * transport layer is added 178462306a36Sopenharmony_ci */ 178562306a36Sopenharmony_ci if (response->first_segment && size == 4) { 178662306a36Sopenharmony_ci unsigned int rfc1002_len = 178762306a36Sopenharmony_ci data_length + remaining_data_length; 178862306a36Sopenharmony_ci *((__be32 *)buf) = cpu_to_be32(rfc1002_len); 178962306a36Sopenharmony_ci data_read = 4; 179062306a36Sopenharmony_ci response->first_segment = false; 179162306a36Sopenharmony_ci log_read(INFO, "returning rfc1002 length %d\n", 179262306a36Sopenharmony_ci rfc1002_len); 179362306a36Sopenharmony_ci goto read_rfc1002_done; 179462306a36Sopenharmony_ci } 179562306a36Sopenharmony_ci 179662306a36Sopenharmony_ci to_copy = min_t(int, data_length - offset, to_read); 179762306a36Sopenharmony_ci memcpy( 179862306a36Sopenharmony_ci buf + data_read, 179962306a36Sopenharmony_ci (char *)data_transfer + data_offset + offset, 180062306a36Sopenharmony_ci to_copy); 180162306a36Sopenharmony_ci 180262306a36Sopenharmony_ci /* move on to the next buffer? 
*/ 180362306a36Sopenharmony_ci if (to_copy == data_length - offset) { 180462306a36Sopenharmony_ci queue_length--; 180562306a36Sopenharmony_ci /* 180662306a36Sopenharmony_ci * No need to lock if we are not at the 180762306a36Sopenharmony_ci * end of the queue 180862306a36Sopenharmony_ci */ 180962306a36Sopenharmony_ci if (queue_length) 181062306a36Sopenharmony_ci list_del(&response->list); 181162306a36Sopenharmony_ci else { 181262306a36Sopenharmony_ci spin_lock_irq( 181362306a36Sopenharmony_ci &info->reassembly_queue_lock); 181462306a36Sopenharmony_ci list_del(&response->list); 181562306a36Sopenharmony_ci spin_unlock_irq( 181662306a36Sopenharmony_ci &info->reassembly_queue_lock); 181762306a36Sopenharmony_ci } 181862306a36Sopenharmony_ci queue_removed++; 181962306a36Sopenharmony_ci info->count_reassembly_queue--; 182062306a36Sopenharmony_ci info->count_dequeue_reassembly_queue++; 182162306a36Sopenharmony_ci put_receive_buffer(info, response); 182262306a36Sopenharmony_ci offset = 0; 182362306a36Sopenharmony_ci log_read(INFO, "put_receive_buffer offset=0\n"); 182462306a36Sopenharmony_ci } else 182562306a36Sopenharmony_ci offset += to_copy; 182662306a36Sopenharmony_ci 182762306a36Sopenharmony_ci to_read -= to_copy; 182862306a36Sopenharmony_ci data_read += to_copy; 182962306a36Sopenharmony_ci 183062306a36Sopenharmony_ci log_read(INFO, "_get_first_reassembly memcpy %d bytes data_transfer_length-offset=%d after that to_read=%d data_read=%d offset=%d\n", 183162306a36Sopenharmony_ci to_copy, data_length - offset, 183262306a36Sopenharmony_ci to_read, data_read, offset); 183362306a36Sopenharmony_ci } 183462306a36Sopenharmony_ci 183562306a36Sopenharmony_ci spin_lock_irq(&info->reassembly_queue_lock); 183662306a36Sopenharmony_ci info->reassembly_data_length -= data_read; 183762306a36Sopenharmony_ci info->reassembly_queue_length -= queue_removed; 183862306a36Sopenharmony_ci spin_unlock_irq(&info->reassembly_queue_lock); 183962306a36Sopenharmony_ci 184062306a36Sopenharmony_ci 
		info->first_entry_offset = offset;
		log_read(INFO, "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n",
			 data_read, info->reassembly_data_length,
			 info->first_entry_offset);
read_rfc1002_done:
		return data_read;
	}

	log_read(INFO, "wait_event on more data\n");
	rc = wait_event_interruptible(
		info->wait_reassembly_queue,
		info->reassembly_data_length >= size ||
			info->transport_status != SMBD_CONNECTED);
	/* Don't return any data if interrupted */
	if (rc)
		return rc;

	if (info->transport_status != SMBD_CONNECTED) {
		log_read(ERR, "disconnected\n");
		return -ECONNABORTED;
	}

	goto again;
}

/*
 * Receive a page from receive reassembly queue
 * info: the SMB Direct connection to read from
 * page: the page to read data into
 * page_offset: byte offset inside the page where the data is stored
 * to_read: the length of data to read
 * return value: actual data read as reported by smbd_recv_buf(), or the
 * non-zero result of wait_event_interruptible() if the wait was interrupted
 */
static int smbd_recv_page(struct smbd_connection *info,
		struct page *page, unsigned int page_offset,
		unsigned int to_read)
{
	int ret;
	char *to_address;
	void *page_address;

	/*
	 * make sure we have the page ready for read
	 * The wait ends when at least to_read bytes are queued or when the
	 * transport is no longer SMBD_CONNECTED.
	 */
	ret = wait_event_interruptible(
		info->wait_reassembly_queue,
		info->reassembly_data_length >= to_read ||
			info->transport_status != SMBD_CONNECTED);
	if (ret)
		return ret;

	/*
	 * now we can read from reassembly queue and not sleep
	 * The atomic mapping is held for the whole smbd_recv_buf() call
	 * and released right after it.
	 */
	page_address = kmap_atomic(page);
	to_address = (char *) page_address + page_offset;

	log_read(INFO, "reading from page=%p address=%p to_read=%d\n",
		page, to_address, to_read);

	ret = smbd_recv_buf(info, to_address, to_read);
	kunmap_atomic(page_address);

	return ret;
}

/*
 * Receive data from transport
 * msg: a msghdr point to the buffer, can be ITER_KVEC or ITER_BVEC
 * return: total bytes read, or 0. SMB Direct will not do partial read.
190462306a36Sopenharmony_ci */ 190562306a36Sopenharmony_ciint smbd_recv(struct smbd_connection *info, struct msghdr *msg) 190662306a36Sopenharmony_ci{ 190762306a36Sopenharmony_ci char *buf; 190862306a36Sopenharmony_ci struct page *page; 190962306a36Sopenharmony_ci unsigned int to_read, page_offset; 191062306a36Sopenharmony_ci int rc; 191162306a36Sopenharmony_ci 191262306a36Sopenharmony_ci if (iov_iter_rw(&msg->msg_iter) == WRITE) { 191362306a36Sopenharmony_ci /* It's a bug in upper layer to get there */ 191462306a36Sopenharmony_ci cifs_dbg(VFS, "Invalid msg iter dir %u\n", 191562306a36Sopenharmony_ci iov_iter_rw(&msg->msg_iter)); 191662306a36Sopenharmony_ci rc = -EINVAL; 191762306a36Sopenharmony_ci goto out; 191862306a36Sopenharmony_ci } 191962306a36Sopenharmony_ci 192062306a36Sopenharmony_ci switch (iov_iter_type(&msg->msg_iter)) { 192162306a36Sopenharmony_ci case ITER_KVEC: 192262306a36Sopenharmony_ci buf = msg->msg_iter.kvec->iov_base; 192362306a36Sopenharmony_ci to_read = msg->msg_iter.kvec->iov_len; 192462306a36Sopenharmony_ci rc = smbd_recv_buf(info, buf, to_read); 192562306a36Sopenharmony_ci break; 192662306a36Sopenharmony_ci 192762306a36Sopenharmony_ci case ITER_BVEC: 192862306a36Sopenharmony_ci page = msg->msg_iter.bvec->bv_page; 192962306a36Sopenharmony_ci page_offset = msg->msg_iter.bvec->bv_offset; 193062306a36Sopenharmony_ci to_read = msg->msg_iter.bvec->bv_len; 193162306a36Sopenharmony_ci rc = smbd_recv_page(info, page, page_offset, to_read); 193262306a36Sopenharmony_ci break; 193362306a36Sopenharmony_ci 193462306a36Sopenharmony_ci default: 193562306a36Sopenharmony_ci /* It's a bug in upper layer to get there */ 193662306a36Sopenharmony_ci cifs_dbg(VFS, "Invalid msg type %d\n", 193762306a36Sopenharmony_ci iov_iter_type(&msg->msg_iter)); 193862306a36Sopenharmony_ci rc = -EINVAL; 193962306a36Sopenharmony_ci } 194062306a36Sopenharmony_ci 194162306a36Sopenharmony_ciout: 194262306a36Sopenharmony_ci /* SMBDirect will read it all or nothing */ 
194362306a36Sopenharmony_ci if (rc > 0) 194462306a36Sopenharmony_ci msg->msg_iter.count = 0; 194562306a36Sopenharmony_ci return rc; 194662306a36Sopenharmony_ci} 194762306a36Sopenharmony_ci 194862306a36Sopenharmony_ci/* 194962306a36Sopenharmony_ci * Send data to transport 195062306a36Sopenharmony_ci * Each rqst is transported as a SMBDirect payload 195162306a36Sopenharmony_ci * rqst: the data to write 195262306a36Sopenharmony_ci * return value: 0 if successfully write, otherwise error code 195362306a36Sopenharmony_ci */ 195462306a36Sopenharmony_ciint smbd_send(struct TCP_Server_Info *server, 195562306a36Sopenharmony_ci int num_rqst, struct smb_rqst *rqst_array) 195662306a36Sopenharmony_ci{ 195762306a36Sopenharmony_ci struct smbd_connection *info = server->smbd_conn; 195862306a36Sopenharmony_ci struct smb_rqst *rqst; 195962306a36Sopenharmony_ci struct iov_iter iter; 196062306a36Sopenharmony_ci unsigned int remaining_data_length, klen; 196162306a36Sopenharmony_ci int rc, i, rqst_idx; 196262306a36Sopenharmony_ci 196362306a36Sopenharmony_ci if (info->transport_status != SMBD_CONNECTED) 196462306a36Sopenharmony_ci return -EAGAIN; 196562306a36Sopenharmony_ci 196662306a36Sopenharmony_ci /* 196762306a36Sopenharmony_ci * Add in the page array if there is one. 
The caller needs to set 196862306a36Sopenharmony_ci * rq_tailsz to PAGE_SIZE when the buffer has multiple pages and 196962306a36Sopenharmony_ci * ends at page boundary 197062306a36Sopenharmony_ci */ 197162306a36Sopenharmony_ci remaining_data_length = 0; 197262306a36Sopenharmony_ci for (i = 0; i < num_rqst; i++) 197362306a36Sopenharmony_ci remaining_data_length += smb_rqst_len(server, &rqst_array[i]); 197462306a36Sopenharmony_ci 197562306a36Sopenharmony_ci if (unlikely(remaining_data_length > info->max_fragmented_send_size)) { 197662306a36Sopenharmony_ci /* assertion: payload never exceeds negotiated maximum */ 197762306a36Sopenharmony_ci log_write(ERR, "payload size %d > max size %d\n", 197862306a36Sopenharmony_ci remaining_data_length, info->max_fragmented_send_size); 197962306a36Sopenharmony_ci return -EINVAL; 198062306a36Sopenharmony_ci } 198162306a36Sopenharmony_ci 198262306a36Sopenharmony_ci log_write(INFO, "num_rqst=%d total length=%u\n", 198362306a36Sopenharmony_ci num_rqst, remaining_data_length); 198462306a36Sopenharmony_ci 198562306a36Sopenharmony_ci rqst_idx = 0; 198662306a36Sopenharmony_ci do { 198762306a36Sopenharmony_ci rqst = &rqst_array[rqst_idx]; 198862306a36Sopenharmony_ci 198962306a36Sopenharmony_ci cifs_dbg(FYI, "Sending smb (RDMA): idx=%d smb_len=%lu\n", 199062306a36Sopenharmony_ci rqst_idx, smb_rqst_len(server, rqst)); 199162306a36Sopenharmony_ci for (i = 0; i < rqst->rq_nvec; i++) 199262306a36Sopenharmony_ci dump_smb(rqst->rq_iov[i].iov_base, rqst->rq_iov[i].iov_len); 199362306a36Sopenharmony_ci 199462306a36Sopenharmony_ci log_write(INFO, "RDMA-WR[%u] nvec=%d len=%u iter=%zu rqlen=%lu\n", 199562306a36Sopenharmony_ci rqst_idx, rqst->rq_nvec, remaining_data_length, 199662306a36Sopenharmony_ci iov_iter_count(&rqst->rq_iter), smb_rqst_len(server, rqst)); 199762306a36Sopenharmony_ci 199862306a36Sopenharmony_ci /* Send the metadata pages. 
*/ 199962306a36Sopenharmony_ci klen = 0; 200062306a36Sopenharmony_ci for (i = 0; i < rqst->rq_nvec; i++) 200162306a36Sopenharmony_ci klen += rqst->rq_iov[i].iov_len; 200262306a36Sopenharmony_ci iov_iter_kvec(&iter, ITER_SOURCE, rqst->rq_iov, rqst->rq_nvec, klen); 200362306a36Sopenharmony_ci 200462306a36Sopenharmony_ci rc = smbd_post_send_iter(info, &iter, &remaining_data_length); 200562306a36Sopenharmony_ci if (rc < 0) 200662306a36Sopenharmony_ci break; 200762306a36Sopenharmony_ci 200862306a36Sopenharmony_ci if (iov_iter_count(&rqst->rq_iter) > 0) { 200962306a36Sopenharmony_ci /* And then the data pages if there are any */ 201062306a36Sopenharmony_ci rc = smbd_post_send_iter(info, &rqst->rq_iter, 201162306a36Sopenharmony_ci &remaining_data_length); 201262306a36Sopenharmony_ci if (rc < 0) 201362306a36Sopenharmony_ci break; 201462306a36Sopenharmony_ci } 201562306a36Sopenharmony_ci 201662306a36Sopenharmony_ci } while (++rqst_idx < num_rqst); 201762306a36Sopenharmony_ci 201862306a36Sopenharmony_ci /* 201962306a36Sopenharmony_ci * As an optimization, we don't wait for individual I/O to finish 202062306a36Sopenharmony_ci * before sending the next one. 
202162306a36Sopenharmony_ci * Send them all and wait for pending send count to get to 0 202262306a36Sopenharmony_ci * that means all the I/Os have been out and we are good to return 202362306a36Sopenharmony_ci */ 202462306a36Sopenharmony_ci 202562306a36Sopenharmony_ci wait_event(info->wait_send_pending, 202662306a36Sopenharmony_ci atomic_read(&info->send_pending) == 0); 202762306a36Sopenharmony_ci 202862306a36Sopenharmony_ci return rc; 202962306a36Sopenharmony_ci} 203062306a36Sopenharmony_ci 203162306a36Sopenharmony_cistatic void register_mr_done(struct ib_cq *cq, struct ib_wc *wc) 203262306a36Sopenharmony_ci{ 203362306a36Sopenharmony_ci struct smbd_mr *mr; 203462306a36Sopenharmony_ci struct ib_cqe *cqe; 203562306a36Sopenharmony_ci 203662306a36Sopenharmony_ci if (wc->status) { 203762306a36Sopenharmony_ci log_rdma_mr(ERR, "status=%d\n", wc->status); 203862306a36Sopenharmony_ci cqe = wc->wr_cqe; 203962306a36Sopenharmony_ci mr = container_of(cqe, struct smbd_mr, cqe); 204062306a36Sopenharmony_ci smbd_disconnect_rdma_connection(mr->conn); 204162306a36Sopenharmony_ci } 204262306a36Sopenharmony_ci} 204362306a36Sopenharmony_ci 204462306a36Sopenharmony_ci/* 204562306a36Sopenharmony_ci * The work queue function that recovers MRs 204662306a36Sopenharmony_ci * We need to call ib_dereg_mr() and ib_alloc_mr() before this MR can be used 204762306a36Sopenharmony_ci * again. Both calls are slow, so finish them in a workqueue. This will not 204862306a36Sopenharmony_ci * block I/O path. 204962306a36Sopenharmony_ci * There is one workqueue that recovers MRs, there is no need to lock as the 205062306a36Sopenharmony_ci * I/O requests calling smbd_register_mr will never update the links in the 205162306a36Sopenharmony_ci * mr_list. 
205262306a36Sopenharmony_ci */ 205362306a36Sopenharmony_cistatic void smbd_mr_recovery_work(struct work_struct *work) 205462306a36Sopenharmony_ci{ 205562306a36Sopenharmony_ci struct smbd_connection *info = 205662306a36Sopenharmony_ci container_of(work, struct smbd_connection, mr_recovery_work); 205762306a36Sopenharmony_ci struct smbd_mr *smbdirect_mr; 205862306a36Sopenharmony_ci int rc; 205962306a36Sopenharmony_ci 206062306a36Sopenharmony_ci list_for_each_entry(smbdirect_mr, &info->mr_list, list) { 206162306a36Sopenharmony_ci if (smbdirect_mr->state == MR_ERROR) { 206262306a36Sopenharmony_ci 206362306a36Sopenharmony_ci /* recover this MR entry */ 206462306a36Sopenharmony_ci rc = ib_dereg_mr(smbdirect_mr->mr); 206562306a36Sopenharmony_ci if (rc) { 206662306a36Sopenharmony_ci log_rdma_mr(ERR, 206762306a36Sopenharmony_ci "ib_dereg_mr failed rc=%x\n", 206862306a36Sopenharmony_ci rc); 206962306a36Sopenharmony_ci smbd_disconnect_rdma_connection(info); 207062306a36Sopenharmony_ci continue; 207162306a36Sopenharmony_ci } 207262306a36Sopenharmony_ci 207362306a36Sopenharmony_ci smbdirect_mr->mr = ib_alloc_mr( 207462306a36Sopenharmony_ci info->pd, info->mr_type, 207562306a36Sopenharmony_ci info->max_frmr_depth); 207662306a36Sopenharmony_ci if (IS_ERR(smbdirect_mr->mr)) { 207762306a36Sopenharmony_ci log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", 207862306a36Sopenharmony_ci info->mr_type, 207962306a36Sopenharmony_ci info->max_frmr_depth); 208062306a36Sopenharmony_ci smbd_disconnect_rdma_connection(info); 208162306a36Sopenharmony_ci continue; 208262306a36Sopenharmony_ci } 208362306a36Sopenharmony_ci } else 208462306a36Sopenharmony_ci /* This MR is being used, don't recover it */ 208562306a36Sopenharmony_ci continue; 208662306a36Sopenharmony_ci 208762306a36Sopenharmony_ci smbdirect_mr->state = MR_READY; 208862306a36Sopenharmony_ci 208962306a36Sopenharmony_ci /* smbdirect_mr->state is updated by this function 209062306a36Sopenharmony_ci * and is read and 
updated by I/O issuing CPUs trying 209162306a36Sopenharmony_ci * to get a MR, the call to atomic_inc_return 209262306a36Sopenharmony_ci * implicates a memory barrier and guarantees this 209362306a36Sopenharmony_ci * value is updated before waking up any calls to 209462306a36Sopenharmony_ci * get_mr() from the I/O issuing CPUs 209562306a36Sopenharmony_ci */ 209662306a36Sopenharmony_ci if (atomic_inc_return(&info->mr_ready_count) == 1) 209762306a36Sopenharmony_ci wake_up_interruptible(&info->wait_mr); 209862306a36Sopenharmony_ci } 209962306a36Sopenharmony_ci} 210062306a36Sopenharmony_ci 210162306a36Sopenharmony_cistatic void destroy_mr_list(struct smbd_connection *info) 210262306a36Sopenharmony_ci{ 210362306a36Sopenharmony_ci struct smbd_mr *mr, *tmp; 210462306a36Sopenharmony_ci 210562306a36Sopenharmony_ci cancel_work_sync(&info->mr_recovery_work); 210662306a36Sopenharmony_ci list_for_each_entry_safe(mr, tmp, &info->mr_list, list) { 210762306a36Sopenharmony_ci if (mr->state == MR_INVALIDATED) 210862306a36Sopenharmony_ci ib_dma_unmap_sg(info->id->device, mr->sgt.sgl, 210962306a36Sopenharmony_ci mr->sgt.nents, mr->dir); 211062306a36Sopenharmony_ci ib_dereg_mr(mr->mr); 211162306a36Sopenharmony_ci kfree(mr->sgt.sgl); 211262306a36Sopenharmony_ci kfree(mr); 211362306a36Sopenharmony_ci } 211462306a36Sopenharmony_ci} 211562306a36Sopenharmony_ci 211662306a36Sopenharmony_ci/* 211762306a36Sopenharmony_ci * Allocate MRs used for RDMA read/write 211862306a36Sopenharmony_ci * The number of MRs will not exceed hardware capability in responder_resources 211962306a36Sopenharmony_ci * All MRs are kept in mr_list. The MR can be recovered after it's used 212062306a36Sopenharmony_ci * Recovery is done in smbd_mr_recovery_work. 
The content of list entry changes 212162306a36Sopenharmony_ci * as MRs are used and recovered for I/O, but the list links will not change 212262306a36Sopenharmony_ci */ 212362306a36Sopenharmony_cistatic int allocate_mr_list(struct smbd_connection *info) 212462306a36Sopenharmony_ci{ 212562306a36Sopenharmony_ci int i; 212662306a36Sopenharmony_ci struct smbd_mr *smbdirect_mr, *tmp; 212762306a36Sopenharmony_ci 212862306a36Sopenharmony_ci INIT_LIST_HEAD(&info->mr_list); 212962306a36Sopenharmony_ci init_waitqueue_head(&info->wait_mr); 213062306a36Sopenharmony_ci spin_lock_init(&info->mr_list_lock); 213162306a36Sopenharmony_ci atomic_set(&info->mr_ready_count, 0); 213262306a36Sopenharmony_ci atomic_set(&info->mr_used_count, 0); 213362306a36Sopenharmony_ci init_waitqueue_head(&info->wait_for_mr_cleanup); 213462306a36Sopenharmony_ci INIT_WORK(&info->mr_recovery_work, smbd_mr_recovery_work); 213562306a36Sopenharmony_ci /* Allocate more MRs (2x) than hardware responder_resources */ 213662306a36Sopenharmony_ci for (i = 0; i < info->responder_resources * 2; i++) { 213762306a36Sopenharmony_ci smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL); 213862306a36Sopenharmony_ci if (!smbdirect_mr) 213962306a36Sopenharmony_ci goto out; 214062306a36Sopenharmony_ci smbdirect_mr->mr = ib_alloc_mr(info->pd, info->mr_type, 214162306a36Sopenharmony_ci info->max_frmr_depth); 214262306a36Sopenharmony_ci if (IS_ERR(smbdirect_mr->mr)) { 214362306a36Sopenharmony_ci log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", 214462306a36Sopenharmony_ci info->mr_type, info->max_frmr_depth); 214562306a36Sopenharmony_ci goto out; 214662306a36Sopenharmony_ci } 214762306a36Sopenharmony_ci smbdirect_mr->sgt.sgl = kcalloc(info->max_frmr_depth, 214862306a36Sopenharmony_ci sizeof(struct scatterlist), 214962306a36Sopenharmony_ci GFP_KERNEL); 215062306a36Sopenharmony_ci if (!smbdirect_mr->sgt.sgl) { 215162306a36Sopenharmony_ci log_rdma_mr(ERR, "failed to allocate sgl\n"); 
215262306a36Sopenharmony_ci ib_dereg_mr(smbdirect_mr->mr); 215362306a36Sopenharmony_ci goto out; 215462306a36Sopenharmony_ci } 215562306a36Sopenharmony_ci smbdirect_mr->state = MR_READY; 215662306a36Sopenharmony_ci smbdirect_mr->conn = info; 215762306a36Sopenharmony_ci 215862306a36Sopenharmony_ci list_add_tail(&smbdirect_mr->list, &info->mr_list); 215962306a36Sopenharmony_ci atomic_inc(&info->mr_ready_count); 216062306a36Sopenharmony_ci } 216162306a36Sopenharmony_ci return 0; 216262306a36Sopenharmony_ci 216362306a36Sopenharmony_ciout: 216462306a36Sopenharmony_ci kfree(smbdirect_mr); 216562306a36Sopenharmony_ci 216662306a36Sopenharmony_ci list_for_each_entry_safe(smbdirect_mr, tmp, &info->mr_list, list) { 216762306a36Sopenharmony_ci list_del(&smbdirect_mr->list); 216862306a36Sopenharmony_ci ib_dereg_mr(smbdirect_mr->mr); 216962306a36Sopenharmony_ci kfree(smbdirect_mr->sgt.sgl); 217062306a36Sopenharmony_ci kfree(smbdirect_mr); 217162306a36Sopenharmony_ci } 217262306a36Sopenharmony_ci return -ENOMEM; 217362306a36Sopenharmony_ci} 217462306a36Sopenharmony_ci 217562306a36Sopenharmony_ci/* 217662306a36Sopenharmony_ci * Get a MR from mr_list. This function waits until there is at least one 217762306a36Sopenharmony_ci * MR available in the list. It may access the list while the 217862306a36Sopenharmony_ci * smbd_mr_recovery_work is recovering the MR list. This doesn't need a lock 217962306a36Sopenharmony_ci * as they never modify the same places. However, there may be several CPUs 218062306a36Sopenharmony_ci * issueing I/O trying to get MR at the same time, mr_list_lock is used to 218162306a36Sopenharmony_ci * protect this situation. 
218262306a36Sopenharmony_ci */ 218362306a36Sopenharmony_cistatic struct smbd_mr *get_mr(struct smbd_connection *info) 218462306a36Sopenharmony_ci{ 218562306a36Sopenharmony_ci struct smbd_mr *ret; 218662306a36Sopenharmony_ci int rc; 218762306a36Sopenharmony_ciagain: 218862306a36Sopenharmony_ci rc = wait_event_interruptible(info->wait_mr, 218962306a36Sopenharmony_ci atomic_read(&info->mr_ready_count) || 219062306a36Sopenharmony_ci info->transport_status != SMBD_CONNECTED); 219162306a36Sopenharmony_ci if (rc) { 219262306a36Sopenharmony_ci log_rdma_mr(ERR, "wait_event_interruptible rc=%x\n", rc); 219362306a36Sopenharmony_ci return NULL; 219462306a36Sopenharmony_ci } 219562306a36Sopenharmony_ci 219662306a36Sopenharmony_ci if (info->transport_status != SMBD_CONNECTED) { 219762306a36Sopenharmony_ci log_rdma_mr(ERR, "info->transport_status=%x\n", 219862306a36Sopenharmony_ci info->transport_status); 219962306a36Sopenharmony_ci return NULL; 220062306a36Sopenharmony_ci } 220162306a36Sopenharmony_ci 220262306a36Sopenharmony_ci spin_lock(&info->mr_list_lock); 220362306a36Sopenharmony_ci list_for_each_entry(ret, &info->mr_list, list) { 220462306a36Sopenharmony_ci if (ret->state == MR_READY) { 220562306a36Sopenharmony_ci ret->state = MR_REGISTERED; 220662306a36Sopenharmony_ci spin_unlock(&info->mr_list_lock); 220762306a36Sopenharmony_ci atomic_dec(&info->mr_ready_count); 220862306a36Sopenharmony_ci atomic_inc(&info->mr_used_count); 220962306a36Sopenharmony_ci return ret; 221062306a36Sopenharmony_ci } 221162306a36Sopenharmony_ci } 221262306a36Sopenharmony_ci 221362306a36Sopenharmony_ci spin_unlock(&info->mr_list_lock); 221462306a36Sopenharmony_ci /* 221562306a36Sopenharmony_ci * It is possible that we could fail to get MR because other processes may 221662306a36Sopenharmony_ci * try to acquire a MR at the same time. If this is the case, retry it. 
221762306a36Sopenharmony_ci */ 221862306a36Sopenharmony_ci goto again; 221962306a36Sopenharmony_ci} 222062306a36Sopenharmony_ci 222162306a36Sopenharmony_ci/* 222262306a36Sopenharmony_ci * Transcribe the pages from an iterator into an MR scatterlist. 222362306a36Sopenharmony_ci */ 222462306a36Sopenharmony_cistatic int smbd_iter_to_mr(struct smbd_connection *info, 222562306a36Sopenharmony_ci struct iov_iter *iter, 222662306a36Sopenharmony_ci struct sg_table *sgt, 222762306a36Sopenharmony_ci unsigned int max_sg) 222862306a36Sopenharmony_ci{ 222962306a36Sopenharmony_ci int ret; 223062306a36Sopenharmony_ci 223162306a36Sopenharmony_ci memset(sgt->sgl, 0, max_sg * sizeof(struct scatterlist)); 223262306a36Sopenharmony_ci 223362306a36Sopenharmony_ci ret = extract_iter_to_sg(iter, iov_iter_count(iter), sgt, max_sg, 0); 223462306a36Sopenharmony_ci WARN_ON(ret < 0); 223562306a36Sopenharmony_ci if (sgt->nents > 0) 223662306a36Sopenharmony_ci sg_mark_end(&sgt->sgl[sgt->nents - 1]); 223762306a36Sopenharmony_ci return ret; 223862306a36Sopenharmony_ci} 223962306a36Sopenharmony_ci 224062306a36Sopenharmony_ci/* 224162306a36Sopenharmony_ci * Register memory for RDMA read/write 224262306a36Sopenharmony_ci * iter: the buffer to register memory with 224362306a36Sopenharmony_ci * writing: true if this is a RDMA write (SMB read), false for RDMA read 224462306a36Sopenharmony_ci * need_invalidate: true if this MR needs to be locally invalidated after I/O 224562306a36Sopenharmony_ci * return value: the MR registered, NULL if failed. 
224662306a36Sopenharmony_ci */ 224762306a36Sopenharmony_cistruct smbd_mr *smbd_register_mr(struct smbd_connection *info, 224862306a36Sopenharmony_ci struct iov_iter *iter, 224962306a36Sopenharmony_ci bool writing, bool need_invalidate) 225062306a36Sopenharmony_ci{ 225162306a36Sopenharmony_ci struct smbd_mr *smbdirect_mr; 225262306a36Sopenharmony_ci int rc, num_pages; 225362306a36Sopenharmony_ci enum dma_data_direction dir; 225462306a36Sopenharmony_ci struct ib_reg_wr *reg_wr; 225562306a36Sopenharmony_ci 225662306a36Sopenharmony_ci num_pages = iov_iter_npages(iter, info->max_frmr_depth + 1); 225762306a36Sopenharmony_ci if (num_pages > info->max_frmr_depth) { 225862306a36Sopenharmony_ci log_rdma_mr(ERR, "num_pages=%d max_frmr_depth=%d\n", 225962306a36Sopenharmony_ci num_pages, info->max_frmr_depth); 226062306a36Sopenharmony_ci WARN_ON_ONCE(1); 226162306a36Sopenharmony_ci return NULL; 226262306a36Sopenharmony_ci } 226362306a36Sopenharmony_ci 226462306a36Sopenharmony_ci smbdirect_mr = get_mr(info); 226562306a36Sopenharmony_ci if (!smbdirect_mr) { 226662306a36Sopenharmony_ci log_rdma_mr(ERR, "get_mr returning NULL\n"); 226762306a36Sopenharmony_ci return NULL; 226862306a36Sopenharmony_ci } 226962306a36Sopenharmony_ci 227062306a36Sopenharmony_ci dir = writing ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE; 227162306a36Sopenharmony_ci smbdirect_mr->dir = dir; 227262306a36Sopenharmony_ci smbdirect_mr->need_invalidate = need_invalidate; 227362306a36Sopenharmony_ci smbdirect_mr->sgt.nents = 0; 227462306a36Sopenharmony_ci smbdirect_mr->sgt.orig_nents = 0; 227562306a36Sopenharmony_ci 227662306a36Sopenharmony_ci log_rdma_mr(INFO, "num_pages=0x%x count=0x%zx depth=%u\n", 227762306a36Sopenharmony_ci num_pages, iov_iter_count(iter), info->max_frmr_depth); 227862306a36Sopenharmony_ci smbd_iter_to_mr(info, iter, &smbdirect_mr->sgt, info->max_frmr_depth); 227962306a36Sopenharmony_ci 228062306a36Sopenharmony_ci rc = ib_dma_map_sg(info->id->device, smbdirect_mr->sgt.sgl, 228162306a36Sopenharmony_ci smbdirect_mr->sgt.nents, dir); 228262306a36Sopenharmony_ci if (!rc) { 228362306a36Sopenharmony_ci log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n", 228462306a36Sopenharmony_ci num_pages, dir, rc); 228562306a36Sopenharmony_ci goto dma_map_error; 228662306a36Sopenharmony_ci } 228762306a36Sopenharmony_ci 228862306a36Sopenharmony_ci rc = ib_map_mr_sg(smbdirect_mr->mr, smbdirect_mr->sgt.sgl, 228962306a36Sopenharmony_ci smbdirect_mr->sgt.nents, NULL, PAGE_SIZE); 229062306a36Sopenharmony_ci if (rc != smbdirect_mr->sgt.nents) { 229162306a36Sopenharmony_ci log_rdma_mr(ERR, 229262306a36Sopenharmony_ci "ib_map_mr_sg failed rc = %d nents = %x\n", 229362306a36Sopenharmony_ci rc, smbdirect_mr->sgt.nents); 229462306a36Sopenharmony_ci goto map_mr_error; 229562306a36Sopenharmony_ci } 229662306a36Sopenharmony_ci 229762306a36Sopenharmony_ci ib_update_fast_reg_key(smbdirect_mr->mr, 229862306a36Sopenharmony_ci ib_inc_rkey(smbdirect_mr->mr->rkey)); 229962306a36Sopenharmony_ci reg_wr = &smbdirect_mr->wr; 230062306a36Sopenharmony_ci reg_wr->wr.opcode = IB_WR_REG_MR; 230162306a36Sopenharmony_ci smbdirect_mr->cqe.done = register_mr_done; 230262306a36Sopenharmony_ci reg_wr->wr.wr_cqe = &smbdirect_mr->cqe; 230362306a36Sopenharmony_ci reg_wr->wr.num_sge = 0; 
230462306a36Sopenharmony_ci reg_wr->wr.send_flags = IB_SEND_SIGNALED; 230562306a36Sopenharmony_ci reg_wr->mr = smbdirect_mr->mr; 230662306a36Sopenharmony_ci reg_wr->key = smbdirect_mr->mr->rkey; 230762306a36Sopenharmony_ci reg_wr->access = writing ? 230862306a36Sopenharmony_ci IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 230962306a36Sopenharmony_ci IB_ACCESS_REMOTE_READ; 231062306a36Sopenharmony_ci 231162306a36Sopenharmony_ci /* 231262306a36Sopenharmony_ci * There is no need for waiting for complemtion on ib_post_send 231362306a36Sopenharmony_ci * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution 231462306a36Sopenharmony_ci * on the next ib_post_send when we actaully send I/O to remote peer 231562306a36Sopenharmony_ci */ 231662306a36Sopenharmony_ci rc = ib_post_send(info->id->qp, ®_wr->wr, NULL); 231762306a36Sopenharmony_ci if (!rc) 231862306a36Sopenharmony_ci return smbdirect_mr; 231962306a36Sopenharmony_ci 232062306a36Sopenharmony_ci log_rdma_mr(ERR, "ib_post_send failed rc=%x reg_wr->key=%x\n", 232162306a36Sopenharmony_ci rc, reg_wr->key); 232262306a36Sopenharmony_ci 232362306a36Sopenharmony_ci /* If all failed, attempt to recover this MR by setting it MR_ERROR*/ 232462306a36Sopenharmony_cimap_mr_error: 232562306a36Sopenharmony_ci ib_dma_unmap_sg(info->id->device, smbdirect_mr->sgt.sgl, 232662306a36Sopenharmony_ci smbdirect_mr->sgt.nents, smbdirect_mr->dir); 232762306a36Sopenharmony_ci 232862306a36Sopenharmony_cidma_map_error: 232962306a36Sopenharmony_ci smbdirect_mr->state = MR_ERROR; 233062306a36Sopenharmony_ci if (atomic_dec_and_test(&info->mr_used_count)) 233162306a36Sopenharmony_ci wake_up(&info->wait_for_mr_cleanup); 233262306a36Sopenharmony_ci 233362306a36Sopenharmony_ci smbd_disconnect_rdma_connection(info); 233462306a36Sopenharmony_ci 233562306a36Sopenharmony_ci return NULL; 233662306a36Sopenharmony_ci} 233762306a36Sopenharmony_ci 233862306a36Sopenharmony_cistatic void local_inv_done(struct ib_cq *cq, struct ib_wc *wc) 
233962306a36Sopenharmony_ci{ 234062306a36Sopenharmony_ci struct smbd_mr *smbdirect_mr; 234162306a36Sopenharmony_ci struct ib_cqe *cqe; 234262306a36Sopenharmony_ci 234362306a36Sopenharmony_ci cqe = wc->wr_cqe; 234462306a36Sopenharmony_ci smbdirect_mr = container_of(cqe, struct smbd_mr, cqe); 234562306a36Sopenharmony_ci smbdirect_mr->state = MR_INVALIDATED; 234662306a36Sopenharmony_ci if (wc->status != IB_WC_SUCCESS) { 234762306a36Sopenharmony_ci log_rdma_mr(ERR, "invalidate failed status=%x\n", wc->status); 234862306a36Sopenharmony_ci smbdirect_mr->state = MR_ERROR; 234962306a36Sopenharmony_ci } 235062306a36Sopenharmony_ci complete(&smbdirect_mr->invalidate_done); 235162306a36Sopenharmony_ci} 235262306a36Sopenharmony_ci 235362306a36Sopenharmony_ci/* 235462306a36Sopenharmony_ci * Deregister a MR after I/O is done 235562306a36Sopenharmony_ci * This function may wait if remote invalidation is not used 235662306a36Sopenharmony_ci * and we have to locally invalidate the buffer to prevent data is being 235762306a36Sopenharmony_ci * modified by remote peer after upper layer consumes it 235862306a36Sopenharmony_ci */ 235962306a36Sopenharmony_ciint smbd_deregister_mr(struct smbd_mr *smbdirect_mr) 236062306a36Sopenharmony_ci{ 236162306a36Sopenharmony_ci struct ib_send_wr *wr; 236262306a36Sopenharmony_ci struct smbd_connection *info = smbdirect_mr->conn; 236362306a36Sopenharmony_ci int rc = 0; 236462306a36Sopenharmony_ci 236562306a36Sopenharmony_ci if (smbdirect_mr->need_invalidate) { 236662306a36Sopenharmony_ci /* Need to finish local invalidation before returning */ 236762306a36Sopenharmony_ci wr = &smbdirect_mr->inv_wr; 236862306a36Sopenharmony_ci wr->opcode = IB_WR_LOCAL_INV; 236962306a36Sopenharmony_ci smbdirect_mr->cqe.done = local_inv_done; 237062306a36Sopenharmony_ci wr->wr_cqe = &smbdirect_mr->cqe; 237162306a36Sopenharmony_ci wr->num_sge = 0; 237262306a36Sopenharmony_ci wr->ex.invalidate_rkey = smbdirect_mr->mr->rkey; 237362306a36Sopenharmony_ci wr->send_flags = 
IB_SEND_SIGNALED; 237462306a36Sopenharmony_ci 237562306a36Sopenharmony_ci init_completion(&smbdirect_mr->invalidate_done); 237662306a36Sopenharmony_ci rc = ib_post_send(info->id->qp, wr, NULL); 237762306a36Sopenharmony_ci if (rc) { 237862306a36Sopenharmony_ci log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc); 237962306a36Sopenharmony_ci smbd_disconnect_rdma_connection(info); 238062306a36Sopenharmony_ci goto done; 238162306a36Sopenharmony_ci } 238262306a36Sopenharmony_ci wait_for_completion(&smbdirect_mr->invalidate_done); 238362306a36Sopenharmony_ci smbdirect_mr->need_invalidate = false; 238462306a36Sopenharmony_ci } else 238562306a36Sopenharmony_ci /* 238662306a36Sopenharmony_ci * For remote invalidation, just set it to MR_INVALIDATED 238762306a36Sopenharmony_ci * and defer to mr_recovery_work to recover the MR for next use 238862306a36Sopenharmony_ci */ 238962306a36Sopenharmony_ci smbdirect_mr->state = MR_INVALIDATED; 239062306a36Sopenharmony_ci 239162306a36Sopenharmony_ci if (smbdirect_mr->state == MR_INVALIDATED) { 239262306a36Sopenharmony_ci ib_dma_unmap_sg( 239362306a36Sopenharmony_ci info->id->device, smbdirect_mr->sgt.sgl, 239462306a36Sopenharmony_ci smbdirect_mr->sgt.nents, 239562306a36Sopenharmony_ci smbdirect_mr->dir); 239662306a36Sopenharmony_ci smbdirect_mr->state = MR_READY; 239762306a36Sopenharmony_ci if (atomic_inc_return(&info->mr_ready_count) == 1) 239862306a36Sopenharmony_ci wake_up_interruptible(&info->wait_mr); 239962306a36Sopenharmony_ci } else 240062306a36Sopenharmony_ci /* 240162306a36Sopenharmony_ci * Schedule the work to do MR recovery for future I/Os MR 240262306a36Sopenharmony_ci * recovery is slow and don't want it to block current I/O 240362306a36Sopenharmony_ci */ 240462306a36Sopenharmony_ci queue_work(info->workqueue, &info->mr_recovery_work); 240562306a36Sopenharmony_ci 240662306a36Sopenharmony_cidone: 240762306a36Sopenharmony_ci if (atomic_dec_and_test(&info->mr_used_count)) 240862306a36Sopenharmony_ci 
wake_up(&info->wait_for_mr_cleanup); 240962306a36Sopenharmony_ci 241062306a36Sopenharmony_ci return rc; 241162306a36Sopenharmony_ci} 241262306a36Sopenharmony_ci 241362306a36Sopenharmony_cistatic bool smb_set_sge(struct smb_extract_to_rdma *rdma, 241462306a36Sopenharmony_ci struct page *lowest_page, size_t off, size_t len) 241562306a36Sopenharmony_ci{ 241662306a36Sopenharmony_ci struct ib_sge *sge = &rdma->sge[rdma->nr_sge]; 241762306a36Sopenharmony_ci u64 addr; 241862306a36Sopenharmony_ci 241962306a36Sopenharmony_ci addr = ib_dma_map_page(rdma->device, lowest_page, 242062306a36Sopenharmony_ci off, len, rdma->direction); 242162306a36Sopenharmony_ci if (ib_dma_mapping_error(rdma->device, addr)) 242262306a36Sopenharmony_ci return false; 242362306a36Sopenharmony_ci 242462306a36Sopenharmony_ci sge->addr = addr; 242562306a36Sopenharmony_ci sge->length = len; 242662306a36Sopenharmony_ci sge->lkey = rdma->local_dma_lkey; 242762306a36Sopenharmony_ci rdma->nr_sge++; 242862306a36Sopenharmony_ci return true; 242962306a36Sopenharmony_ci} 243062306a36Sopenharmony_ci 243162306a36Sopenharmony_ci/* 243262306a36Sopenharmony_ci * Extract page fragments from a BVEC-class iterator and add them to an RDMA 243362306a36Sopenharmony_ci * element list. The pages are not pinned. 
243462306a36Sopenharmony_ci */ 243562306a36Sopenharmony_cistatic ssize_t smb_extract_bvec_to_rdma(struct iov_iter *iter, 243662306a36Sopenharmony_ci struct smb_extract_to_rdma *rdma, 243762306a36Sopenharmony_ci ssize_t maxsize) 243862306a36Sopenharmony_ci{ 243962306a36Sopenharmony_ci const struct bio_vec *bv = iter->bvec; 244062306a36Sopenharmony_ci unsigned long start = iter->iov_offset; 244162306a36Sopenharmony_ci unsigned int i; 244262306a36Sopenharmony_ci ssize_t ret = 0; 244362306a36Sopenharmony_ci 244462306a36Sopenharmony_ci for (i = 0; i < iter->nr_segs; i++) { 244562306a36Sopenharmony_ci size_t off, len; 244662306a36Sopenharmony_ci 244762306a36Sopenharmony_ci len = bv[i].bv_len; 244862306a36Sopenharmony_ci if (start >= len) { 244962306a36Sopenharmony_ci start -= len; 245062306a36Sopenharmony_ci continue; 245162306a36Sopenharmony_ci } 245262306a36Sopenharmony_ci 245362306a36Sopenharmony_ci len = min_t(size_t, maxsize, len - start); 245462306a36Sopenharmony_ci off = bv[i].bv_offset + start; 245562306a36Sopenharmony_ci 245662306a36Sopenharmony_ci if (!smb_set_sge(rdma, bv[i].bv_page, off, len)) 245762306a36Sopenharmony_ci return -EIO; 245862306a36Sopenharmony_ci 245962306a36Sopenharmony_ci ret += len; 246062306a36Sopenharmony_ci maxsize -= len; 246162306a36Sopenharmony_ci if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0) 246262306a36Sopenharmony_ci break; 246362306a36Sopenharmony_ci start = 0; 246462306a36Sopenharmony_ci } 246562306a36Sopenharmony_ci 246662306a36Sopenharmony_ci return ret; 246762306a36Sopenharmony_ci} 246862306a36Sopenharmony_ci 246962306a36Sopenharmony_ci/* 247062306a36Sopenharmony_ci * Extract fragments from a KVEC-class iterator and add them to an RDMA list. 247162306a36Sopenharmony_ci * This can deal with vmalloc'd buffers as well as kmalloc'd or static buffers. 247262306a36Sopenharmony_ci * The pages are not pinned. 
247362306a36Sopenharmony_ci */ 247462306a36Sopenharmony_cistatic ssize_t smb_extract_kvec_to_rdma(struct iov_iter *iter, 247562306a36Sopenharmony_ci struct smb_extract_to_rdma *rdma, 247662306a36Sopenharmony_ci ssize_t maxsize) 247762306a36Sopenharmony_ci{ 247862306a36Sopenharmony_ci const struct kvec *kv = iter->kvec; 247962306a36Sopenharmony_ci unsigned long start = iter->iov_offset; 248062306a36Sopenharmony_ci unsigned int i; 248162306a36Sopenharmony_ci ssize_t ret = 0; 248262306a36Sopenharmony_ci 248362306a36Sopenharmony_ci for (i = 0; i < iter->nr_segs; i++) { 248462306a36Sopenharmony_ci struct page *page; 248562306a36Sopenharmony_ci unsigned long kaddr; 248662306a36Sopenharmony_ci size_t off, len, seg; 248762306a36Sopenharmony_ci 248862306a36Sopenharmony_ci len = kv[i].iov_len; 248962306a36Sopenharmony_ci if (start >= len) { 249062306a36Sopenharmony_ci start -= len; 249162306a36Sopenharmony_ci continue; 249262306a36Sopenharmony_ci } 249362306a36Sopenharmony_ci 249462306a36Sopenharmony_ci kaddr = (unsigned long)kv[i].iov_base + start; 249562306a36Sopenharmony_ci off = kaddr & ~PAGE_MASK; 249662306a36Sopenharmony_ci len = min_t(size_t, maxsize, len - start); 249762306a36Sopenharmony_ci kaddr &= PAGE_MASK; 249862306a36Sopenharmony_ci 249962306a36Sopenharmony_ci maxsize -= len; 250062306a36Sopenharmony_ci do { 250162306a36Sopenharmony_ci seg = min_t(size_t, len, PAGE_SIZE - off); 250262306a36Sopenharmony_ci 250362306a36Sopenharmony_ci if (is_vmalloc_or_module_addr((void *)kaddr)) 250462306a36Sopenharmony_ci page = vmalloc_to_page((void *)kaddr); 250562306a36Sopenharmony_ci else 250662306a36Sopenharmony_ci page = virt_to_page((void *)kaddr); 250762306a36Sopenharmony_ci 250862306a36Sopenharmony_ci if (!smb_set_sge(rdma, page, off, seg)) 250962306a36Sopenharmony_ci return -EIO; 251062306a36Sopenharmony_ci 251162306a36Sopenharmony_ci ret += seg; 251262306a36Sopenharmony_ci len -= seg; 251362306a36Sopenharmony_ci kaddr += PAGE_SIZE; 251462306a36Sopenharmony_ci off = 
0; 251562306a36Sopenharmony_ci } while (len > 0 && rdma->nr_sge < rdma->max_sge); 251662306a36Sopenharmony_ci 251762306a36Sopenharmony_ci if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0) 251862306a36Sopenharmony_ci break; 251962306a36Sopenharmony_ci start = 0; 252062306a36Sopenharmony_ci } 252162306a36Sopenharmony_ci 252262306a36Sopenharmony_ci return ret; 252362306a36Sopenharmony_ci} 252462306a36Sopenharmony_ci 252562306a36Sopenharmony_ci/* 252662306a36Sopenharmony_ci * Extract folio fragments from an XARRAY-class iterator and add them to an 252762306a36Sopenharmony_ci * RDMA list. The folios are not pinned. 252862306a36Sopenharmony_ci */ 252962306a36Sopenharmony_cistatic ssize_t smb_extract_xarray_to_rdma(struct iov_iter *iter, 253062306a36Sopenharmony_ci struct smb_extract_to_rdma *rdma, 253162306a36Sopenharmony_ci ssize_t maxsize) 253262306a36Sopenharmony_ci{ 253362306a36Sopenharmony_ci struct xarray *xa = iter->xarray; 253462306a36Sopenharmony_ci struct folio *folio; 253562306a36Sopenharmony_ci loff_t start = iter->xarray_start + iter->iov_offset; 253662306a36Sopenharmony_ci pgoff_t index = start / PAGE_SIZE; 253762306a36Sopenharmony_ci ssize_t ret = 0; 253862306a36Sopenharmony_ci size_t off, len; 253962306a36Sopenharmony_ci XA_STATE(xas, xa, index); 254062306a36Sopenharmony_ci 254162306a36Sopenharmony_ci rcu_read_lock(); 254262306a36Sopenharmony_ci 254362306a36Sopenharmony_ci xas_for_each(&xas, folio, ULONG_MAX) { 254462306a36Sopenharmony_ci if (xas_retry(&xas, folio)) 254562306a36Sopenharmony_ci continue; 254662306a36Sopenharmony_ci if (WARN_ON(xa_is_value(folio))) 254762306a36Sopenharmony_ci break; 254862306a36Sopenharmony_ci if (WARN_ON(folio_test_hugetlb(folio))) 254962306a36Sopenharmony_ci break; 255062306a36Sopenharmony_ci 255162306a36Sopenharmony_ci off = offset_in_folio(folio, start); 255262306a36Sopenharmony_ci len = min_t(size_t, maxsize, folio_size(folio) - off); 255362306a36Sopenharmony_ci 255462306a36Sopenharmony_ci if (!smb_set_sge(rdma, 
folio_page(folio, 0), off, len)) { 255562306a36Sopenharmony_ci rcu_read_unlock(); 255662306a36Sopenharmony_ci return -EIO; 255762306a36Sopenharmony_ci } 255862306a36Sopenharmony_ci 255962306a36Sopenharmony_ci maxsize -= len; 256062306a36Sopenharmony_ci ret += len; 256162306a36Sopenharmony_ci if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0) 256262306a36Sopenharmony_ci break; 256362306a36Sopenharmony_ci } 256462306a36Sopenharmony_ci 256562306a36Sopenharmony_ci rcu_read_unlock(); 256662306a36Sopenharmony_ci return ret; 256762306a36Sopenharmony_ci} 256862306a36Sopenharmony_ci 256962306a36Sopenharmony_ci/* 257062306a36Sopenharmony_ci * Extract page fragments from up to the given amount of the source iterator 257162306a36Sopenharmony_ci * and build up an RDMA list that refers to all of those bits. The RDMA list 257262306a36Sopenharmony_ci * is appended to, up to the maximum number of elements set in the parameter 257362306a36Sopenharmony_ci * block. 257462306a36Sopenharmony_ci * 257562306a36Sopenharmony_ci * The extracted page fragments are not pinned or ref'd in any way; if an 257662306a36Sopenharmony_ci * IOVEC/UBUF-type iterator is to be used, it should be converted to a 257762306a36Sopenharmony_ci * BVEC-type iterator and the pages pinned, ref'd or otherwise held in some 257862306a36Sopenharmony_ci * way. 
257962306a36Sopenharmony_ci */ 258062306a36Sopenharmony_cistatic ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len, 258162306a36Sopenharmony_ci struct smb_extract_to_rdma *rdma) 258262306a36Sopenharmony_ci{ 258362306a36Sopenharmony_ci ssize_t ret; 258462306a36Sopenharmony_ci int before = rdma->nr_sge; 258562306a36Sopenharmony_ci 258662306a36Sopenharmony_ci switch (iov_iter_type(iter)) { 258762306a36Sopenharmony_ci case ITER_BVEC: 258862306a36Sopenharmony_ci ret = smb_extract_bvec_to_rdma(iter, rdma, len); 258962306a36Sopenharmony_ci break; 259062306a36Sopenharmony_ci case ITER_KVEC: 259162306a36Sopenharmony_ci ret = smb_extract_kvec_to_rdma(iter, rdma, len); 259262306a36Sopenharmony_ci break; 259362306a36Sopenharmony_ci case ITER_XARRAY: 259462306a36Sopenharmony_ci ret = smb_extract_xarray_to_rdma(iter, rdma, len); 259562306a36Sopenharmony_ci break; 259662306a36Sopenharmony_ci default: 259762306a36Sopenharmony_ci WARN_ON_ONCE(1); 259862306a36Sopenharmony_ci return -EIO; 259962306a36Sopenharmony_ci } 260062306a36Sopenharmony_ci 260162306a36Sopenharmony_ci if (ret > 0) { 260262306a36Sopenharmony_ci iov_iter_advance(iter, ret); 260362306a36Sopenharmony_ci } else if (ret < 0) { 260462306a36Sopenharmony_ci while (rdma->nr_sge > before) { 260562306a36Sopenharmony_ci struct ib_sge *sge = &rdma->sge[rdma->nr_sge--]; 260662306a36Sopenharmony_ci 260762306a36Sopenharmony_ci ib_dma_unmap_single(rdma->device, sge->addr, sge->length, 260862306a36Sopenharmony_ci rdma->direction); 260962306a36Sopenharmony_ci sge->addr = 0; 261062306a36Sopenharmony_ci } 261162306a36Sopenharmony_ci } 261262306a36Sopenharmony_ci 261362306a36Sopenharmony_ci return ret; 261462306a36Sopenharmony_ci} 2615