162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci 362306a36Sopenharmony_ci#include <linux/ceph/ceph_debug.h> 462306a36Sopenharmony_ci 562306a36Sopenharmony_ci#include <linux/module.h> 662306a36Sopenharmony_ci#include <linux/err.h> 762306a36Sopenharmony_ci#include <linux/highmem.h> 862306a36Sopenharmony_ci#include <linux/mm.h> 962306a36Sopenharmony_ci#include <linux/pagemap.h> 1062306a36Sopenharmony_ci#include <linux/slab.h> 1162306a36Sopenharmony_ci#include <linux/uaccess.h> 1262306a36Sopenharmony_ci#ifdef CONFIG_BLOCK 1362306a36Sopenharmony_ci#include <linux/bio.h> 1462306a36Sopenharmony_ci#endif 1562306a36Sopenharmony_ci 1662306a36Sopenharmony_ci#include <linux/ceph/ceph_features.h> 1762306a36Sopenharmony_ci#include <linux/ceph/libceph.h> 1862306a36Sopenharmony_ci#include <linux/ceph/osd_client.h> 1962306a36Sopenharmony_ci#include <linux/ceph/messenger.h> 2062306a36Sopenharmony_ci#include <linux/ceph/decode.h> 2162306a36Sopenharmony_ci#include <linux/ceph/auth.h> 2262306a36Sopenharmony_ci#include <linux/ceph/pagelist.h> 2362306a36Sopenharmony_ci#include <linux/ceph/striper.h> 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci#define OSD_OPREPLY_FRONT_LEN 512 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_cistatic struct kmem_cache *ceph_osd_request_cache; 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_cistatic const struct ceph_connection_operations osd_con_ops; 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci/* 3262306a36Sopenharmony_ci * Implement client access to distributed object storage cluster. 3362306a36Sopenharmony_ci * 3462306a36Sopenharmony_ci * All data objects are stored within a cluster/cloud of OSDs, or 3562306a36Sopenharmony_ci * "object storage devices." (Note that Ceph OSDs have _nothing_ to 3662306a36Sopenharmony_ci * do with the T10 OSD extensions to SCSI.) 
Ceph OSDs are simply 3762306a36Sopenharmony_ci * remote daemons serving up and coordinating consistent and safe 3862306a36Sopenharmony_ci * access to storage. 3962306a36Sopenharmony_ci * 4062306a36Sopenharmony_ci * Cluster membership and the mapping of data objects onto storage devices 4162306a36Sopenharmony_ci * are described by the osd map. 4262306a36Sopenharmony_ci * 4362306a36Sopenharmony_ci * We keep track of pending OSD requests (read, write), resubmit 4462306a36Sopenharmony_ci * requests to different OSDs when the cluster topology/data layout 4562306a36Sopenharmony_ci * change, or retry the affected requests when the communications 4662306a36Sopenharmony_ci * channel with an OSD is reset. 4762306a36Sopenharmony_ci */ 4862306a36Sopenharmony_ci 4962306a36Sopenharmony_cistatic void link_request(struct ceph_osd *osd, struct ceph_osd_request *req); 5062306a36Sopenharmony_cistatic void unlink_request(struct ceph_osd *osd, struct ceph_osd_request *req); 5162306a36Sopenharmony_cistatic void link_linger(struct ceph_osd *osd, 5262306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq); 5362306a36Sopenharmony_cistatic void unlink_linger(struct ceph_osd *osd, 5462306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq); 5562306a36Sopenharmony_cistatic void clear_backoffs(struct ceph_osd *osd); 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_ci#if 1 5862306a36Sopenharmony_cistatic inline bool rwsem_is_wrlocked(struct rw_semaphore *sem) 5962306a36Sopenharmony_ci{ 6062306a36Sopenharmony_ci bool wrlocked = true; 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ci if (unlikely(down_read_trylock(sem))) { 6362306a36Sopenharmony_ci wrlocked = false; 6462306a36Sopenharmony_ci up_read(sem); 6562306a36Sopenharmony_ci } 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_ci return wrlocked; 6862306a36Sopenharmony_ci} 6962306a36Sopenharmony_cistatic inline void verify_osdc_locked(struct ceph_osd_client *osdc) 7062306a36Sopenharmony_ci{ 7162306a36Sopenharmony_ci 
WARN_ON(!rwsem_is_locked(&osdc->lock)); 7262306a36Sopenharmony_ci} 7362306a36Sopenharmony_cistatic inline void verify_osdc_wrlocked(struct ceph_osd_client *osdc) 7462306a36Sopenharmony_ci{ 7562306a36Sopenharmony_ci WARN_ON(!rwsem_is_wrlocked(&osdc->lock)); 7662306a36Sopenharmony_ci} 7762306a36Sopenharmony_cistatic inline void verify_osd_locked(struct ceph_osd *osd) 7862306a36Sopenharmony_ci{ 7962306a36Sopenharmony_ci struct ceph_osd_client *osdc = osd->o_osdc; 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_ci WARN_ON(!(mutex_is_locked(&osd->lock) && 8262306a36Sopenharmony_ci rwsem_is_locked(&osdc->lock)) && 8362306a36Sopenharmony_ci !rwsem_is_wrlocked(&osdc->lock)); 8462306a36Sopenharmony_ci} 8562306a36Sopenharmony_cistatic inline void verify_lreq_locked(struct ceph_osd_linger_request *lreq) 8662306a36Sopenharmony_ci{ 8762306a36Sopenharmony_ci WARN_ON(!mutex_is_locked(&lreq->lock)); 8862306a36Sopenharmony_ci} 8962306a36Sopenharmony_ci#else 9062306a36Sopenharmony_cistatic inline void verify_osdc_locked(struct ceph_osd_client *osdc) { } 9162306a36Sopenharmony_cistatic inline void verify_osdc_wrlocked(struct ceph_osd_client *osdc) { } 9262306a36Sopenharmony_cistatic inline void verify_osd_locked(struct ceph_osd *osd) { } 9362306a36Sopenharmony_cistatic inline void verify_lreq_locked(struct ceph_osd_linger_request *lreq) { } 9462306a36Sopenharmony_ci#endif 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_ci/* 9762306a36Sopenharmony_ci * calculate the mapping of a file extent onto an object, and fill out the 9862306a36Sopenharmony_ci * request accordingly. shorten extent as necessary if it crosses an 9962306a36Sopenharmony_ci * object boundary. 10062306a36Sopenharmony_ci * 10162306a36Sopenharmony_ci * fill osd op in request message. 
10262306a36Sopenharmony_ci */ 10362306a36Sopenharmony_cistatic int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen, 10462306a36Sopenharmony_ci u64 *objnum, u64 *objoff, u64 *objlen) 10562306a36Sopenharmony_ci{ 10662306a36Sopenharmony_ci u64 orig_len = *plen; 10762306a36Sopenharmony_ci u32 xlen; 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_ci /* object extent? */ 11062306a36Sopenharmony_ci ceph_calc_file_object_mapping(layout, off, orig_len, objnum, 11162306a36Sopenharmony_ci objoff, &xlen); 11262306a36Sopenharmony_ci *objlen = xlen; 11362306a36Sopenharmony_ci if (*objlen < orig_len) { 11462306a36Sopenharmony_ci *plen = *objlen; 11562306a36Sopenharmony_ci dout(" skipping last %llu, final file extent %llu~%llu\n", 11662306a36Sopenharmony_ci orig_len - *plen, off, *plen); 11762306a36Sopenharmony_ci } 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_ci dout("calc_layout objnum=%llx %llu~%llu\n", *objnum, *objoff, *objlen); 12062306a36Sopenharmony_ci return 0; 12162306a36Sopenharmony_ci} 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_cistatic void ceph_osd_data_init(struct ceph_osd_data *osd_data) 12462306a36Sopenharmony_ci{ 12562306a36Sopenharmony_ci memset(osd_data, 0, sizeof (*osd_data)); 12662306a36Sopenharmony_ci osd_data->type = CEPH_OSD_DATA_TYPE_NONE; 12762306a36Sopenharmony_ci} 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci/* 13062306a36Sopenharmony_ci * Consumes @pages if @own_pages is true. 
13162306a36Sopenharmony_ci */ 13262306a36Sopenharmony_cistatic void ceph_osd_data_pages_init(struct ceph_osd_data *osd_data, 13362306a36Sopenharmony_ci struct page **pages, u64 length, u32 alignment, 13462306a36Sopenharmony_ci bool pages_from_pool, bool own_pages) 13562306a36Sopenharmony_ci{ 13662306a36Sopenharmony_ci osd_data->type = CEPH_OSD_DATA_TYPE_PAGES; 13762306a36Sopenharmony_ci osd_data->pages = pages; 13862306a36Sopenharmony_ci osd_data->length = length; 13962306a36Sopenharmony_ci osd_data->alignment = alignment; 14062306a36Sopenharmony_ci osd_data->pages_from_pool = pages_from_pool; 14162306a36Sopenharmony_ci osd_data->own_pages = own_pages; 14262306a36Sopenharmony_ci} 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci/* 14562306a36Sopenharmony_ci * Consumes a ref on @pagelist. 14662306a36Sopenharmony_ci */ 14762306a36Sopenharmony_cistatic void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data, 14862306a36Sopenharmony_ci struct ceph_pagelist *pagelist) 14962306a36Sopenharmony_ci{ 15062306a36Sopenharmony_ci osd_data->type = CEPH_OSD_DATA_TYPE_PAGELIST; 15162306a36Sopenharmony_ci osd_data->pagelist = pagelist; 15262306a36Sopenharmony_ci} 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_ci#ifdef CONFIG_BLOCK 15562306a36Sopenharmony_cistatic void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data, 15662306a36Sopenharmony_ci struct ceph_bio_iter *bio_pos, 15762306a36Sopenharmony_ci u32 bio_length) 15862306a36Sopenharmony_ci{ 15962306a36Sopenharmony_ci osd_data->type = CEPH_OSD_DATA_TYPE_BIO; 16062306a36Sopenharmony_ci osd_data->bio_pos = *bio_pos; 16162306a36Sopenharmony_ci osd_data->bio_length = bio_length; 16262306a36Sopenharmony_ci} 16362306a36Sopenharmony_ci#endif /* CONFIG_BLOCK */ 16462306a36Sopenharmony_ci 16562306a36Sopenharmony_cistatic void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data, 16662306a36Sopenharmony_ci struct ceph_bvec_iter *bvec_pos, 16762306a36Sopenharmony_ci u32 num_bvecs) 16862306a36Sopenharmony_ci{ 
16962306a36Sopenharmony_ci osd_data->type = CEPH_OSD_DATA_TYPE_BVECS; 17062306a36Sopenharmony_ci osd_data->bvec_pos = *bvec_pos; 17162306a36Sopenharmony_ci osd_data->num_bvecs = num_bvecs; 17262306a36Sopenharmony_ci} 17362306a36Sopenharmony_ci 17462306a36Sopenharmony_cistatic void ceph_osd_iter_init(struct ceph_osd_data *osd_data, 17562306a36Sopenharmony_ci struct iov_iter *iter) 17662306a36Sopenharmony_ci{ 17762306a36Sopenharmony_ci osd_data->type = CEPH_OSD_DATA_TYPE_ITER; 17862306a36Sopenharmony_ci osd_data->iter = *iter; 17962306a36Sopenharmony_ci} 18062306a36Sopenharmony_ci 18162306a36Sopenharmony_cistatic struct ceph_osd_data * 18262306a36Sopenharmony_ciosd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which) 18362306a36Sopenharmony_ci{ 18462306a36Sopenharmony_ci BUG_ON(which >= osd_req->r_num_ops); 18562306a36Sopenharmony_ci 18662306a36Sopenharmony_ci return &osd_req->r_ops[which].raw_data_in; 18762306a36Sopenharmony_ci} 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_cistruct ceph_osd_data * 19062306a36Sopenharmony_ciosd_req_op_extent_osd_data(struct ceph_osd_request *osd_req, 19162306a36Sopenharmony_ci unsigned int which) 19262306a36Sopenharmony_ci{ 19362306a36Sopenharmony_ci return osd_req_op_data(osd_req, which, extent, osd_data); 19462306a36Sopenharmony_ci} 19562306a36Sopenharmony_ciEXPORT_SYMBOL(osd_req_op_extent_osd_data); 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_civoid osd_req_op_raw_data_in_pages(struct ceph_osd_request *osd_req, 19862306a36Sopenharmony_ci unsigned int which, struct page **pages, 19962306a36Sopenharmony_ci u64 length, u32 alignment, 20062306a36Sopenharmony_ci bool pages_from_pool, bool own_pages) 20162306a36Sopenharmony_ci{ 20262306a36Sopenharmony_ci struct ceph_osd_data *osd_data; 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci osd_data = osd_req_op_raw_data_in(osd_req, which); 20562306a36Sopenharmony_ci ceph_osd_data_pages_init(osd_data, pages, length, alignment, 20662306a36Sopenharmony_ci 
pages_from_pool, own_pages); 20762306a36Sopenharmony_ci} 20862306a36Sopenharmony_ciEXPORT_SYMBOL(osd_req_op_raw_data_in_pages); 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_civoid osd_req_op_extent_osd_data_pages(struct ceph_osd_request *osd_req, 21162306a36Sopenharmony_ci unsigned int which, struct page **pages, 21262306a36Sopenharmony_ci u64 length, u32 alignment, 21362306a36Sopenharmony_ci bool pages_from_pool, bool own_pages) 21462306a36Sopenharmony_ci{ 21562306a36Sopenharmony_ci struct ceph_osd_data *osd_data; 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci osd_data = osd_req_op_data(osd_req, which, extent, osd_data); 21862306a36Sopenharmony_ci ceph_osd_data_pages_init(osd_data, pages, length, alignment, 21962306a36Sopenharmony_ci pages_from_pool, own_pages); 22062306a36Sopenharmony_ci} 22162306a36Sopenharmony_ciEXPORT_SYMBOL(osd_req_op_extent_osd_data_pages); 22262306a36Sopenharmony_ci 22362306a36Sopenharmony_civoid osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *osd_req, 22462306a36Sopenharmony_ci unsigned int which, struct ceph_pagelist *pagelist) 22562306a36Sopenharmony_ci{ 22662306a36Sopenharmony_ci struct ceph_osd_data *osd_data; 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci osd_data = osd_req_op_data(osd_req, which, extent, osd_data); 22962306a36Sopenharmony_ci ceph_osd_data_pagelist_init(osd_data, pagelist); 23062306a36Sopenharmony_ci} 23162306a36Sopenharmony_ciEXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist); 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_ci#ifdef CONFIG_BLOCK 23462306a36Sopenharmony_civoid osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, 23562306a36Sopenharmony_ci unsigned int which, 23662306a36Sopenharmony_ci struct ceph_bio_iter *bio_pos, 23762306a36Sopenharmony_ci u32 bio_length) 23862306a36Sopenharmony_ci{ 23962306a36Sopenharmony_ci struct ceph_osd_data *osd_data; 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_ci osd_data = osd_req_op_data(osd_req, which, extent, 
osd_data); 24262306a36Sopenharmony_ci ceph_osd_data_bio_init(osd_data, bio_pos, bio_length); 24362306a36Sopenharmony_ci} 24462306a36Sopenharmony_ciEXPORT_SYMBOL(osd_req_op_extent_osd_data_bio); 24562306a36Sopenharmony_ci#endif /* CONFIG_BLOCK */ 24662306a36Sopenharmony_ci 24762306a36Sopenharmony_civoid osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req, 24862306a36Sopenharmony_ci unsigned int which, 24962306a36Sopenharmony_ci struct bio_vec *bvecs, u32 num_bvecs, 25062306a36Sopenharmony_ci u32 bytes) 25162306a36Sopenharmony_ci{ 25262306a36Sopenharmony_ci struct ceph_osd_data *osd_data; 25362306a36Sopenharmony_ci struct ceph_bvec_iter it = { 25462306a36Sopenharmony_ci .bvecs = bvecs, 25562306a36Sopenharmony_ci .iter = { .bi_size = bytes }, 25662306a36Sopenharmony_ci }; 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_ci osd_data = osd_req_op_data(osd_req, which, extent, osd_data); 25962306a36Sopenharmony_ci ceph_osd_data_bvecs_init(osd_data, &it, num_bvecs); 26062306a36Sopenharmony_ci} 26162306a36Sopenharmony_ciEXPORT_SYMBOL(osd_req_op_extent_osd_data_bvecs); 26262306a36Sopenharmony_ci 26362306a36Sopenharmony_civoid osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, 26462306a36Sopenharmony_ci unsigned int which, 26562306a36Sopenharmony_ci struct ceph_bvec_iter *bvec_pos) 26662306a36Sopenharmony_ci{ 26762306a36Sopenharmony_ci struct ceph_osd_data *osd_data; 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_ci osd_data = osd_req_op_data(osd_req, which, extent, osd_data); 27062306a36Sopenharmony_ci ceph_osd_data_bvecs_init(osd_data, bvec_pos, 0); 27162306a36Sopenharmony_ci} 27262306a36Sopenharmony_ciEXPORT_SYMBOL(osd_req_op_extent_osd_data_bvec_pos); 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci/** 27562306a36Sopenharmony_ci * osd_req_op_extent_osd_iter - Set up an operation with an iterator buffer 27662306a36Sopenharmony_ci * @osd_req: The request to set up 27762306a36Sopenharmony_ci * @which: Index of the operation in 
which to set the iter 27862306a36Sopenharmony_ci * @iter: The buffer iterator 27962306a36Sopenharmony_ci */ 28062306a36Sopenharmony_civoid osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req, 28162306a36Sopenharmony_ci unsigned int which, struct iov_iter *iter) 28262306a36Sopenharmony_ci{ 28362306a36Sopenharmony_ci struct ceph_osd_data *osd_data; 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci osd_data = osd_req_op_data(osd_req, which, extent, osd_data); 28662306a36Sopenharmony_ci ceph_osd_iter_init(osd_data, iter); 28762306a36Sopenharmony_ci} 28862306a36Sopenharmony_ciEXPORT_SYMBOL(osd_req_op_extent_osd_iter); 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_cistatic void osd_req_op_cls_request_info_pagelist( 29162306a36Sopenharmony_ci struct ceph_osd_request *osd_req, 29262306a36Sopenharmony_ci unsigned int which, struct ceph_pagelist *pagelist) 29362306a36Sopenharmony_ci{ 29462306a36Sopenharmony_ci struct ceph_osd_data *osd_data; 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_ci osd_data = osd_req_op_data(osd_req, which, cls, request_info); 29762306a36Sopenharmony_ci ceph_osd_data_pagelist_init(osd_data, pagelist); 29862306a36Sopenharmony_ci} 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_civoid osd_req_op_cls_request_data_pagelist( 30162306a36Sopenharmony_ci struct ceph_osd_request *osd_req, 30262306a36Sopenharmony_ci unsigned int which, struct ceph_pagelist *pagelist) 30362306a36Sopenharmony_ci{ 30462306a36Sopenharmony_ci struct ceph_osd_data *osd_data; 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_ci osd_data = osd_req_op_data(osd_req, which, cls, request_data); 30762306a36Sopenharmony_ci ceph_osd_data_pagelist_init(osd_data, pagelist); 30862306a36Sopenharmony_ci osd_req->r_ops[which].cls.indata_len += pagelist->length; 30962306a36Sopenharmony_ci osd_req->r_ops[which].indata_len += pagelist->length; 31062306a36Sopenharmony_ci} 31162306a36Sopenharmony_ciEXPORT_SYMBOL(osd_req_op_cls_request_data_pagelist); 31262306a36Sopenharmony_ci 
31362306a36Sopenharmony_civoid osd_req_op_cls_request_data_pages(struct ceph_osd_request *osd_req, 31462306a36Sopenharmony_ci unsigned int which, struct page **pages, u64 length, 31562306a36Sopenharmony_ci u32 alignment, bool pages_from_pool, bool own_pages) 31662306a36Sopenharmony_ci{ 31762306a36Sopenharmony_ci struct ceph_osd_data *osd_data; 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_ci osd_data = osd_req_op_data(osd_req, which, cls, request_data); 32062306a36Sopenharmony_ci ceph_osd_data_pages_init(osd_data, pages, length, alignment, 32162306a36Sopenharmony_ci pages_from_pool, own_pages); 32262306a36Sopenharmony_ci osd_req->r_ops[which].cls.indata_len += length; 32362306a36Sopenharmony_ci osd_req->r_ops[which].indata_len += length; 32462306a36Sopenharmony_ci} 32562306a36Sopenharmony_ciEXPORT_SYMBOL(osd_req_op_cls_request_data_pages); 32662306a36Sopenharmony_ci 32762306a36Sopenharmony_civoid osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req, 32862306a36Sopenharmony_ci unsigned int which, 32962306a36Sopenharmony_ci struct bio_vec *bvecs, u32 num_bvecs, 33062306a36Sopenharmony_ci u32 bytes) 33162306a36Sopenharmony_ci{ 33262306a36Sopenharmony_ci struct ceph_osd_data *osd_data; 33362306a36Sopenharmony_ci struct ceph_bvec_iter it = { 33462306a36Sopenharmony_ci .bvecs = bvecs, 33562306a36Sopenharmony_ci .iter = { .bi_size = bytes }, 33662306a36Sopenharmony_ci }; 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci osd_data = osd_req_op_data(osd_req, which, cls, request_data); 33962306a36Sopenharmony_ci ceph_osd_data_bvecs_init(osd_data, &it, num_bvecs); 34062306a36Sopenharmony_ci osd_req->r_ops[which].cls.indata_len += bytes; 34162306a36Sopenharmony_ci osd_req->r_ops[which].indata_len += bytes; 34262306a36Sopenharmony_ci} 34362306a36Sopenharmony_ciEXPORT_SYMBOL(osd_req_op_cls_request_data_bvecs); 34462306a36Sopenharmony_ci 34562306a36Sopenharmony_civoid osd_req_op_cls_response_data_pages(struct ceph_osd_request *osd_req, 
34662306a36Sopenharmony_ci unsigned int which, struct page **pages, u64 length, 34762306a36Sopenharmony_ci u32 alignment, bool pages_from_pool, bool own_pages) 34862306a36Sopenharmony_ci{ 34962306a36Sopenharmony_ci struct ceph_osd_data *osd_data; 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_ci osd_data = osd_req_op_data(osd_req, which, cls, response_data); 35262306a36Sopenharmony_ci ceph_osd_data_pages_init(osd_data, pages, length, alignment, 35362306a36Sopenharmony_ci pages_from_pool, own_pages); 35462306a36Sopenharmony_ci} 35562306a36Sopenharmony_ciEXPORT_SYMBOL(osd_req_op_cls_response_data_pages); 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_cistatic u64 ceph_osd_data_length(struct ceph_osd_data *osd_data) 35862306a36Sopenharmony_ci{ 35962306a36Sopenharmony_ci switch (osd_data->type) { 36062306a36Sopenharmony_ci case CEPH_OSD_DATA_TYPE_NONE: 36162306a36Sopenharmony_ci return 0; 36262306a36Sopenharmony_ci case CEPH_OSD_DATA_TYPE_PAGES: 36362306a36Sopenharmony_ci return osd_data->length; 36462306a36Sopenharmony_ci case CEPH_OSD_DATA_TYPE_PAGELIST: 36562306a36Sopenharmony_ci return (u64)osd_data->pagelist->length; 36662306a36Sopenharmony_ci#ifdef CONFIG_BLOCK 36762306a36Sopenharmony_ci case CEPH_OSD_DATA_TYPE_BIO: 36862306a36Sopenharmony_ci return (u64)osd_data->bio_length; 36962306a36Sopenharmony_ci#endif /* CONFIG_BLOCK */ 37062306a36Sopenharmony_ci case CEPH_OSD_DATA_TYPE_BVECS: 37162306a36Sopenharmony_ci return osd_data->bvec_pos.iter.bi_size; 37262306a36Sopenharmony_ci case CEPH_OSD_DATA_TYPE_ITER: 37362306a36Sopenharmony_ci return iov_iter_count(&osd_data->iter); 37462306a36Sopenharmony_ci default: 37562306a36Sopenharmony_ci WARN(true, "unrecognized data type %d\n", (int)osd_data->type); 37662306a36Sopenharmony_ci return 0; 37762306a36Sopenharmony_ci } 37862306a36Sopenharmony_ci} 37962306a36Sopenharmony_ci 38062306a36Sopenharmony_cistatic void ceph_osd_data_release(struct ceph_osd_data *osd_data) 38162306a36Sopenharmony_ci{ 
38262306a36Sopenharmony_ci if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES && osd_data->own_pages) { 38362306a36Sopenharmony_ci int num_pages; 38462306a36Sopenharmony_ci 38562306a36Sopenharmony_ci num_pages = calc_pages_for((u64)osd_data->alignment, 38662306a36Sopenharmony_ci (u64)osd_data->length); 38762306a36Sopenharmony_ci ceph_release_page_vector(osd_data->pages, num_pages); 38862306a36Sopenharmony_ci } else if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGELIST) { 38962306a36Sopenharmony_ci ceph_pagelist_release(osd_data->pagelist); 39062306a36Sopenharmony_ci } 39162306a36Sopenharmony_ci ceph_osd_data_init(osd_data); 39262306a36Sopenharmony_ci} 39362306a36Sopenharmony_ci 39462306a36Sopenharmony_cistatic void osd_req_op_data_release(struct ceph_osd_request *osd_req, 39562306a36Sopenharmony_ci unsigned int which) 39662306a36Sopenharmony_ci{ 39762306a36Sopenharmony_ci struct ceph_osd_req_op *op; 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_ci BUG_ON(which >= osd_req->r_num_ops); 40062306a36Sopenharmony_ci op = &osd_req->r_ops[which]; 40162306a36Sopenharmony_ci 40262306a36Sopenharmony_ci switch (op->op) { 40362306a36Sopenharmony_ci case CEPH_OSD_OP_READ: 40462306a36Sopenharmony_ci case CEPH_OSD_OP_SPARSE_READ: 40562306a36Sopenharmony_ci case CEPH_OSD_OP_WRITE: 40662306a36Sopenharmony_ci case CEPH_OSD_OP_WRITEFULL: 40762306a36Sopenharmony_ci kfree(op->extent.sparse_ext); 40862306a36Sopenharmony_ci ceph_osd_data_release(&op->extent.osd_data); 40962306a36Sopenharmony_ci break; 41062306a36Sopenharmony_ci case CEPH_OSD_OP_CALL: 41162306a36Sopenharmony_ci ceph_osd_data_release(&op->cls.request_info); 41262306a36Sopenharmony_ci ceph_osd_data_release(&op->cls.request_data); 41362306a36Sopenharmony_ci ceph_osd_data_release(&op->cls.response_data); 41462306a36Sopenharmony_ci break; 41562306a36Sopenharmony_ci case CEPH_OSD_OP_SETXATTR: 41662306a36Sopenharmony_ci case CEPH_OSD_OP_CMPXATTR: 41762306a36Sopenharmony_ci ceph_osd_data_release(&op->xattr.osd_data); 
41862306a36Sopenharmony_ci break; 41962306a36Sopenharmony_ci case CEPH_OSD_OP_STAT: 42062306a36Sopenharmony_ci ceph_osd_data_release(&op->raw_data_in); 42162306a36Sopenharmony_ci break; 42262306a36Sopenharmony_ci case CEPH_OSD_OP_NOTIFY_ACK: 42362306a36Sopenharmony_ci ceph_osd_data_release(&op->notify_ack.request_data); 42462306a36Sopenharmony_ci break; 42562306a36Sopenharmony_ci case CEPH_OSD_OP_NOTIFY: 42662306a36Sopenharmony_ci ceph_osd_data_release(&op->notify.request_data); 42762306a36Sopenharmony_ci ceph_osd_data_release(&op->notify.response_data); 42862306a36Sopenharmony_ci break; 42962306a36Sopenharmony_ci case CEPH_OSD_OP_LIST_WATCHERS: 43062306a36Sopenharmony_ci ceph_osd_data_release(&op->list_watchers.response_data); 43162306a36Sopenharmony_ci break; 43262306a36Sopenharmony_ci case CEPH_OSD_OP_COPY_FROM2: 43362306a36Sopenharmony_ci ceph_osd_data_release(&op->copy_from.osd_data); 43462306a36Sopenharmony_ci break; 43562306a36Sopenharmony_ci default: 43662306a36Sopenharmony_ci break; 43762306a36Sopenharmony_ci } 43862306a36Sopenharmony_ci} 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_ci/* 44162306a36Sopenharmony_ci * Assumes @t is zero-initialized. 
44262306a36Sopenharmony_ci */ 44362306a36Sopenharmony_cistatic void target_init(struct ceph_osd_request_target *t) 44462306a36Sopenharmony_ci{ 44562306a36Sopenharmony_ci ceph_oid_init(&t->base_oid); 44662306a36Sopenharmony_ci ceph_oloc_init(&t->base_oloc); 44762306a36Sopenharmony_ci ceph_oid_init(&t->target_oid); 44862306a36Sopenharmony_ci ceph_oloc_init(&t->target_oloc); 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_ci ceph_osds_init(&t->acting); 45162306a36Sopenharmony_ci ceph_osds_init(&t->up); 45262306a36Sopenharmony_ci t->size = -1; 45362306a36Sopenharmony_ci t->min_size = -1; 45462306a36Sopenharmony_ci 45562306a36Sopenharmony_ci t->osd = CEPH_HOMELESS_OSD; 45662306a36Sopenharmony_ci} 45762306a36Sopenharmony_ci 45862306a36Sopenharmony_cistatic void target_copy(struct ceph_osd_request_target *dest, 45962306a36Sopenharmony_ci const struct ceph_osd_request_target *src) 46062306a36Sopenharmony_ci{ 46162306a36Sopenharmony_ci ceph_oid_copy(&dest->base_oid, &src->base_oid); 46262306a36Sopenharmony_ci ceph_oloc_copy(&dest->base_oloc, &src->base_oloc); 46362306a36Sopenharmony_ci ceph_oid_copy(&dest->target_oid, &src->target_oid); 46462306a36Sopenharmony_ci ceph_oloc_copy(&dest->target_oloc, &src->target_oloc); 46562306a36Sopenharmony_ci 46662306a36Sopenharmony_ci dest->pgid = src->pgid; /* struct */ 46762306a36Sopenharmony_ci dest->spgid = src->spgid; /* struct */ 46862306a36Sopenharmony_ci dest->pg_num = src->pg_num; 46962306a36Sopenharmony_ci dest->pg_num_mask = src->pg_num_mask; 47062306a36Sopenharmony_ci ceph_osds_copy(&dest->acting, &src->acting); 47162306a36Sopenharmony_ci ceph_osds_copy(&dest->up, &src->up); 47262306a36Sopenharmony_ci dest->size = src->size; 47362306a36Sopenharmony_ci dest->min_size = src->min_size; 47462306a36Sopenharmony_ci dest->sort_bitwise = src->sort_bitwise; 47562306a36Sopenharmony_ci dest->recovery_deletes = src->recovery_deletes; 47662306a36Sopenharmony_ci 47762306a36Sopenharmony_ci dest->flags = src->flags; 
47862306a36Sopenharmony_ci dest->used_replica = src->used_replica; 47962306a36Sopenharmony_ci dest->paused = src->paused; 48062306a36Sopenharmony_ci 48162306a36Sopenharmony_ci dest->epoch = src->epoch; 48262306a36Sopenharmony_ci dest->last_force_resend = src->last_force_resend; 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_ci dest->osd = src->osd; 48562306a36Sopenharmony_ci} 48662306a36Sopenharmony_ci 48762306a36Sopenharmony_cistatic void target_destroy(struct ceph_osd_request_target *t) 48862306a36Sopenharmony_ci{ 48962306a36Sopenharmony_ci ceph_oid_destroy(&t->base_oid); 49062306a36Sopenharmony_ci ceph_oloc_destroy(&t->base_oloc); 49162306a36Sopenharmony_ci ceph_oid_destroy(&t->target_oid); 49262306a36Sopenharmony_ci ceph_oloc_destroy(&t->target_oloc); 49362306a36Sopenharmony_ci} 49462306a36Sopenharmony_ci 49562306a36Sopenharmony_ci/* 49662306a36Sopenharmony_ci * requests 49762306a36Sopenharmony_ci */ 49862306a36Sopenharmony_cistatic void request_release_checks(struct ceph_osd_request *req) 49962306a36Sopenharmony_ci{ 50062306a36Sopenharmony_ci WARN_ON(!RB_EMPTY_NODE(&req->r_node)); 50162306a36Sopenharmony_ci WARN_ON(!RB_EMPTY_NODE(&req->r_mc_node)); 50262306a36Sopenharmony_ci WARN_ON(!list_empty(&req->r_private_item)); 50362306a36Sopenharmony_ci WARN_ON(req->r_osd); 50462306a36Sopenharmony_ci} 50562306a36Sopenharmony_ci 50662306a36Sopenharmony_cistatic void ceph_osdc_release_request(struct kref *kref) 50762306a36Sopenharmony_ci{ 50862306a36Sopenharmony_ci struct ceph_osd_request *req = container_of(kref, 50962306a36Sopenharmony_ci struct ceph_osd_request, r_kref); 51062306a36Sopenharmony_ci unsigned int which; 51162306a36Sopenharmony_ci 51262306a36Sopenharmony_ci dout("%s %p (r_request %p r_reply %p)\n", __func__, req, 51362306a36Sopenharmony_ci req->r_request, req->r_reply); 51462306a36Sopenharmony_ci request_release_checks(req); 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci if (req->r_request) 51762306a36Sopenharmony_ci 
ceph_msg_put(req->r_request); 51862306a36Sopenharmony_ci if (req->r_reply) 51962306a36Sopenharmony_ci ceph_msg_put(req->r_reply); 52062306a36Sopenharmony_ci 52162306a36Sopenharmony_ci for (which = 0; which < req->r_num_ops; which++) 52262306a36Sopenharmony_ci osd_req_op_data_release(req, which); 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_ci target_destroy(&req->r_t); 52562306a36Sopenharmony_ci ceph_put_snap_context(req->r_snapc); 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_ci if (req->r_mempool) 52862306a36Sopenharmony_ci mempool_free(req, req->r_osdc->req_mempool); 52962306a36Sopenharmony_ci else if (req->r_num_ops <= CEPH_OSD_SLAB_OPS) 53062306a36Sopenharmony_ci kmem_cache_free(ceph_osd_request_cache, req); 53162306a36Sopenharmony_ci else 53262306a36Sopenharmony_ci kfree(req); 53362306a36Sopenharmony_ci} 53462306a36Sopenharmony_ci 53562306a36Sopenharmony_civoid ceph_osdc_get_request(struct ceph_osd_request *req) 53662306a36Sopenharmony_ci{ 53762306a36Sopenharmony_ci dout("%s %p (was %d)\n", __func__, req, 53862306a36Sopenharmony_ci kref_read(&req->r_kref)); 53962306a36Sopenharmony_ci kref_get(&req->r_kref); 54062306a36Sopenharmony_ci} 54162306a36Sopenharmony_ciEXPORT_SYMBOL(ceph_osdc_get_request); 54262306a36Sopenharmony_ci 54362306a36Sopenharmony_civoid ceph_osdc_put_request(struct ceph_osd_request *req) 54462306a36Sopenharmony_ci{ 54562306a36Sopenharmony_ci if (req) { 54662306a36Sopenharmony_ci dout("%s %p (was %d)\n", __func__, req, 54762306a36Sopenharmony_ci kref_read(&req->r_kref)); 54862306a36Sopenharmony_ci kref_put(&req->r_kref, ceph_osdc_release_request); 54962306a36Sopenharmony_ci } 55062306a36Sopenharmony_ci} 55162306a36Sopenharmony_ciEXPORT_SYMBOL(ceph_osdc_put_request); 55262306a36Sopenharmony_ci 55362306a36Sopenharmony_cistatic void request_init(struct ceph_osd_request *req) 55462306a36Sopenharmony_ci{ 55562306a36Sopenharmony_ci /* req only, each op is zeroed in osd_req_op_init() */ 55662306a36Sopenharmony_ci memset(req, 0, 
sizeof(*req)); 55762306a36Sopenharmony_ci 55862306a36Sopenharmony_ci kref_init(&req->r_kref); 55962306a36Sopenharmony_ci init_completion(&req->r_completion); 56062306a36Sopenharmony_ci RB_CLEAR_NODE(&req->r_node); 56162306a36Sopenharmony_ci RB_CLEAR_NODE(&req->r_mc_node); 56262306a36Sopenharmony_ci INIT_LIST_HEAD(&req->r_private_item); 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_ci target_init(&req->r_t); 56562306a36Sopenharmony_ci} 56662306a36Sopenharmony_ci 56762306a36Sopenharmony_cistruct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, 56862306a36Sopenharmony_ci struct ceph_snap_context *snapc, 56962306a36Sopenharmony_ci unsigned int num_ops, 57062306a36Sopenharmony_ci bool use_mempool, 57162306a36Sopenharmony_ci gfp_t gfp_flags) 57262306a36Sopenharmony_ci{ 57362306a36Sopenharmony_ci struct ceph_osd_request *req; 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_ci if (use_mempool) { 57662306a36Sopenharmony_ci BUG_ON(num_ops > CEPH_OSD_SLAB_OPS); 57762306a36Sopenharmony_ci req = mempool_alloc(osdc->req_mempool, gfp_flags); 57862306a36Sopenharmony_ci } else if (num_ops <= CEPH_OSD_SLAB_OPS) { 57962306a36Sopenharmony_ci req = kmem_cache_alloc(ceph_osd_request_cache, gfp_flags); 58062306a36Sopenharmony_ci } else { 58162306a36Sopenharmony_ci BUG_ON(num_ops > CEPH_OSD_MAX_OPS); 58262306a36Sopenharmony_ci req = kmalloc(struct_size(req, r_ops, num_ops), gfp_flags); 58362306a36Sopenharmony_ci } 58462306a36Sopenharmony_ci if (unlikely(!req)) 58562306a36Sopenharmony_ci return NULL; 58662306a36Sopenharmony_ci 58762306a36Sopenharmony_ci request_init(req); 58862306a36Sopenharmony_ci req->r_osdc = osdc; 58962306a36Sopenharmony_ci req->r_mempool = use_mempool; 59062306a36Sopenharmony_ci req->r_num_ops = num_ops; 59162306a36Sopenharmony_ci req->r_snapid = CEPH_NOSNAP; 59262306a36Sopenharmony_ci req->r_snapc = ceph_get_snap_context(snapc); 59362306a36Sopenharmony_ci 59462306a36Sopenharmony_ci dout("%s req %p\n", __func__, req); 
59562306a36Sopenharmony_ci return req; 59662306a36Sopenharmony_ci} 59762306a36Sopenharmony_ciEXPORT_SYMBOL(ceph_osdc_alloc_request); 59862306a36Sopenharmony_ci 59962306a36Sopenharmony_cistatic int ceph_oloc_encoding_size(const struct ceph_object_locator *oloc) 60062306a36Sopenharmony_ci{ 60162306a36Sopenharmony_ci return 8 + 4 + 4 + 4 + (oloc->pool_ns ? oloc->pool_ns->len : 0); 60262306a36Sopenharmony_ci} 60362306a36Sopenharmony_ci 60462306a36Sopenharmony_cistatic int __ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp, 60562306a36Sopenharmony_ci int num_request_data_items, 60662306a36Sopenharmony_ci int num_reply_data_items) 60762306a36Sopenharmony_ci{ 60862306a36Sopenharmony_ci struct ceph_osd_client *osdc = req->r_osdc; 60962306a36Sopenharmony_ci struct ceph_msg *msg; 61062306a36Sopenharmony_ci int msg_size; 61162306a36Sopenharmony_ci 61262306a36Sopenharmony_ci WARN_ON(req->r_request || req->r_reply); 61362306a36Sopenharmony_ci WARN_ON(ceph_oid_empty(&req->r_base_oid)); 61462306a36Sopenharmony_ci WARN_ON(ceph_oloc_empty(&req->r_base_oloc)); 61562306a36Sopenharmony_ci 61662306a36Sopenharmony_ci /* create request message */ 61762306a36Sopenharmony_ci msg_size = CEPH_ENCODING_START_BLK_LEN + 61862306a36Sopenharmony_ci CEPH_PGID_ENCODING_LEN + 1; /* spgid */ 61962306a36Sopenharmony_ci msg_size += 4 + 4 + 4; /* hash, osdmap_epoch, flags */ 62062306a36Sopenharmony_ci msg_size += CEPH_ENCODING_START_BLK_LEN + 62162306a36Sopenharmony_ci sizeof(struct ceph_osd_reqid); /* reqid */ 62262306a36Sopenharmony_ci msg_size += sizeof(struct ceph_blkin_trace_info); /* trace */ 62362306a36Sopenharmony_ci msg_size += 4 + sizeof(struct ceph_timespec); /* client_inc, mtime */ 62462306a36Sopenharmony_ci msg_size += CEPH_ENCODING_START_BLK_LEN + 62562306a36Sopenharmony_ci ceph_oloc_encoding_size(&req->r_base_oloc); /* oloc */ 62662306a36Sopenharmony_ci msg_size += 4 + req->r_base_oid.name_len; /* oid */ 62762306a36Sopenharmony_ci msg_size += 2 + req->r_num_ops * 
sizeof(struct ceph_osd_op); 62862306a36Sopenharmony_ci msg_size += 8; /* snapid */ 62962306a36Sopenharmony_ci msg_size += 8; /* snap_seq */ 63062306a36Sopenharmony_ci msg_size += 4 + 8 * (req->r_snapc ? req->r_snapc->num_snaps : 0); 63162306a36Sopenharmony_ci msg_size += 4 + 8; /* retry_attempt, features */ 63262306a36Sopenharmony_ci 63362306a36Sopenharmony_ci if (req->r_mempool) 63462306a36Sopenharmony_ci msg = ceph_msgpool_get(&osdc->msgpool_op, msg_size, 63562306a36Sopenharmony_ci num_request_data_items); 63662306a36Sopenharmony_ci else 63762306a36Sopenharmony_ci msg = ceph_msg_new2(CEPH_MSG_OSD_OP, msg_size, 63862306a36Sopenharmony_ci num_request_data_items, gfp, true); 63962306a36Sopenharmony_ci if (!msg) 64062306a36Sopenharmony_ci return -ENOMEM; 64162306a36Sopenharmony_ci 64262306a36Sopenharmony_ci memset(msg->front.iov_base, 0, msg->front.iov_len); 64362306a36Sopenharmony_ci req->r_request = msg; 64462306a36Sopenharmony_ci 64562306a36Sopenharmony_ci /* create reply message */ 64662306a36Sopenharmony_ci msg_size = OSD_OPREPLY_FRONT_LEN; 64762306a36Sopenharmony_ci msg_size += req->r_base_oid.name_len; 64862306a36Sopenharmony_ci msg_size += req->r_num_ops * sizeof(struct ceph_osd_op); 64962306a36Sopenharmony_ci 65062306a36Sopenharmony_ci if (req->r_mempool) 65162306a36Sopenharmony_ci msg = ceph_msgpool_get(&osdc->msgpool_op_reply, msg_size, 65262306a36Sopenharmony_ci num_reply_data_items); 65362306a36Sopenharmony_ci else 65462306a36Sopenharmony_ci msg = ceph_msg_new2(CEPH_MSG_OSD_OPREPLY, msg_size, 65562306a36Sopenharmony_ci num_reply_data_items, gfp, true); 65662306a36Sopenharmony_ci if (!msg) 65762306a36Sopenharmony_ci return -ENOMEM; 65862306a36Sopenharmony_ci 65962306a36Sopenharmony_ci req->r_reply = msg; 66062306a36Sopenharmony_ci 66162306a36Sopenharmony_ci return 0; 66262306a36Sopenharmony_ci} 66362306a36Sopenharmony_ci 66462306a36Sopenharmony_cistatic bool osd_req_opcode_valid(u16 opcode) 66562306a36Sopenharmony_ci{ 66662306a36Sopenharmony_ci switch 
(opcode) { 66762306a36Sopenharmony_ci#define GENERATE_CASE(op, opcode, str) case CEPH_OSD_OP_##op: return true; 66862306a36Sopenharmony_ci__CEPH_FORALL_OSD_OPS(GENERATE_CASE) 66962306a36Sopenharmony_ci#undef GENERATE_CASE 67062306a36Sopenharmony_ci default: 67162306a36Sopenharmony_ci return false; 67262306a36Sopenharmony_ci } 67362306a36Sopenharmony_ci} 67462306a36Sopenharmony_ci 67562306a36Sopenharmony_cistatic void get_num_data_items(struct ceph_osd_request *req, 67662306a36Sopenharmony_ci int *num_request_data_items, 67762306a36Sopenharmony_ci int *num_reply_data_items) 67862306a36Sopenharmony_ci{ 67962306a36Sopenharmony_ci struct ceph_osd_req_op *op; 68062306a36Sopenharmony_ci 68162306a36Sopenharmony_ci *num_request_data_items = 0; 68262306a36Sopenharmony_ci *num_reply_data_items = 0; 68362306a36Sopenharmony_ci 68462306a36Sopenharmony_ci for (op = req->r_ops; op != &req->r_ops[req->r_num_ops]; op++) { 68562306a36Sopenharmony_ci switch (op->op) { 68662306a36Sopenharmony_ci /* request */ 68762306a36Sopenharmony_ci case CEPH_OSD_OP_WRITE: 68862306a36Sopenharmony_ci case CEPH_OSD_OP_WRITEFULL: 68962306a36Sopenharmony_ci case CEPH_OSD_OP_SETXATTR: 69062306a36Sopenharmony_ci case CEPH_OSD_OP_CMPXATTR: 69162306a36Sopenharmony_ci case CEPH_OSD_OP_NOTIFY_ACK: 69262306a36Sopenharmony_ci case CEPH_OSD_OP_COPY_FROM2: 69362306a36Sopenharmony_ci *num_request_data_items += 1; 69462306a36Sopenharmony_ci break; 69562306a36Sopenharmony_ci 69662306a36Sopenharmony_ci /* reply */ 69762306a36Sopenharmony_ci case CEPH_OSD_OP_STAT: 69862306a36Sopenharmony_ci case CEPH_OSD_OP_READ: 69962306a36Sopenharmony_ci case CEPH_OSD_OP_SPARSE_READ: 70062306a36Sopenharmony_ci case CEPH_OSD_OP_LIST_WATCHERS: 70162306a36Sopenharmony_ci *num_reply_data_items += 1; 70262306a36Sopenharmony_ci break; 70362306a36Sopenharmony_ci 70462306a36Sopenharmony_ci /* both */ 70562306a36Sopenharmony_ci case CEPH_OSD_OP_NOTIFY: 70662306a36Sopenharmony_ci *num_request_data_items += 1; 70762306a36Sopenharmony_ci 
*num_reply_data_items += 1; 70862306a36Sopenharmony_ci break; 70962306a36Sopenharmony_ci case CEPH_OSD_OP_CALL: 71062306a36Sopenharmony_ci *num_request_data_items += 2; 71162306a36Sopenharmony_ci *num_reply_data_items += 1; 71262306a36Sopenharmony_ci break; 71362306a36Sopenharmony_ci 71462306a36Sopenharmony_ci default: 71562306a36Sopenharmony_ci WARN_ON(!osd_req_opcode_valid(op->op)); 71662306a36Sopenharmony_ci break; 71762306a36Sopenharmony_ci } 71862306a36Sopenharmony_ci } 71962306a36Sopenharmony_ci} 72062306a36Sopenharmony_ci 72162306a36Sopenharmony_ci/* 72262306a36Sopenharmony_ci * oid, oloc and OSD op opcode(s) must be filled in before this function 72362306a36Sopenharmony_ci * is called. 72462306a36Sopenharmony_ci */ 72562306a36Sopenharmony_ciint ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp) 72662306a36Sopenharmony_ci{ 72762306a36Sopenharmony_ci int num_request_data_items, num_reply_data_items; 72862306a36Sopenharmony_ci 72962306a36Sopenharmony_ci get_num_data_items(req, &num_request_data_items, &num_reply_data_items); 73062306a36Sopenharmony_ci return __ceph_osdc_alloc_messages(req, gfp, num_request_data_items, 73162306a36Sopenharmony_ci num_reply_data_items); 73262306a36Sopenharmony_ci} 73362306a36Sopenharmony_ciEXPORT_SYMBOL(ceph_osdc_alloc_messages); 73462306a36Sopenharmony_ci 73562306a36Sopenharmony_ci/* 73662306a36Sopenharmony_ci * This is an osd op init function for opcodes that have no data or 73762306a36Sopenharmony_ci * other information associated with them. It also serves as a 73862306a36Sopenharmony_ci * common init routine for all the other init functions, below. 
73962306a36Sopenharmony_ci */ 74062306a36Sopenharmony_cistruct ceph_osd_req_op * 74162306a36Sopenharmony_ciosd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, 74262306a36Sopenharmony_ci u16 opcode, u32 flags) 74362306a36Sopenharmony_ci{ 74462306a36Sopenharmony_ci struct ceph_osd_req_op *op; 74562306a36Sopenharmony_ci 74662306a36Sopenharmony_ci BUG_ON(which >= osd_req->r_num_ops); 74762306a36Sopenharmony_ci BUG_ON(!osd_req_opcode_valid(opcode)); 74862306a36Sopenharmony_ci 74962306a36Sopenharmony_ci op = &osd_req->r_ops[which]; 75062306a36Sopenharmony_ci memset(op, 0, sizeof (*op)); 75162306a36Sopenharmony_ci op->op = opcode; 75262306a36Sopenharmony_ci op->flags = flags; 75362306a36Sopenharmony_ci 75462306a36Sopenharmony_ci return op; 75562306a36Sopenharmony_ci} 75662306a36Sopenharmony_ciEXPORT_SYMBOL(osd_req_op_init); 75762306a36Sopenharmony_ci 75862306a36Sopenharmony_civoid osd_req_op_extent_init(struct ceph_osd_request *osd_req, 75962306a36Sopenharmony_ci unsigned int which, u16 opcode, 76062306a36Sopenharmony_ci u64 offset, u64 length, 76162306a36Sopenharmony_ci u64 truncate_size, u32 truncate_seq) 76262306a36Sopenharmony_ci{ 76362306a36Sopenharmony_ci struct ceph_osd_req_op *op = osd_req_op_init(osd_req, which, 76462306a36Sopenharmony_ci opcode, 0); 76562306a36Sopenharmony_ci size_t payload_len = 0; 76662306a36Sopenharmony_ci 76762306a36Sopenharmony_ci BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && 76862306a36Sopenharmony_ci opcode != CEPH_OSD_OP_WRITEFULL && opcode != CEPH_OSD_OP_ZERO && 76962306a36Sopenharmony_ci opcode != CEPH_OSD_OP_TRUNCATE && opcode != CEPH_OSD_OP_SPARSE_READ); 77062306a36Sopenharmony_ci 77162306a36Sopenharmony_ci op->extent.offset = offset; 77262306a36Sopenharmony_ci op->extent.length = length; 77362306a36Sopenharmony_ci op->extent.truncate_size = truncate_size; 77462306a36Sopenharmony_ci op->extent.truncate_seq = truncate_seq; 77562306a36Sopenharmony_ci if (opcode == CEPH_OSD_OP_WRITE || opcode == 
CEPH_OSD_OP_WRITEFULL) 77662306a36Sopenharmony_ci payload_len += length; 77762306a36Sopenharmony_ci 77862306a36Sopenharmony_ci op->indata_len = payload_len; 77962306a36Sopenharmony_ci} 78062306a36Sopenharmony_ciEXPORT_SYMBOL(osd_req_op_extent_init); 78162306a36Sopenharmony_ci 78262306a36Sopenharmony_civoid osd_req_op_extent_update(struct ceph_osd_request *osd_req, 78362306a36Sopenharmony_ci unsigned int which, u64 length) 78462306a36Sopenharmony_ci{ 78562306a36Sopenharmony_ci struct ceph_osd_req_op *op; 78662306a36Sopenharmony_ci u64 previous; 78762306a36Sopenharmony_ci 78862306a36Sopenharmony_ci BUG_ON(which >= osd_req->r_num_ops); 78962306a36Sopenharmony_ci op = &osd_req->r_ops[which]; 79062306a36Sopenharmony_ci previous = op->extent.length; 79162306a36Sopenharmony_ci 79262306a36Sopenharmony_ci if (length == previous) 79362306a36Sopenharmony_ci return; /* Nothing to do */ 79462306a36Sopenharmony_ci BUG_ON(length > previous); 79562306a36Sopenharmony_ci 79662306a36Sopenharmony_ci op->extent.length = length; 79762306a36Sopenharmony_ci if (op->op == CEPH_OSD_OP_WRITE || op->op == CEPH_OSD_OP_WRITEFULL) 79862306a36Sopenharmony_ci op->indata_len -= previous - length; 79962306a36Sopenharmony_ci} 80062306a36Sopenharmony_ciEXPORT_SYMBOL(osd_req_op_extent_update); 80162306a36Sopenharmony_ci 80262306a36Sopenharmony_civoid osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req, 80362306a36Sopenharmony_ci unsigned int which, u64 offset_inc) 80462306a36Sopenharmony_ci{ 80562306a36Sopenharmony_ci struct ceph_osd_req_op *op, *prev_op; 80662306a36Sopenharmony_ci 80762306a36Sopenharmony_ci BUG_ON(which + 1 >= osd_req->r_num_ops); 80862306a36Sopenharmony_ci 80962306a36Sopenharmony_ci prev_op = &osd_req->r_ops[which]; 81062306a36Sopenharmony_ci op = osd_req_op_init(osd_req, which + 1, prev_op->op, prev_op->flags); 81162306a36Sopenharmony_ci /* dup previous one */ 81262306a36Sopenharmony_ci op->indata_len = prev_op->indata_len; 81362306a36Sopenharmony_ci op->outdata_len = 
prev_op->outdata_len; 81462306a36Sopenharmony_ci op->extent = prev_op->extent; 81562306a36Sopenharmony_ci /* adjust offset */ 81662306a36Sopenharmony_ci op->extent.offset += offset_inc; 81762306a36Sopenharmony_ci op->extent.length -= offset_inc; 81862306a36Sopenharmony_ci 81962306a36Sopenharmony_ci if (op->op == CEPH_OSD_OP_WRITE || op->op == CEPH_OSD_OP_WRITEFULL) 82062306a36Sopenharmony_ci op->indata_len -= offset_inc; 82162306a36Sopenharmony_ci} 82262306a36Sopenharmony_ciEXPORT_SYMBOL(osd_req_op_extent_dup_last); 82362306a36Sopenharmony_ci 82462306a36Sopenharmony_ciint osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, 82562306a36Sopenharmony_ci const char *class, const char *method) 82662306a36Sopenharmony_ci{ 82762306a36Sopenharmony_ci struct ceph_osd_req_op *op; 82862306a36Sopenharmony_ci struct ceph_pagelist *pagelist; 82962306a36Sopenharmony_ci size_t payload_len = 0; 83062306a36Sopenharmony_ci size_t size; 83162306a36Sopenharmony_ci int ret; 83262306a36Sopenharmony_ci 83362306a36Sopenharmony_ci op = osd_req_op_init(osd_req, which, CEPH_OSD_OP_CALL, 0); 83462306a36Sopenharmony_ci 83562306a36Sopenharmony_ci pagelist = ceph_pagelist_alloc(GFP_NOFS); 83662306a36Sopenharmony_ci if (!pagelist) 83762306a36Sopenharmony_ci return -ENOMEM; 83862306a36Sopenharmony_ci 83962306a36Sopenharmony_ci op->cls.class_name = class; 84062306a36Sopenharmony_ci size = strlen(class); 84162306a36Sopenharmony_ci BUG_ON(size > (size_t) U8_MAX); 84262306a36Sopenharmony_ci op->cls.class_len = size; 84362306a36Sopenharmony_ci ret = ceph_pagelist_append(pagelist, class, size); 84462306a36Sopenharmony_ci if (ret) 84562306a36Sopenharmony_ci goto err_pagelist_free; 84662306a36Sopenharmony_ci payload_len += size; 84762306a36Sopenharmony_ci 84862306a36Sopenharmony_ci op->cls.method_name = method; 84962306a36Sopenharmony_ci size = strlen(method); 85062306a36Sopenharmony_ci BUG_ON(size > (size_t) U8_MAX); 85162306a36Sopenharmony_ci op->cls.method_len = size; 
85262306a36Sopenharmony_ci ret = ceph_pagelist_append(pagelist, method, size); 85362306a36Sopenharmony_ci if (ret) 85462306a36Sopenharmony_ci goto err_pagelist_free; 85562306a36Sopenharmony_ci payload_len += size; 85662306a36Sopenharmony_ci 85762306a36Sopenharmony_ci osd_req_op_cls_request_info_pagelist(osd_req, which, pagelist); 85862306a36Sopenharmony_ci op->indata_len = payload_len; 85962306a36Sopenharmony_ci return 0; 86062306a36Sopenharmony_ci 86162306a36Sopenharmony_cierr_pagelist_free: 86262306a36Sopenharmony_ci ceph_pagelist_release(pagelist); 86362306a36Sopenharmony_ci return ret; 86462306a36Sopenharmony_ci} 86562306a36Sopenharmony_ciEXPORT_SYMBOL(osd_req_op_cls_init); 86662306a36Sopenharmony_ci 86762306a36Sopenharmony_ciint osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, 86862306a36Sopenharmony_ci u16 opcode, const char *name, const void *value, 86962306a36Sopenharmony_ci size_t size, u8 cmp_op, u8 cmp_mode) 87062306a36Sopenharmony_ci{ 87162306a36Sopenharmony_ci struct ceph_osd_req_op *op = osd_req_op_init(osd_req, which, 87262306a36Sopenharmony_ci opcode, 0); 87362306a36Sopenharmony_ci struct ceph_pagelist *pagelist; 87462306a36Sopenharmony_ci size_t payload_len; 87562306a36Sopenharmony_ci int ret; 87662306a36Sopenharmony_ci 87762306a36Sopenharmony_ci BUG_ON(opcode != CEPH_OSD_OP_SETXATTR && opcode != CEPH_OSD_OP_CMPXATTR); 87862306a36Sopenharmony_ci 87962306a36Sopenharmony_ci pagelist = ceph_pagelist_alloc(GFP_NOFS); 88062306a36Sopenharmony_ci if (!pagelist) 88162306a36Sopenharmony_ci return -ENOMEM; 88262306a36Sopenharmony_ci 88362306a36Sopenharmony_ci payload_len = strlen(name); 88462306a36Sopenharmony_ci op->xattr.name_len = payload_len; 88562306a36Sopenharmony_ci ret = ceph_pagelist_append(pagelist, name, payload_len); 88662306a36Sopenharmony_ci if (ret) 88762306a36Sopenharmony_ci goto err_pagelist_free; 88862306a36Sopenharmony_ci 88962306a36Sopenharmony_ci op->xattr.value_len = size; 89062306a36Sopenharmony_ci ret = 
ceph_pagelist_append(pagelist, value, size); 89162306a36Sopenharmony_ci if (ret) 89262306a36Sopenharmony_ci goto err_pagelist_free; 89362306a36Sopenharmony_ci payload_len += size; 89462306a36Sopenharmony_ci 89562306a36Sopenharmony_ci op->xattr.cmp_op = cmp_op; 89662306a36Sopenharmony_ci op->xattr.cmp_mode = cmp_mode; 89762306a36Sopenharmony_ci 89862306a36Sopenharmony_ci ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist); 89962306a36Sopenharmony_ci op->indata_len = payload_len; 90062306a36Sopenharmony_ci return 0; 90162306a36Sopenharmony_ci 90262306a36Sopenharmony_cierr_pagelist_free: 90362306a36Sopenharmony_ci ceph_pagelist_release(pagelist); 90462306a36Sopenharmony_ci return ret; 90562306a36Sopenharmony_ci} 90662306a36Sopenharmony_ciEXPORT_SYMBOL(osd_req_op_xattr_init); 90762306a36Sopenharmony_ci 90862306a36Sopenharmony_ci/* 90962306a36Sopenharmony_ci * @watch_opcode: CEPH_OSD_WATCH_OP_* 91062306a36Sopenharmony_ci */ 91162306a36Sopenharmony_cistatic void osd_req_op_watch_init(struct ceph_osd_request *req, int which, 91262306a36Sopenharmony_ci u8 watch_opcode, u64 cookie, u32 gen) 91362306a36Sopenharmony_ci{ 91462306a36Sopenharmony_ci struct ceph_osd_req_op *op; 91562306a36Sopenharmony_ci 91662306a36Sopenharmony_ci op = osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0); 91762306a36Sopenharmony_ci op->watch.cookie = cookie; 91862306a36Sopenharmony_ci op->watch.op = watch_opcode; 91962306a36Sopenharmony_ci op->watch.gen = gen; 92062306a36Sopenharmony_ci} 92162306a36Sopenharmony_ci 92262306a36Sopenharmony_ci/* 92362306a36Sopenharmony_ci * prot_ver, timeout and notify payload (may be empty) should already be 92462306a36Sopenharmony_ci * encoded in @request_pl 92562306a36Sopenharmony_ci */ 92662306a36Sopenharmony_cistatic void osd_req_op_notify_init(struct ceph_osd_request *req, int which, 92762306a36Sopenharmony_ci u64 cookie, struct ceph_pagelist *request_pl) 92862306a36Sopenharmony_ci{ 92962306a36Sopenharmony_ci struct ceph_osd_req_op *op; 
93062306a36Sopenharmony_ci 93162306a36Sopenharmony_ci op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0); 93262306a36Sopenharmony_ci op->notify.cookie = cookie; 93362306a36Sopenharmony_ci 93462306a36Sopenharmony_ci ceph_osd_data_pagelist_init(&op->notify.request_data, request_pl); 93562306a36Sopenharmony_ci op->indata_len = request_pl->length; 93662306a36Sopenharmony_ci} 93762306a36Sopenharmony_ci 93862306a36Sopenharmony_ci/* 93962306a36Sopenharmony_ci * @flags: CEPH_OSD_OP_ALLOC_HINT_FLAG_* 94062306a36Sopenharmony_ci */ 94162306a36Sopenharmony_civoid osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req, 94262306a36Sopenharmony_ci unsigned int which, 94362306a36Sopenharmony_ci u64 expected_object_size, 94462306a36Sopenharmony_ci u64 expected_write_size, 94562306a36Sopenharmony_ci u32 flags) 94662306a36Sopenharmony_ci{ 94762306a36Sopenharmony_ci struct ceph_osd_req_op *op; 94862306a36Sopenharmony_ci 94962306a36Sopenharmony_ci op = osd_req_op_init(osd_req, which, CEPH_OSD_OP_SETALLOCHINT, 0); 95062306a36Sopenharmony_ci op->alloc_hint.expected_object_size = expected_object_size; 95162306a36Sopenharmony_ci op->alloc_hint.expected_write_size = expected_write_size; 95262306a36Sopenharmony_ci op->alloc_hint.flags = flags; 95362306a36Sopenharmony_ci 95462306a36Sopenharmony_ci /* 95562306a36Sopenharmony_ci * CEPH_OSD_OP_SETALLOCHINT op is advisory and therefore deemed 95662306a36Sopenharmony_ci * not worth a feature bit. Set FAILOK per-op flag to make 95762306a36Sopenharmony_ci * sure older osds don't trip over an unsupported opcode. 
95862306a36Sopenharmony_ci */ 95962306a36Sopenharmony_ci op->flags |= CEPH_OSD_OP_FLAG_FAILOK; 96062306a36Sopenharmony_ci} 96162306a36Sopenharmony_ciEXPORT_SYMBOL(osd_req_op_alloc_hint_init); 96262306a36Sopenharmony_ci 96362306a36Sopenharmony_cistatic void ceph_osdc_msg_data_add(struct ceph_msg *msg, 96462306a36Sopenharmony_ci struct ceph_osd_data *osd_data) 96562306a36Sopenharmony_ci{ 96662306a36Sopenharmony_ci u64 length = ceph_osd_data_length(osd_data); 96762306a36Sopenharmony_ci 96862306a36Sopenharmony_ci if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) { 96962306a36Sopenharmony_ci BUG_ON(length > (u64) SIZE_MAX); 97062306a36Sopenharmony_ci if (length) 97162306a36Sopenharmony_ci ceph_msg_data_add_pages(msg, osd_data->pages, 97262306a36Sopenharmony_ci length, osd_data->alignment, false); 97362306a36Sopenharmony_ci } else if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGELIST) { 97462306a36Sopenharmony_ci BUG_ON(!length); 97562306a36Sopenharmony_ci ceph_msg_data_add_pagelist(msg, osd_data->pagelist); 97662306a36Sopenharmony_ci#ifdef CONFIG_BLOCK 97762306a36Sopenharmony_ci } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BIO) { 97862306a36Sopenharmony_ci ceph_msg_data_add_bio(msg, &osd_data->bio_pos, length); 97962306a36Sopenharmony_ci#endif 98062306a36Sopenharmony_ci } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BVECS) { 98162306a36Sopenharmony_ci ceph_msg_data_add_bvecs(msg, &osd_data->bvec_pos); 98262306a36Sopenharmony_ci } else if (osd_data->type == CEPH_OSD_DATA_TYPE_ITER) { 98362306a36Sopenharmony_ci ceph_msg_data_add_iter(msg, &osd_data->iter); 98462306a36Sopenharmony_ci } else { 98562306a36Sopenharmony_ci BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE); 98662306a36Sopenharmony_ci } 98762306a36Sopenharmony_ci} 98862306a36Sopenharmony_ci 98962306a36Sopenharmony_cistatic u32 osd_req_encode_op(struct ceph_osd_op *dst, 99062306a36Sopenharmony_ci const struct ceph_osd_req_op *src) 99162306a36Sopenharmony_ci{ 99262306a36Sopenharmony_ci switch (src->op) { 
99362306a36Sopenharmony_ci case CEPH_OSD_OP_STAT: 99462306a36Sopenharmony_ci break; 99562306a36Sopenharmony_ci case CEPH_OSD_OP_READ: 99662306a36Sopenharmony_ci case CEPH_OSD_OP_SPARSE_READ: 99762306a36Sopenharmony_ci case CEPH_OSD_OP_WRITE: 99862306a36Sopenharmony_ci case CEPH_OSD_OP_WRITEFULL: 99962306a36Sopenharmony_ci case CEPH_OSD_OP_ZERO: 100062306a36Sopenharmony_ci case CEPH_OSD_OP_TRUNCATE: 100162306a36Sopenharmony_ci dst->extent.offset = cpu_to_le64(src->extent.offset); 100262306a36Sopenharmony_ci dst->extent.length = cpu_to_le64(src->extent.length); 100362306a36Sopenharmony_ci dst->extent.truncate_size = 100462306a36Sopenharmony_ci cpu_to_le64(src->extent.truncate_size); 100562306a36Sopenharmony_ci dst->extent.truncate_seq = 100662306a36Sopenharmony_ci cpu_to_le32(src->extent.truncate_seq); 100762306a36Sopenharmony_ci break; 100862306a36Sopenharmony_ci case CEPH_OSD_OP_CALL: 100962306a36Sopenharmony_ci dst->cls.class_len = src->cls.class_len; 101062306a36Sopenharmony_ci dst->cls.method_len = src->cls.method_len; 101162306a36Sopenharmony_ci dst->cls.indata_len = cpu_to_le32(src->cls.indata_len); 101262306a36Sopenharmony_ci break; 101362306a36Sopenharmony_ci case CEPH_OSD_OP_WATCH: 101462306a36Sopenharmony_ci dst->watch.cookie = cpu_to_le64(src->watch.cookie); 101562306a36Sopenharmony_ci dst->watch.ver = cpu_to_le64(0); 101662306a36Sopenharmony_ci dst->watch.op = src->watch.op; 101762306a36Sopenharmony_ci dst->watch.gen = cpu_to_le32(src->watch.gen); 101862306a36Sopenharmony_ci break; 101962306a36Sopenharmony_ci case CEPH_OSD_OP_NOTIFY_ACK: 102062306a36Sopenharmony_ci break; 102162306a36Sopenharmony_ci case CEPH_OSD_OP_NOTIFY: 102262306a36Sopenharmony_ci dst->notify.cookie = cpu_to_le64(src->notify.cookie); 102362306a36Sopenharmony_ci break; 102462306a36Sopenharmony_ci case CEPH_OSD_OP_LIST_WATCHERS: 102562306a36Sopenharmony_ci break; 102662306a36Sopenharmony_ci case CEPH_OSD_OP_SETALLOCHINT: 102762306a36Sopenharmony_ci dst->alloc_hint.expected_object_size 
= 102862306a36Sopenharmony_ci cpu_to_le64(src->alloc_hint.expected_object_size); 102962306a36Sopenharmony_ci dst->alloc_hint.expected_write_size = 103062306a36Sopenharmony_ci cpu_to_le64(src->alloc_hint.expected_write_size); 103162306a36Sopenharmony_ci dst->alloc_hint.flags = cpu_to_le32(src->alloc_hint.flags); 103262306a36Sopenharmony_ci break; 103362306a36Sopenharmony_ci case CEPH_OSD_OP_SETXATTR: 103462306a36Sopenharmony_ci case CEPH_OSD_OP_CMPXATTR: 103562306a36Sopenharmony_ci dst->xattr.name_len = cpu_to_le32(src->xattr.name_len); 103662306a36Sopenharmony_ci dst->xattr.value_len = cpu_to_le32(src->xattr.value_len); 103762306a36Sopenharmony_ci dst->xattr.cmp_op = src->xattr.cmp_op; 103862306a36Sopenharmony_ci dst->xattr.cmp_mode = src->xattr.cmp_mode; 103962306a36Sopenharmony_ci break; 104062306a36Sopenharmony_ci case CEPH_OSD_OP_CREATE: 104162306a36Sopenharmony_ci case CEPH_OSD_OP_DELETE: 104262306a36Sopenharmony_ci break; 104362306a36Sopenharmony_ci case CEPH_OSD_OP_COPY_FROM2: 104462306a36Sopenharmony_ci dst->copy_from.snapid = cpu_to_le64(src->copy_from.snapid); 104562306a36Sopenharmony_ci dst->copy_from.src_version = 104662306a36Sopenharmony_ci cpu_to_le64(src->copy_from.src_version); 104762306a36Sopenharmony_ci dst->copy_from.flags = src->copy_from.flags; 104862306a36Sopenharmony_ci dst->copy_from.src_fadvise_flags = 104962306a36Sopenharmony_ci cpu_to_le32(src->copy_from.src_fadvise_flags); 105062306a36Sopenharmony_ci break; 105162306a36Sopenharmony_ci case CEPH_OSD_OP_ASSERT_VER: 105262306a36Sopenharmony_ci dst->assert_ver.unused = cpu_to_le64(0); 105362306a36Sopenharmony_ci dst->assert_ver.ver = cpu_to_le64(src->assert_ver.ver); 105462306a36Sopenharmony_ci break; 105562306a36Sopenharmony_ci default: 105662306a36Sopenharmony_ci pr_err("unsupported osd opcode %s\n", 105762306a36Sopenharmony_ci ceph_osd_op_name(src->op)); 105862306a36Sopenharmony_ci WARN_ON(1); 105962306a36Sopenharmony_ci 106062306a36Sopenharmony_ci return 0; 106162306a36Sopenharmony_ci } 
106262306a36Sopenharmony_ci 106362306a36Sopenharmony_ci dst->op = cpu_to_le16(src->op); 106462306a36Sopenharmony_ci dst->flags = cpu_to_le32(src->flags); 106562306a36Sopenharmony_ci dst->payload_len = cpu_to_le32(src->indata_len); 106662306a36Sopenharmony_ci 106762306a36Sopenharmony_ci return src->indata_len; 106862306a36Sopenharmony_ci} 106962306a36Sopenharmony_ci 107062306a36Sopenharmony_ci/* 107162306a36Sopenharmony_ci * build new request AND message, calculate layout, and adjust file 107262306a36Sopenharmony_ci * extent as needed. 107362306a36Sopenharmony_ci * 107462306a36Sopenharmony_ci * if the file was recently truncated, we include information about its 107562306a36Sopenharmony_ci * old and new size so that the object can be updated appropriately. (we 107662306a36Sopenharmony_ci * avoid synchronously deleting truncated objects because it's slow.) 107762306a36Sopenharmony_ci */ 107862306a36Sopenharmony_cistruct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, 107962306a36Sopenharmony_ci struct ceph_file_layout *layout, 108062306a36Sopenharmony_ci struct ceph_vino vino, 108162306a36Sopenharmony_ci u64 off, u64 *plen, 108262306a36Sopenharmony_ci unsigned int which, int num_ops, 108362306a36Sopenharmony_ci int opcode, int flags, 108462306a36Sopenharmony_ci struct ceph_snap_context *snapc, 108562306a36Sopenharmony_ci u32 truncate_seq, 108662306a36Sopenharmony_ci u64 truncate_size, 108762306a36Sopenharmony_ci bool use_mempool) 108862306a36Sopenharmony_ci{ 108962306a36Sopenharmony_ci struct ceph_osd_request *req; 109062306a36Sopenharmony_ci u64 objnum = 0; 109162306a36Sopenharmony_ci u64 objoff = 0; 109262306a36Sopenharmony_ci u64 objlen = 0; 109362306a36Sopenharmony_ci int r; 109462306a36Sopenharmony_ci 109562306a36Sopenharmony_ci BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && 109662306a36Sopenharmony_ci opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE && 109762306a36Sopenharmony_ci opcode != 
CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE && 109862306a36Sopenharmony_ci opcode != CEPH_OSD_OP_SPARSE_READ); 109962306a36Sopenharmony_ci 110062306a36Sopenharmony_ci req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool, 110162306a36Sopenharmony_ci GFP_NOFS); 110262306a36Sopenharmony_ci if (!req) { 110362306a36Sopenharmony_ci r = -ENOMEM; 110462306a36Sopenharmony_ci goto fail; 110562306a36Sopenharmony_ci } 110662306a36Sopenharmony_ci 110762306a36Sopenharmony_ci /* calculate max write size */ 110862306a36Sopenharmony_ci r = calc_layout(layout, off, plen, &objnum, &objoff, &objlen); 110962306a36Sopenharmony_ci if (r) 111062306a36Sopenharmony_ci goto fail; 111162306a36Sopenharmony_ci 111262306a36Sopenharmony_ci if (opcode == CEPH_OSD_OP_CREATE || opcode == CEPH_OSD_OP_DELETE) { 111362306a36Sopenharmony_ci osd_req_op_init(req, which, opcode, 0); 111462306a36Sopenharmony_ci } else { 111562306a36Sopenharmony_ci u32 object_size = layout->object_size; 111662306a36Sopenharmony_ci u32 object_base = off - objoff; 111762306a36Sopenharmony_ci if (!(truncate_seq == 1 && truncate_size == -1ULL)) { 111862306a36Sopenharmony_ci if (truncate_size <= object_base) { 111962306a36Sopenharmony_ci truncate_size = 0; 112062306a36Sopenharmony_ci } else { 112162306a36Sopenharmony_ci truncate_size -= object_base; 112262306a36Sopenharmony_ci if (truncate_size > object_size) 112362306a36Sopenharmony_ci truncate_size = object_size; 112462306a36Sopenharmony_ci } 112562306a36Sopenharmony_ci } 112662306a36Sopenharmony_ci osd_req_op_extent_init(req, which, opcode, objoff, objlen, 112762306a36Sopenharmony_ci truncate_size, truncate_seq); 112862306a36Sopenharmony_ci } 112962306a36Sopenharmony_ci 113062306a36Sopenharmony_ci req->r_base_oloc.pool = layout->pool_id; 113162306a36Sopenharmony_ci req->r_base_oloc.pool_ns = ceph_try_get_string(layout->pool_ns); 113262306a36Sopenharmony_ci ceph_oid_printf(&req->r_base_oid, "%llx.%08llx", vino.ino, objnum); 113362306a36Sopenharmony_ci 
req->r_flags = flags | osdc->client->options->read_from_replica; 113462306a36Sopenharmony_ci 113562306a36Sopenharmony_ci req->r_snapid = vino.snap; 113662306a36Sopenharmony_ci if (flags & CEPH_OSD_FLAG_WRITE) 113762306a36Sopenharmony_ci req->r_data_offset = off; 113862306a36Sopenharmony_ci 113962306a36Sopenharmony_ci if (num_ops > 1) { 114062306a36Sopenharmony_ci int num_req_ops, num_rep_ops; 114162306a36Sopenharmony_ci 114262306a36Sopenharmony_ci /* 114362306a36Sopenharmony_ci * If this is a multi-op write request, assume that we'll need 114462306a36Sopenharmony_ci * request ops. If it's a multi-op read then assume we'll need 114562306a36Sopenharmony_ci * reply ops. Anything else and call it -EINVAL. 114662306a36Sopenharmony_ci */ 114762306a36Sopenharmony_ci if (flags & CEPH_OSD_FLAG_WRITE) { 114862306a36Sopenharmony_ci num_req_ops = num_ops; 114962306a36Sopenharmony_ci num_rep_ops = 0; 115062306a36Sopenharmony_ci } else if (flags & CEPH_OSD_FLAG_READ) { 115162306a36Sopenharmony_ci num_req_ops = 0; 115262306a36Sopenharmony_ci num_rep_ops = num_ops; 115362306a36Sopenharmony_ci } else { 115462306a36Sopenharmony_ci r = -EINVAL; 115562306a36Sopenharmony_ci goto fail; 115662306a36Sopenharmony_ci } 115762306a36Sopenharmony_ci 115862306a36Sopenharmony_ci r = __ceph_osdc_alloc_messages(req, GFP_NOFS, num_req_ops, 115962306a36Sopenharmony_ci num_rep_ops); 116062306a36Sopenharmony_ci } else { 116162306a36Sopenharmony_ci r = ceph_osdc_alloc_messages(req, GFP_NOFS); 116262306a36Sopenharmony_ci } 116362306a36Sopenharmony_ci if (r) 116462306a36Sopenharmony_ci goto fail; 116562306a36Sopenharmony_ci 116662306a36Sopenharmony_ci return req; 116762306a36Sopenharmony_ci 116862306a36Sopenharmony_cifail: 116962306a36Sopenharmony_ci ceph_osdc_put_request(req); 117062306a36Sopenharmony_ci return ERR_PTR(r); 117162306a36Sopenharmony_ci} 117262306a36Sopenharmony_ciEXPORT_SYMBOL(ceph_osdc_new_request); 117362306a36Sopenharmony_ci 117462306a36Sopenharmony_ciint 
__ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt) 117562306a36Sopenharmony_ci{ 117662306a36Sopenharmony_ci op->extent.sparse_ext_cnt = cnt; 117762306a36Sopenharmony_ci op->extent.sparse_ext = kmalloc_array(cnt, 117862306a36Sopenharmony_ci sizeof(*op->extent.sparse_ext), 117962306a36Sopenharmony_ci GFP_NOFS); 118062306a36Sopenharmony_ci if (!op->extent.sparse_ext) 118162306a36Sopenharmony_ci return -ENOMEM; 118262306a36Sopenharmony_ci return 0; 118362306a36Sopenharmony_ci} 118462306a36Sopenharmony_ciEXPORT_SYMBOL(__ceph_alloc_sparse_ext_map); 118562306a36Sopenharmony_ci 118662306a36Sopenharmony_ci/* 118762306a36Sopenharmony_ci * We keep osd requests in an rbtree, sorted by ->r_tid. 118862306a36Sopenharmony_ci */ 118962306a36Sopenharmony_ciDEFINE_RB_FUNCS(request, struct ceph_osd_request, r_tid, r_node) 119062306a36Sopenharmony_ciDEFINE_RB_FUNCS(request_mc, struct ceph_osd_request, r_tid, r_mc_node) 119162306a36Sopenharmony_ci 119262306a36Sopenharmony_ci/* 119362306a36Sopenharmony_ci * Call @fn on each OSD request as long as @fn returns 0. 
119462306a36Sopenharmony_ci */ 119562306a36Sopenharmony_cistatic void for_each_request(struct ceph_osd_client *osdc, 119662306a36Sopenharmony_ci int (*fn)(struct ceph_osd_request *req, void *arg), 119762306a36Sopenharmony_ci void *arg) 119862306a36Sopenharmony_ci{ 119962306a36Sopenharmony_ci struct rb_node *n, *p; 120062306a36Sopenharmony_ci 120162306a36Sopenharmony_ci for (n = rb_first(&osdc->osds); n; n = rb_next(n)) { 120262306a36Sopenharmony_ci struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node); 120362306a36Sopenharmony_ci 120462306a36Sopenharmony_ci for (p = rb_first(&osd->o_requests); p; ) { 120562306a36Sopenharmony_ci struct ceph_osd_request *req = 120662306a36Sopenharmony_ci rb_entry(p, struct ceph_osd_request, r_node); 120762306a36Sopenharmony_ci 120862306a36Sopenharmony_ci p = rb_next(p); 120962306a36Sopenharmony_ci if (fn(req, arg)) 121062306a36Sopenharmony_ci return; 121162306a36Sopenharmony_ci } 121262306a36Sopenharmony_ci } 121362306a36Sopenharmony_ci 121462306a36Sopenharmony_ci for (p = rb_first(&osdc->homeless_osd.o_requests); p; ) { 121562306a36Sopenharmony_ci struct ceph_osd_request *req = 121662306a36Sopenharmony_ci rb_entry(p, struct ceph_osd_request, r_node); 121762306a36Sopenharmony_ci 121862306a36Sopenharmony_ci p = rb_next(p); 121962306a36Sopenharmony_ci if (fn(req, arg)) 122062306a36Sopenharmony_ci return; 122162306a36Sopenharmony_ci } 122262306a36Sopenharmony_ci} 122362306a36Sopenharmony_ci 122462306a36Sopenharmony_cistatic bool osd_homeless(struct ceph_osd *osd) 122562306a36Sopenharmony_ci{ 122662306a36Sopenharmony_ci return osd->o_osd == CEPH_HOMELESS_OSD; 122762306a36Sopenharmony_ci} 122862306a36Sopenharmony_ci 122962306a36Sopenharmony_cistatic bool osd_registered(struct ceph_osd *osd) 123062306a36Sopenharmony_ci{ 123162306a36Sopenharmony_ci verify_osdc_locked(osd->o_osdc); 123262306a36Sopenharmony_ci 123362306a36Sopenharmony_ci return !RB_EMPTY_NODE(&osd->o_node); 123462306a36Sopenharmony_ci} 123562306a36Sopenharmony_ci 
123662306a36Sopenharmony_ci/* 123762306a36Sopenharmony_ci * Assumes @osd is zero-initialized. 123862306a36Sopenharmony_ci */ 123962306a36Sopenharmony_cistatic void osd_init(struct ceph_osd *osd) 124062306a36Sopenharmony_ci{ 124162306a36Sopenharmony_ci refcount_set(&osd->o_ref, 1); 124262306a36Sopenharmony_ci RB_CLEAR_NODE(&osd->o_node); 124362306a36Sopenharmony_ci spin_lock_init(&osd->o_requests_lock); 124462306a36Sopenharmony_ci osd->o_requests = RB_ROOT; 124562306a36Sopenharmony_ci osd->o_linger_requests = RB_ROOT; 124662306a36Sopenharmony_ci osd->o_backoff_mappings = RB_ROOT; 124762306a36Sopenharmony_ci osd->o_backoffs_by_id = RB_ROOT; 124862306a36Sopenharmony_ci INIT_LIST_HEAD(&osd->o_osd_lru); 124962306a36Sopenharmony_ci INIT_LIST_HEAD(&osd->o_keepalive_item); 125062306a36Sopenharmony_ci osd->o_incarnation = 1; 125162306a36Sopenharmony_ci mutex_init(&osd->lock); 125262306a36Sopenharmony_ci} 125362306a36Sopenharmony_ci 125462306a36Sopenharmony_cistatic void ceph_init_sparse_read(struct ceph_sparse_read *sr) 125562306a36Sopenharmony_ci{ 125662306a36Sopenharmony_ci kfree(sr->sr_extent); 125762306a36Sopenharmony_ci memset(sr, '\0', sizeof(*sr)); 125862306a36Sopenharmony_ci sr->sr_state = CEPH_SPARSE_READ_HDR; 125962306a36Sopenharmony_ci} 126062306a36Sopenharmony_ci 126162306a36Sopenharmony_cistatic void osd_cleanup(struct ceph_osd *osd) 126262306a36Sopenharmony_ci{ 126362306a36Sopenharmony_ci WARN_ON(!RB_EMPTY_NODE(&osd->o_node)); 126462306a36Sopenharmony_ci WARN_ON(!RB_EMPTY_ROOT(&osd->o_requests)); 126562306a36Sopenharmony_ci WARN_ON(!RB_EMPTY_ROOT(&osd->o_linger_requests)); 126662306a36Sopenharmony_ci WARN_ON(!RB_EMPTY_ROOT(&osd->o_backoff_mappings)); 126762306a36Sopenharmony_ci WARN_ON(!RB_EMPTY_ROOT(&osd->o_backoffs_by_id)); 126862306a36Sopenharmony_ci WARN_ON(!list_empty(&osd->o_osd_lru)); 126962306a36Sopenharmony_ci WARN_ON(!list_empty(&osd->o_keepalive_item)); 127062306a36Sopenharmony_ci 127162306a36Sopenharmony_ci 
ceph_init_sparse_read(&osd->o_sparse_read); 127262306a36Sopenharmony_ci 127362306a36Sopenharmony_ci if (osd->o_auth.authorizer) { 127462306a36Sopenharmony_ci WARN_ON(osd_homeless(osd)); 127562306a36Sopenharmony_ci ceph_auth_destroy_authorizer(osd->o_auth.authorizer); 127662306a36Sopenharmony_ci } 127762306a36Sopenharmony_ci} 127862306a36Sopenharmony_ci 127962306a36Sopenharmony_ci/* 128062306a36Sopenharmony_ci * Track open sessions with osds. 128162306a36Sopenharmony_ci */ 128262306a36Sopenharmony_cistatic struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum) 128362306a36Sopenharmony_ci{ 128462306a36Sopenharmony_ci struct ceph_osd *osd; 128562306a36Sopenharmony_ci 128662306a36Sopenharmony_ci WARN_ON(onum == CEPH_HOMELESS_OSD); 128762306a36Sopenharmony_ci 128862306a36Sopenharmony_ci osd = kzalloc(sizeof(*osd), GFP_NOIO | __GFP_NOFAIL); 128962306a36Sopenharmony_ci osd_init(osd); 129062306a36Sopenharmony_ci osd->o_osdc = osdc; 129162306a36Sopenharmony_ci osd->o_osd = onum; 129262306a36Sopenharmony_ci osd->o_sparse_op_idx = -1; 129362306a36Sopenharmony_ci 129462306a36Sopenharmony_ci ceph_init_sparse_read(&osd->o_sparse_read); 129562306a36Sopenharmony_ci 129662306a36Sopenharmony_ci ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr); 129762306a36Sopenharmony_ci 129862306a36Sopenharmony_ci return osd; 129962306a36Sopenharmony_ci} 130062306a36Sopenharmony_ci 130162306a36Sopenharmony_cistatic struct ceph_osd *get_osd(struct ceph_osd *osd) 130262306a36Sopenharmony_ci{ 130362306a36Sopenharmony_ci if (refcount_inc_not_zero(&osd->o_ref)) { 130462306a36Sopenharmony_ci dout("get_osd %p %d -> %d\n", osd, refcount_read(&osd->o_ref)-1, 130562306a36Sopenharmony_ci refcount_read(&osd->o_ref)); 130662306a36Sopenharmony_ci return osd; 130762306a36Sopenharmony_ci } else { 130862306a36Sopenharmony_ci dout("get_osd %p FAIL\n", osd); 130962306a36Sopenharmony_ci return NULL; 131062306a36Sopenharmony_ci } 131162306a36Sopenharmony_ci} 131262306a36Sopenharmony_ci 
131362306a36Sopenharmony_cistatic void put_osd(struct ceph_osd *osd) 131462306a36Sopenharmony_ci{ 131562306a36Sopenharmony_ci dout("put_osd %p %d -> %d\n", osd, refcount_read(&osd->o_ref), 131662306a36Sopenharmony_ci refcount_read(&osd->o_ref) - 1); 131762306a36Sopenharmony_ci if (refcount_dec_and_test(&osd->o_ref)) { 131862306a36Sopenharmony_ci osd_cleanup(osd); 131962306a36Sopenharmony_ci kfree(osd); 132062306a36Sopenharmony_ci } 132162306a36Sopenharmony_ci} 132262306a36Sopenharmony_ci 132362306a36Sopenharmony_ciDEFINE_RB_FUNCS(osd, struct ceph_osd, o_osd, o_node) 132462306a36Sopenharmony_ci 132562306a36Sopenharmony_cistatic void __move_osd_to_lru(struct ceph_osd *osd) 132662306a36Sopenharmony_ci{ 132762306a36Sopenharmony_ci struct ceph_osd_client *osdc = osd->o_osdc; 132862306a36Sopenharmony_ci 132962306a36Sopenharmony_ci dout("%s osd %p osd%d\n", __func__, osd, osd->o_osd); 133062306a36Sopenharmony_ci BUG_ON(!list_empty(&osd->o_osd_lru)); 133162306a36Sopenharmony_ci 133262306a36Sopenharmony_ci spin_lock(&osdc->osd_lru_lock); 133362306a36Sopenharmony_ci list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); 133462306a36Sopenharmony_ci spin_unlock(&osdc->osd_lru_lock); 133562306a36Sopenharmony_ci 133662306a36Sopenharmony_ci osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl; 133762306a36Sopenharmony_ci} 133862306a36Sopenharmony_ci 133962306a36Sopenharmony_cistatic void maybe_move_osd_to_lru(struct ceph_osd *osd) 134062306a36Sopenharmony_ci{ 134162306a36Sopenharmony_ci if (RB_EMPTY_ROOT(&osd->o_requests) && 134262306a36Sopenharmony_ci RB_EMPTY_ROOT(&osd->o_linger_requests)) 134362306a36Sopenharmony_ci __move_osd_to_lru(osd); 134462306a36Sopenharmony_ci} 134562306a36Sopenharmony_ci 134662306a36Sopenharmony_cistatic void __remove_osd_from_lru(struct ceph_osd *osd) 134762306a36Sopenharmony_ci{ 134862306a36Sopenharmony_ci struct ceph_osd_client *osdc = osd->o_osdc; 134962306a36Sopenharmony_ci 135062306a36Sopenharmony_ci dout("%s osd %p osd%d\n", __func__, osd, 
osd->o_osd); 135162306a36Sopenharmony_ci 135262306a36Sopenharmony_ci spin_lock(&osdc->osd_lru_lock); 135362306a36Sopenharmony_ci if (!list_empty(&osd->o_osd_lru)) 135462306a36Sopenharmony_ci list_del_init(&osd->o_osd_lru); 135562306a36Sopenharmony_ci spin_unlock(&osdc->osd_lru_lock); 135662306a36Sopenharmony_ci} 135762306a36Sopenharmony_ci 135862306a36Sopenharmony_ci/* 135962306a36Sopenharmony_ci * Close the connection and assign any leftover requests to the 136062306a36Sopenharmony_ci * homeless session. 136162306a36Sopenharmony_ci */ 136262306a36Sopenharmony_cistatic void close_osd(struct ceph_osd *osd) 136362306a36Sopenharmony_ci{ 136462306a36Sopenharmony_ci struct ceph_osd_client *osdc = osd->o_osdc; 136562306a36Sopenharmony_ci struct rb_node *n; 136662306a36Sopenharmony_ci 136762306a36Sopenharmony_ci verify_osdc_wrlocked(osdc); 136862306a36Sopenharmony_ci dout("%s osd %p osd%d\n", __func__, osd, osd->o_osd); 136962306a36Sopenharmony_ci 137062306a36Sopenharmony_ci ceph_con_close(&osd->o_con); 137162306a36Sopenharmony_ci 137262306a36Sopenharmony_ci for (n = rb_first(&osd->o_requests); n; ) { 137362306a36Sopenharmony_ci struct ceph_osd_request *req = 137462306a36Sopenharmony_ci rb_entry(n, struct ceph_osd_request, r_node); 137562306a36Sopenharmony_ci 137662306a36Sopenharmony_ci n = rb_next(n); /* unlink_request() */ 137762306a36Sopenharmony_ci 137862306a36Sopenharmony_ci dout(" reassigning req %p tid %llu\n", req, req->r_tid); 137962306a36Sopenharmony_ci unlink_request(osd, req); 138062306a36Sopenharmony_ci link_request(&osdc->homeless_osd, req); 138162306a36Sopenharmony_ci } 138262306a36Sopenharmony_ci for (n = rb_first(&osd->o_linger_requests); n; ) { 138362306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq = 138462306a36Sopenharmony_ci rb_entry(n, struct ceph_osd_linger_request, node); 138562306a36Sopenharmony_ci 138662306a36Sopenharmony_ci n = rb_next(n); /* unlink_linger() */ 138762306a36Sopenharmony_ci 138862306a36Sopenharmony_ci dout(" reassigning 
lreq %p linger_id %llu\n", lreq, 138962306a36Sopenharmony_ci lreq->linger_id); 139062306a36Sopenharmony_ci unlink_linger(osd, lreq); 139162306a36Sopenharmony_ci link_linger(&osdc->homeless_osd, lreq); 139262306a36Sopenharmony_ci } 139362306a36Sopenharmony_ci clear_backoffs(osd); 139462306a36Sopenharmony_ci 139562306a36Sopenharmony_ci __remove_osd_from_lru(osd); 139662306a36Sopenharmony_ci erase_osd(&osdc->osds, osd); 139762306a36Sopenharmony_ci put_osd(osd); 139862306a36Sopenharmony_ci} 139962306a36Sopenharmony_ci 140062306a36Sopenharmony_ci/* 140162306a36Sopenharmony_ci * reset osd connect 140262306a36Sopenharmony_ci */ 140362306a36Sopenharmony_cistatic int reopen_osd(struct ceph_osd *osd) 140462306a36Sopenharmony_ci{ 140562306a36Sopenharmony_ci struct ceph_entity_addr *peer_addr; 140662306a36Sopenharmony_ci 140762306a36Sopenharmony_ci dout("%s osd %p osd%d\n", __func__, osd, osd->o_osd); 140862306a36Sopenharmony_ci 140962306a36Sopenharmony_ci if (RB_EMPTY_ROOT(&osd->o_requests) && 141062306a36Sopenharmony_ci RB_EMPTY_ROOT(&osd->o_linger_requests)) { 141162306a36Sopenharmony_ci close_osd(osd); 141262306a36Sopenharmony_ci return -ENODEV; 141362306a36Sopenharmony_ci } 141462306a36Sopenharmony_ci 141562306a36Sopenharmony_ci peer_addr = &osd->o_osdc->osdmap->osd_addr[osd->o_osd]; 141662306a36Sopenharmony_ci if (!memcmp(peer_addr, &osd->o_con.peer_addr, sizeof (*peer_addr)) && 141762306a36Sopenharmony_ci !ceph_con_opened(&osd->o_con)) { 141862306a36Sopenharmony_ci struct rb_node *n; 141962306a36Sopenharmony_ci 142062306a36Sopenharmony_ci dout("osd addr hasn't changed and connection never opened, " 142162306a36Sopenharmony_ci "letting msgr retry\n"); 142262306a36Sopenharmony_ci /* touch each r_stamp for handle_timeout()'s benfit */ 142362306a36Sopenharmony_ci for (n = rb_first(&osd->o_requests); n; n = rb_next(n)) { 142462306a36Sopenharmony_ci struct ceph_osd_request *req = 142562306a36Sopenharmony_ci rb_entry(n, struct ceph_osd_request, r_node); 
142662306a36Sopenharmony_ci req->r_stamp = jiffies; 142762306a36Sopenharmony_ci } 142862306a36Sopenharmony_ci 142962306a36Sopenharmony_ci return -EAGAIN; 143062306a36Sopenharmony_ci } 143162306a36Sopenharmony_ci 143262306a36Sopenharmony_ci ceph_con_close(&osd->o_con); 143362306a36Sopenharmony_ci ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd, peer_addr); 143462306a36Sopenharmony_ci osd->o_incarnation++; 143562306a36Sopenharmony_ci 143662306a36Sopenharmony_ci return 0; 143762306a36Sopenharmony_ci} 143862306a36Sopenharmony_ci 143962306a36Sopenharmony_cistatic struct ceph_osd *lookup_create_osd(struct ceph_osd_client *osdc, int o, 144062306a36Sopenharmony_ci bool wrlocked) 144162306a36Sopenharmony_ci{ 144262306a36Sopenharmony_ci struct ceph_osd *osd; 144362306a36Sopenharmony_ci 144462306a36Sopenharmony_ci if (wrlocked) 144562306a36Sopenharmony_ci verify_osdc_wrlocked(osdc); 144662306a36Sopenharmony_ci else 144762306a36Sopenharmony_ci verify_osdc_locked(osdc); 144862306a36Sopenharmony_ci 144962306a36Sopenharmony_ci if (o != CEPH_HOMELESS_OSD) 145062306a36Sopenharmony_ci osd = lookup_osd(&osdc->osds, o); 145162306a36Sopenharmony_ci else 145262306a36Sopenharmony_ci osd = &osdc->homeless_osd; 145362306a36Sopenharmony_ci if (!osd) { 145462306a36Sopenharmony_ci if (!wrlocked) 145562306a36Sopenharmony_ci return ERR_PTR(-EAGAIN); 145662306a36Sopenharmony_ci 145762306a36Sopenharmony_ci osd = create_osd(osdc, o); 145862306a36Sopenharmony_ci insert_osd(&osdc->osds, osd); 145962306a36Sopenharmony_ci ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd, 146062306a36Sopenharmony_ci &osdc->osdmap->osd_addr[osd->o_osd]); 146162306a36Sopenharmony_ci } 146262306a36Sopenharmony_ci 146362306a36Sopenharmony_ci dout("%s osdc %p osd%d -> osd %p\n", __func__, osdc, o, osd); 146462306a36Sopenharmony_ci return osd; 146562306a36Sopenharmony_ci} 146662306a36Sopenharmony_ci 146762306a36Sopenharmony_ci/* 146862306a36Sopenharmony_ci * Create request <-> OSD session relation. 
146962306a36Sopenharmony_ci * 147062306a36Sopenharmony_ci * @req has to be assigned a tid, @osd may be homeless. 147162306a36Sopenharmony_ci */ 147262306a36Sopenharmony_cistatic void link_request(struct ceph_osd *osd, struct ceph_osd_request *req) 147362306a36Sopenharmony_ci{ 147462306a36Sopenharmony_ci verify_osd_locked(osd); 147562306a36Sopenharmony_ci WARN_ON(!req->r_tid || req->r_osd); 147662306a36Sopenharmony_ci dout("%s osd %p osd%d req %p tid %llu\n", __func__, osd, osd->o_osd, 147762306a36Sopenharmony_ci req, req->r_tid); 147862306a36Sopenharmony_ci 147962306a36Sopenharmony_ci if (!osd_homeless(osd)) 148062306a36Sopenharmony_ci __remove_osd_from_lru(osd); 148162306a36Sopenharmony_ci else 148262306a36Sopenharmony_ci atomic_inc(&osd->o_osdc->num_homeless); 148362306a36Sopenharmony_ci 148462306a36Sopenharmony_ci get_osd(osd); 148562306a36Sopenharmony_ci spin_lock(&osd->o_requests_lock); 148662306a36Sopenharmony_ci insert_request(&osd->o_requests, req); 148762306a36Sopenharmony_ci spin_unlock(&osd->o_requests_lock); 148862306a36Sopenharmony_ci req->r_osd = osd; 148962306a36Sopenharmony_ci} 149062306a36Sopenharmony_ci 149162306a36Sopenharmony_cistatic void unlink_request(struct ceph_osd *osd, struct ceph_osd_request *req) 149262306a36Sopenharmony_ci{ 149362306a36Sopenharmony_ci verify_osd_locked(osd); 149462306a36Sopenharmony_ci WARN_ON(req->r_osd != osd); 149562306a36Sopenharmony_ci dout("%s osd %p osd%d req %p tid %llu\n", __func__, osd, osd->o_osd, 149662306a36Sopenharmony_ci req, req->r_tid); 149762306a36Sopenharmony_ci 149862306a36Sopenharmony_ci req->r_osd = NULL; 149962306a36Sopenharmony_ci spin_lock(&osd->o_requests_lock); 150062306a36Sopenharmony_ci erase_request(&osd->o_requests, req); 150162306a36Sopenharmony_ci spin_unlock(&osd->o_requests_lock); 150262306a36Sopenharmony_ci put_osd(osd); 150362306a36Sopenharmony_ci 150462306a36Sopenharmony_ci if (!osd_homeless(osd)) 150562306a36Sopenharmony_ci maybe_move_osd_to_lru(osd); 150662306a36Sopenharmony_ci 
else 150762306a36Sopenharmony_ci atomic_dec(&osd->o_osdc->num_homeless); 150862306a36Sopenharmony_ci} 150962306a36Sopenharmony_ci 151062306a36Sopenharmony_cistatic bool __pool_full(struct ceph_pg_pool_info *pi) 151162306a36Sopenharmony_ci{ 151262306a36Sopenharmony_ci return pi->flags & CEPH_POOL_FLAG_FULL; 151362306a36Sopenharmony_ci} 151462306a36Sopenharmony_ci 151562306a36Sopenharmony_cistatic bool have_pool_full(struct ceph_osd_client *osdc) 151662306a36Sopenharmony_ci{ 151762306a36Sopenharmony_ci struct rb_node *n; 151862306a36Sopenharmony_ci 151962306a36Sopenharmony_ci for (n = rb_first(&osdc->osdmap->pg_pools); n; n = rb_next(n)) { 152062306a36Sopenharmony_ci struct ceph_pg_pool_info *pi = 152162306a36Sopenharmony_ci rb_entry(n, struct ceph_pg_pool_info, node); 152262306a36Sopenharmony_ci 152362306a36Sopenharmony_ci if (__pool_full(pi)) 152462306a36Sopenharmony_ci return true; 152562306a36Sopenharmony_ci } 152662306a36Sopenharmony_ci 152762306a36Sopenharmony_ci return false; 152862306a36Sopenharmony_ci} 152962306a36Sopenharmony_ci 153062306a36Sopenharmony_cistatic bool pool_full(struct ceph_osd_client *osdc, s64 pool_id) 153162306a36Sopenharmony_ci{ 153262306a36Sopenharmony_ci struct ceph_pg_pool_info *pi; 153362306a36Sopenharmony_ci 153462306a36Sopenharmony_ci pi = ceph_pg_pool_by_id(osdc->osdmap, pool_id); 153562306a36Sopenharmony_ci if (!pi) 153662306a36Sopenharmony_ci return false; 153762306a36Sopenharmony_ci 153862306a36Sopenharmony_ci return __pool_full(pi); 153962306a36Sopenharmony_ci} 154062306a36Sopenharmony_ci 154162306a36Sopenharmony_ci/* 154262306a36Sopenharmony_ci * Returns whether a request should be blocked from being sent 154362306a36Sopenharmony_ci * based on the current osdmap and osd_client settings. 
154462306a36Sopenharmony_ci */ 154562306a36Sopenharmony_cistatic bool target_should_be_paused(struct ceph_osd_client *osdc, 154662306a36Sopenharmony_ci const struct ceph_osd_request_target *t, 154762306a36Sopenharmony_ci struct ceph_pg_pool_info *pi) 154862306a36Sopenharmony_ci{ 154962306a36Sopenharmony_ci bool pauserd = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD); 155062306a36Sopenharmony_ci bool pausewr = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR) || 155162306a36Sopenharmony_ci ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || 155262306a36Sopenharmony_ci __pool_full(pi); 155362306a36Sopenharmony_ci 155462306a36Sopenharmony_ci WARN_ON(pi->id != t->target_oloc.pool); 155562306a36Sopenharmony_ci return ((t->flags & CEPH_OSD_FLAG_READ) && pauserd) || 155662306a36Sopenharmony_ci ((t->flags & CEPH_OSD_FLAG_WRITE) && pausewr) || 155762306a36Sopenharmony_ci (osdc->osdmap->epoch < osdc->epoch_barrier); 155862306a36Sopenharmony_ci} 155962306a36Sopenharmony_ci 156062306a36Sopenharmony_cistatic int pick_random_replica(const struct ceph_osds *acting) 156162306a36Sopenharmony_ci{ 156262306a36Sopenharmony_ci int i = get_random_u32_below(acting->size); 156362306a36Sopenharmony_ci 156462306a36Sopenharmony_ci dout("%s picked osd%d, primary osd%d\n", __func__, 156562306a36Sopenharmony_ci acting->osds[i], acting->primary); 156662306a36Sopenharmony_ci return i; 156762306a36Sopenharmony_ci} 156862306a36Sopenharmony_ci 156962306a36Sopenharmony_ci/* 157062306a36Sopenharmony_ci * Picks the closest replica based on client's location given by 157162306a36Sopenharmony_ci * crush_location option. Prefers the primary if the locality is 157262306a36Sopenharmony_ci * the same. 
 */
static int pick_closest_replica(struct ceph_osd_client *osdc,
				const struct ceph_osds *acting)
{
	struct ceph_options *opt = osdc->client->options;
	/* both are assigned on the first pass (i == 0) before being read */
	int best_i, best_locality;
	int i = 0, locality;

	do {
		locality = ceph_get_crush_locality(osdc->osdmap,
						   acting->osds[i],
						   &opt->crush_locs);
		/*
		 * Take this entry if it is the first one, if it is the
		 * first with a non-negative locality, or if its
		 * non-negative locality is strictly smaller than the best
		 * so far.  Ties keep the earlier index.
		 */
		if (i == 0 ||
		    (locality >= 0 && best_locality < 0) ||
		    (locality >= 0 && best_locality >= 0 &&
		     locality < best_locality)) {
			best_i = i;
			best_locality = locality;
		}
	} while (++i < acting->size);

	dout("%s picked osd%d with locality %d, primary osd%d\n", __func__,
	     acting->osds[best_i], best_locality, acting->primary);
	return best_i;
}

/* Outcome of calc_target(). */
enum calc_target_result {
	CALC_TARGET_NO_ACTION = 0,	/* nothing relevant changed */
	CALC_TARGET_NEED_RESEND,	/* unpaused or mapping changed */
	CALC_TARGET_POOL_DNE,		/* base or tier pool not in the map */
};

/*
 * Recompute the mapping of target @t against the current osdmap.
 *
 * Always records the current epoch in @t->epoch.  If the base pool (or
 * the tier pool selected for the op) is not in the map, @t->osd is set
 * to CEPH_HOMELESS_OSD and CALC_TARGET_POOL_DNE is returned.  Otherwise
 * the cached mapping in @t is refreshed when it changed, and the result
 * says whether the request needs to be resent.
 *
 * @any_change is forwarded to the interval and acting-set comparisons.
 */
static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
					   struct ceph_osd_request_target *t,
					   bool any_change)
{
	struct ceph_pg_pool_info *pi;
	struct ceph_pg pgid, last_pgid;
	struct ceph_osds up, acting;
	bool is_read = t->flags & CEPH_OSD_FLAG_READ;
	bool is_write = t->flags & CEPH_OSD_FLAG_WRITE;
	/* initialized up front: the dout() at out: prints all four */
	bool force_resend = false;
	bool unpaused = false;
	bool legacy_change = false;
	bool split = false;
	bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE);
	bool recovery_deletes = ceph_osdmap_flag(osdc,
						 CEPH_OSDMAP_RECOVERY_DELETES);
	enum calc_target_result ct_res;

	t->epoch = osdc->osdmap->epoch;
	pi = ceph_pg_pool_by_id(osdc->osdmap, t->base_oloc.pool);
	if (!pi) {
		t->osd = CEPH_HOMELESS_OSD;
		ct_res = CALC_TARGET_POOL_DNE;
		goto out;
	}

	/*
	 * The pool asked for a forced resend in exactly this epoch: honor
	 * it if the target has not yet seen that epoch, or has never
	 * recorded a forced resend at all.
	 */
	if (osdc->osdmap->epoch == pi->last_force_request_resend) {
		if (t->last_force_resend < pi->last_force_request_resend) {
			t->last_force_resend = pi->last_force_request_resend;
			force_resend = true;
		} else if (t->last_force_resend == 0) {
			force_resend = true;
		}
	}

	/* apply tiering */
	ceph_oid_copy(&t->target_oid, &t->base_oid);
	ceph_oloc_copy(&t->target_oloc, &t->base_oloc);
	if ((t->flags & CEPH_OSD_FLAG_IGNORE_OVERLAY) == 0) {
		/* if both apply, the write tier is assigned last and wins */
		if (is_read && pi->read_tier >= 0)
			t->target_oloc.pool = pi->read_tier;
		if (is_write && pi->write_tier >= 0)
			t->target_oloc.pool = pi->write_tier;

		/* from here on @pi describes the (possibly redirected) pool */
		pi = ceph_pg_pool_by_id(osdc->osdmap, t->target_oloc.pool);
		if (!pi) {
			t->osd = CEPH_HOMELESS_OSD;
			ct_res = CALC_TARGET_POOL_DNE;
			goto out;
		}
	}

	__ceph_object_locator_to_pg(pi, &t->target_oid, &t->target_oloc, &pgid);
	/* the same seed folded with the pg_num/mask cached in @t */
	last_pgid.pool = pgid.pool;
	last_pgid.seed = ceph_stable_mod(pgid.seed, t->pg_num, t->pg_num_mask);

	ceph_pg_to_up_acting_osds(osdc->osdmap, pi, &pgid, &up, &acting);
	/* cached values from @t are paired with the freshly computed ones */
	if (any_change &&
	    ceph_is_new_interval(&t->acting,
				 &acting,
				 &t->up,
				 &up,
				 t->size,
				 pi->size,
				 t->min_size,
				 pi->min_size,
				 t->pg_num,
				 pi->pg_num,
				 t->sort_bitwise,
				 sort_bitwise,
				 t->recovery_deletes,
				 recovery_deletes,
				 &last_pgid))
		force_resend = true;

	if (t->paused && !target_should_be_paused(osdc, t, pi)) {
		t->paused = false;
		unpaused = true;
	}
	legacy_change = ceph_pg_compare(&t->pgid, &pgid) ||
			ceph_osds_changed(&t->acting, &acting,
					  t->used_replica || any_change);
	/* a zero cached pg_num skips the split check */
	if (t->pg_num)
		split = ceph_pg_is_split(&last_pgid, t->pg_num, pi->pg_num);

	if (legacy_change || force_resend || split) {
		/* refresh everything cached in @t from the new mapping */
		t->pgid = pgid; /* struct */
		ceph_pg_to_primary_shard(osdc->osdmap, pi, &pgid, &t->spgid);
		ceph_osds_copy(&t->acting, &acting);
		ceph_osds_copy(&t->up, &up);
		t->size = pi->size;
		t->min_size = pi->min_size;
		t->pg_num = pi->pg_num;
		t->pg_num_mask = pi->pg_num_mask;
		t->sort_bitwise = sort_bitwise;
		t->recovery_deletes = recovery_deletes;

		/*
		 * A replica may serve the op only for non-write ops on a
		 * replicated pool with more than one acting OSD, and only
		 * when the request asked for balanced or localized reads.
		 */
		if ((t->flags & (CEPH_OSD_FLAG_BALANCE_READS |
				 CEPH_OSD_FLAG_LOCALIZE_READS)) &&
		    !is_write && pi->type == CEPH_POOL_TYPE_REP &&
		    acting.size > 1) {
			int pos;

			WARN_ON(!is_read || acting.osds[0] != acting.primary);
			if (t->flags & CEPH_OSD_FLAG_BALANCE_READS) {
				pos = pick_random_replica(&acting);
			} else {
				pos = pick_closest_replica(osdc, &acting);
			}
			t->osd = acting.osds[pos];
			t->used_replica = pos > 0;
		} else {
			t->osd = acting.primary;
			t->used_replica = false;
		}
	}

	if (unpaused || legacy_change || force_resend || split)
		ct_res = CALC_TARGET_NEED_RESEND;
	else
		ct_res = CALC_TARGET_NO_ACTION;

out:
	dout("%s t %p -> %d%d%d%d ct_res %d osd%d\n", __func__, t, unpaused,
	     legacy_change, force_resend, split, ct_res, t->osd);
	return ct_res;
}

/*
 * Allocate an spg mapping with an unlinked node and an empty backoff
 * tree.  Returns NULL on allocation failure.  The memory comes from
 * kmalloc() and only ->node and ->backoffs are set here.
 */
static struct ceph_spg_mapping *alloc_spg_mapping(void)
{
	struct ceph_spg_mapping *spg;

	spg = kmalloc(sizeof(*spg), GFP_NOIO);
	if (!spg)
		return NULL;

	RB_CLEAR_NODE(&spg->node);
	spg->backoffs = RB_ROOT;
	return spg;
}

/*
 * Free @spg.  Warns if it is still linked into a tree or still has
 * backoffs attached.
 */
static void free_spg_mapping(struct ceph_spg_mapping *spg)
{
	WARN_ON(!RB_EMPTY_NODE(&spg->node));
	WARN_ON(!RB_EMPTY_ROOT(&spg->backoffs));

	kfree(spg);
}

/*
 * rbtree of ceph_spg_mapping for handling map<spg_t, ...>, similar to
 * ceph_pg_mapping.  Used to track OSD backoffs -- a backoff [range] is
 * defined only within a specific spgid; it does not pass anything to
 * children on split, or to another primary.
175962306a36Sopenharmony_ci */ 176062306a36Sopenharmony_ciDEFINE_RB_FUNCS2(spg_mapping, struct ceph_spg_mapping, spgid, ceph_spg_compare, 176162306a36Sopenharmony_ci RB_BYPTR, const struct ceph_spg *, node) 176262306a36Sopenharmony_ci 176362306a36Sopenharmony_cistatic u64 hoid_get_bitwise_key(const struct ceph_hobject_id *hoid) 176462306a36Sopenharmony_ci{ 176562306a36Sopenharmony_ci return hoid->is_max ? 0x100000000ull : hoid->hash_reverse_bits; 176662306a36Sopenharmony_ci} 176762306a36Sopenharmony_ci 176862306a36Sopenharmony_cistatic void hoid_get_effective_key(const struct ceph_hobject_id *hoid, 176962306a36Sopenharmony_ci void **pkey, size_t *pkey_len) 177062306a36Sopenharmony_ci{ 177162306a36Sopenharmony_ci if (hoid->key_len) { 177262306a36Sopenharmony_ci *pkey = hoid->key; 177362306a36Sopenharmony_ci *pkey_len = hoid->key_len; 177462306a36Sopenharmony_ci } else { 177562306a36Sopenharmony_ci *pkey = hoid->oid; 177662306a36Sopenharmony_ci *pkey_len = hoid->oid_len; 177762306a36Sopenharmony_ci } 177862306a36Sopenharmony_ci} 177962306a36Sopenharmony_ci 178062306a36Sopenharmony_cistatic int compare_names(const void *name1, size_t name1_len, 178162306a36Sopenharmony_ci const void *name2, size_t name2_len) 178262306a36Sopenharmony_ci{ 178362306a36Sopenharmony_ci int ret; 178462306a36Sopenharmony_ci 178562306a36Sopenharmony_ci ret = memcmp(name1, name2, min(name1_len, name2_len)); 178662306a36Sopenharmony_ci if (!ret) { 178762306a36Sopenharmony_ci if (name1_len < name2_len) 178862306a36Sopenharmony_ci ret = -1; 178962306a36Sopenharmony_ci else if (name1_len > name2_len) 179062306a36Sopenharmony_ci ret = 1; 179162306a36Sopenharmony_ci } 179262306a36Sopenharmony_ci return ret; 179362306a36Sopenharmony_ci} 179462306a36Sopenharmony_ci 179562306a36Sopenharmony_cistatic int hoid_compare(const struct ceph_hobject_id *lhs, 179662306a36Sopenharmony_ci const struct ceph_hobject_id *rhs) 179762306a36Sopenharmony_ci{ 179862306a36Sopenharmony_ci void *effective_key1, 
*effective_key2; 179962306a36Sopenharmony_ci size_t effective_key1_len, effective_key2_len; 180062306a36Sopenharmony_ci int ret; 180162306a36Sopenharmony_ci 180262306a36Sopenharmony_ci if (lhs->is_max < rhs->is_max) 180362306a36Sopenharmony_ci return -1; 180462306a36Sopenharmony_ci if (lhs->is_max > rhs->is_max) 180562306a36Sopenharmony_ci return 1; 180662306a36Sopenharmony_ci 180762306a36Sopenharmony_ci if (lhs->pool < rhs->pool) 180862306a36Sopenharmony_ci return -1; 180962306a36Sopenharmony_ci if (lhs->pool > rhs->pool) 181062306a36Sopenharmony_ci return 1; 181162306a36Sopenharmony_ci 181262306a36Sopenharmony_ci if (hoid_get_bitwise_key(lhs) < hoid_get_bitwise_key(rhs)) 181362306a36Sopenharmony_ci return -1; 181462306a36Sopenharmony_ci if (hoid_get_bitwise_key(lhs) > hoid_get_bitwise_key(rhs)) 181562306a36Sopenharmony_ci return 1; 181662306a36Sopenharmony_ci 181762306a36Sopenharmony_ci ret = compare_names(lhs->nspace, lhs->nspace_len, 181862306a36Sopenharmony_ci rhs->nspace, rhs->nspace_len); 181962306a36Sopenharmony_ci if (ret) 182062306a36Sopenharmony_ci return ret; 182162306a36Sopenharmony_ci 182262306a36Sopenharmony_ci hoid_get_effective_key(lhs, &effective_key1, &effective_key1_len); 182362306a36Sopenharmony_ci hoid_get_effective_key(rhs, &effective_key2, &effective_key2_len); 182462306a36Sopenharmony_ci ret = compare_names(effective_key1, effective_key1_len, 182562306a36Sopenharmony_ci effective_key2, effective_key2_len); 182662306a36Sopenharmony_ci if (ret) 182762306a36Sopenharmony_ci return ret; 182862306a36Sopenharmony_ci 182962306a36Sopenharmony_ci ret = compare_names(lhs->oid, lhs->oid_len, rhs->oid, rhs->oid_len); 183062306a36Sopenharmony_ci if (ret) 183162306a36Sopenharmony_ci return ret; 183262306a36Sopenharmony_ci 183362306a36Sopenharmony_ci if (lhs->snapid < rhs->snapid) 183462306a36Sopenharmony_ci return -1; 183562306a36Sopenharmony_ci if (lhs->snapid > rhs->snapid) 183662306a36Sopenharmony_ci return 1; 183762306a36Sopenharmony_ci 
183862306a36Sopenharmony_ci return 0; 183962306a36Sopenharmony_ci} 184062306a36Sopenharmony_ci 184162306a36Sopenharmony_ci/* 184262306a36Sopenharmony_ci * For decoding ->begin and ->end of MOSDBackoff only -- no MIN/MAX 184362306a36Sopenharmony_ci * compat stuff here. 184462306a36Sopenharmony_ci * 184562306a36Sopenharmony_ci * Assumes @hoid is zero-initialized. 184662306a36Sopenharmony_ci */ 184762306a36Sopenharmony_cistatic int decode_hoid(void **p, void *end, struct ceph_hobject_id *hoid) 184862306a36Sopenharmony_ci{ 184962306a36Sopenharmony_ci u8 struct_v; 185062306a36Sopenharmony_ci u32 struct_len; 185162306a36Sopenharmony_ci int ret; 185262306a36Sopenharmony_ci 185362306a36Sopenharmony_ci ret = ceph_start_decoding(p, end, 4, "hobject_t", &struct_v, 185462306a36Sopenharmony_ci &struct_len); 185562306a36Sopenharmony_ci if (ret) 185662306a36Sopenharmony_ci return ret; 185762306a36Sopenharmony_ci 185862306a36Sopenharmony_ci if (struct_v < 4) { 185962306a36Sopenharmony_ci pr_err("got struct_v %d < 4 of hobject_t\n", struct_v); 186062306a36Sopenharmony_ci goto e_inval; 186162306a36Sopenharmony_ci } 186262306a36Sopenharmony_ci 186362306a36Sopenharmony_ci hoid->key = ceph_extract_encoded_string(p, end, &hoid->key_len, 186462306a36Sopenharmony_ci GFP_NOIO); 186562306a36Sopenharmony_ci if (IS_ERR(hoid->key)) { 186662306a36Sopenharmony_ci ret = PTR_ERR(hoid->key); 186762306a36Sopenharmony_ci hoid->key = NULL; 186862306a36Sopenharmony_ci return ret; 186962306a36Sopenharmony_ci } 187062306a36Sopenharmony_ci 187162306a36Sopenharmony_ci hoid->oid = ceph_extract_encoded_string(p, end, &hoid->oid_len, 187262306a36Sopenharmony_ci GFP_NOIO); 187362306a36Sopenharmony_ci if (IS_ERR(hoid->oid)) { 187462306a36Sopenharmony_ci ret = PTR_ERR(hoid->oid); 187562306a36Sopenharmony_ci hoid->oid = NULL; 187662306a36Sopenharmony_ci return ret; 187762306a36Sopenharmony_ci } 187862306a36Sopenharmony_ci 187962306a36Sopenharmony_ci ceph_decode_64_safe(p, end, hoid->snapid, e_inval); 
188062306a36Sopenharmony_ci ceph_decode_32_safe(p, end, hoid->hash, e_inval); 188162306a36Sopenharmony_ci ceph_decode_8_safe(p, end, hoid->is_max, e_inval); 188262306a36Sopenharmony_ci 188362306a36Sopenharmony_ci hoid->nspace = ceph_extract_encoded_string(p, end, &hoid->nspace_len, 188462306a36Sopenharmony_ci GFP_NOIO); 188562306a36Sopenharmony_ci if (IS_ERR(hoid->nspace)) { 188662306a36Sopenharmony_ci ret = PTR_ERR(hoid->nspace); 188762306a36Sopenharmony_ci hoid->nspace = NULL; 188862306a36Sopenharmony_ci return ret; 188962306a36Sopenharmony_ci } 189062306a36Sopenharmony_ci 189162306a36Sopenharmony_ci ceph_decode_64_safe(p, end, hoid->pool, e_inval); 189262306a36Sopenharmony_ci 189362306a36Sopenharmony_ci ceph_hoid_build_hash_cache(hoid); 189462306a36Sopenharmony_ci return 0; 189562306a36Sopenharmony_ci 189662306a36Sopenharmony_cie_inval: 189762306a36Sopenharmony_ci return -EINVAL; 189862306a36Sopenharmony_ci} 189962306a36Sopenharmony_ci 190062306a36Sopenharmony_cistatic int hoid_encoding_size(const struct ceph_hobject_id *hoid) 190162306a36Sopenharmony_ci{ 190262306a36Sopenharmony_ci return 8 + 4 + 1 + 8 + /* snapid, hash, is_max, pool */ 190362306a36Sopenharmony_ci 4 + hoid->key_len + 4 + hoid->oid_len + 4 + hoid->nspace_len; 190462306a36Sopenharmony_ci} 190562306a36Sopenharmony_ci 190662306a36Sopenharmony_cistatic void encode_hoid(void **p, void *end, const struct ceph_hobject_id *hoid) 190762306a36Sopenharmony_ci{ 190862306a36Sopenharmony_ci ceph_start_encoding(p, 4, 3, hoid_encoding_size(hoid)); 190962306a36Sopenharmony_ci ceph_encode_string(p, end, hoid->key, hoid->key_len); 191062306a36Sopenharmony_ci ceph_encode_string(p, end, hoid->oid, hoid->oid_len); 191162306a36Sopenharmony_ci ceph_encode_64(p, hoid->snapid); 191262306a36Sopenharmony_ci ceph_encode_32(p, hoid->hash); 191362306a36Sopenharmony_ci ceph_encode_8(p, hoid->is_max); 191462306a36Sopenharmony_ci ceph_encode_string(p, end, hoid->nspace, hoid->nspace_len); 191562306a36Sopenharmony_ci 
ceph_encode_64(p, hoid->pool); 191662306a36Sopenharmony_ci} 191762306a36Sopenharmony_ci 191862306a36Sopenharmony_cistatic void free_hoid(struct ceph_hobject_id *hoid) 191962306a36Sopenharmony_ci{ 192062306a36Sopenharmony_ci if (hoid) { 192162306a36Sopenharmony_ci kfree(hoid->key); 192262306a36Sopenharmony_ci kfree(hoid->oid); 192362306a36Sopenharmony_ci kfree(hoid->nspace); 192462306a36Sopenharmony_ci kfree(hoid); 192562306a36Sopenharmony_ci } 192662306a36Sopenharmony_ci} 192762306a36Sopenharmony_ci 192862306a36Sopenharmony_cistatic struct ceph_osd_backoff *alloc_backoff(void) 192962306a36Sopenharmony_ci{ 193062306a36Sopenharmony_ci struct ceph_osd_backoff *backoff; 193162306a36Sopenharmony_ci 193262306a36Sopenharmony_ci backoff = kzalloc(sizeof(*backoff), GFP_NOIO); 193362306a36Sopenharmony_ci if (!backoff) 193462306a36Sopenharmony_ci return NULL; 193562306a36Sopenharmony_ci 193662306a36Sopenharmony_ci RB_CLEAR_NODE(&backoff->spg_node); 193762306a36Sopenharmony_ci RB_CLEAR_NODE(&backoff->id_node); 193862306a36Sopenharmony_ci return backoff; 193962306a36Sopenharmony_ci} 194062306a36Sopenharmony_ci 194162306a36Sopenharmony_cistatic void free_backoff(struct ceph_osd_backoff *backoff) 194262306a36Sopenharmony_ci{ 194362306a36Sopenharmony_ci WARN_ON(!RB_EMPTY_NODE(&backoff->spg_node)); 194462306a36Sopenharmony_ci WARN_ON(!RB_EMPTY_NODE(&backoff->id_node)); 194562306a36Sopenharmony_ci 194662306a36Sopenharmony_ci free_hoid(backoff->begin); 194762306a36Sopenharmony_ci free_hoid(backoff->end); 194862306a36Sopenharmony_ci kfree(backoff); 194962306a36Sopenharmony_ci} 195062306a36Sopenharmony_ci 195162306a36Sopenharmony_ci/* 195262306a36Sopenharmony_ci * Within a specific spgid, backoffs are managed by ->begin hoid. 
195362306a36Sopenharmony_ci */ 195462306a36Sopenharmony_ciDEFINE_RB_INSDEL_FUNCS2(backoff, struct ceph_osd_backoff, begin, hoid_compare, 195562306a36Sopenharmony_ci RB_BYVAL, spg_node); 195662306a36Sopenharmony_ci 195762306a36Sopenharmony_cistatic struct ceph_osd_backoff *lookup_containing_backoff(struct rb_root *root, 195862306a36Sopenharmony_ci const struct ceph_hobject_id *hoid) 195962306a36Sopenharmony_ci{ 196062306a36Sopenharmony_ci struct rb_node *n = root->rb_node; 196162306a36Sopenharmony_ci 196262306a36Sopenharmony_ci while (n) { 196362306a36Sopenharmony_ci struct ceph_osd_backoff *cur = 196462306a36Sopenharmony_ci rb_entry(n, struct ceph_osd_backoff, spg_node); 196562306a36Sopenharmony_ci int cmp; 196662306a36Sopenharmony_ci 196762306a36Sopenharmony_ci cmp = hoid_compare(hoid, cur->begin); 196862306a36Sopenharmony_ci if (cmp < 0) { 196962306a36Sopenharmony_ci n = n->rb_left; 197062306a36Sopenharmony_ci } else if (cmp > 0) { 197162306a36Sopenharmony_ci if (hoid_compare(hoid, cur->end) < 0) 197262306a36Sopenharmony_ci return cur; 197362306a36Sopenharmony_ci 197462306a36Sopenharmony_ci n = n->rb_right; 197562306a36Sopenharmony_ci } else { 197662306a36Sopenharmony_ci return cur; 197762306a36Sopenharmony_ci } 197862306a36Sopenharmony_ci } 197962306a36Sopenharmony_ci 198062306a36Sopenharmony_ci return NULL; 198162306a36Sopenharmony_ci} 198262306a36Sopenharmony_ci 198362306a36Sopenharmony_ci/* 198462306a36Sopenharmony_ci * Each backoff has a unique id within its OSD session. 
198562306a36Sopenharmony_ci */ 198662306a36Sopenharmony_ciDEFINE_RB_FUNCS(backoff_by_id, struct ceph_osd_backoff, id, id_node) 198762306a36Sopenharmony_ci 198862306a36Sopenharmony_cistatic void clear_backoffs(struct ceph_osd *osd) 198962306a36Sopenharmony_ci{ 199062306a36Sopenharmony_ci while (!RB_EMPTY_ROOT(&osd->o_backoff_mappings)) { 199162306a36Sopenharmony_ci struct ceph_spg_mapping *spg = 199262306a36Sopenharmony_ci rb_entry(rb_first(&osd->o_backoff_mappings), 199362306a36Sopenharmony_ci struct ceph_spg_mapping, node); 199462306a36Sopenharmony_ci 199562306a36Sopenharmony_ci while (!RB_EMPTY_ROOT(&spg->backoffs)) { 199662306a36Sopenharmony_ci struct ceph_osd_backoff *backoff = 199762306a36Sopenharmony_ci rb_entry(rb_first(&spg->backoffs), 199862306a36Sopenharmony_ci struct ceph_osd_backoff, spg_node); 199962306a36Sopenharmony_ci 200062306a36Sopenharmony_ci erase_backoff(&spg->backoffs, backoff); 200162306a36Sopenharmony_ci erase_backoff_by_id(&osd->o_backoffs_by_id, backoff); 200262306a36Sopenharmony_ci free_backoff(backoff); 200362306a36Sopenharmony_ci } 200462306a36Sopenharmony_ci erase_spg_mapping(&osd->o_backoff_mappings, spg); 200562306a36Sopenharmony_ci free_spg_mapping(spg); 200662306a36Sopenharmony_ci } 200762306a36Sopenharmony_ci} 200862306a36Sopenharmony_ci 200962306a36Sopenharmony_ci/* 201062306a36Sopenharmony_ci * Set up a temporary, non-owning view into @t. 
201162306a36Sopenharmony_ci */ 201262306a36Sopenharmony_cistatic void hoid_fill_from_target(struct ceph_hobject_id *hoid, 201362306a36Sopenharmony_ci const struct ceph_osd_request_target *t) 201462306a36Sopenharmony_ci{ 201562306a36Sopenharmony_ci hoid->key = NULL; 201662306a36Sopenharmony_ci hoid->key_len = 0; 201762306a36Sopenharmony_ci hoid->oid = t->target_oid.name; 201862306a36Sopenharmony_ci hoid->oid_len = t->target_oid.name_len; 201962306a36Sopenharmony_ci hoid->snapid = CEPH_NOSNAP; 202062306a36Sopenharmony_ci hoid->hash = t->pgid.seed; 202162306a36Sopenharmony_ci hoid->is_max = false; 202262306a36Sopenharmony_ci if (t->target_oloc.pool_ns) { 202362306a36Sopenharmony_ci hoid->nspace = t->target_oloc.pool_ns->str; 202462306a36Sopenharmony_ci hoid->nspace_len = t->target_oloc.pool_ns->len; 202562306a36Sopenharmony_ci } else { 202662306a36Sopenharmony_ci hoid->nspace = NULL; 202762306a36Sopenharmony_ci hoid->nspace_len = 0; 202862306a36Sopenharmony_ci } 202962306a36Sopenharmony_ci hoid->pool = t->target_oloc.pool; 203062306a36Sopenharmony_ci ceph_hoid_build_hash_cache(hoid); 203162306a36Sopenharmony_ci} 203262306a36Sopenharmony_ci 203362306a36Sopenharmony_cistatic bool should_plug_request(struct ceph_osd_request *req) 203462306a36Sopenharmony_ci{ 203562306a36Sopenharmony_ci struct ceph_osd *osd = req->r_osd; 203662306a36Sopenharmony_ci struct ceph_spg_mapping *spg; 203762306a36Sopenharmony_ci struct ceph_osd_backoff *backoff; 203862306a36Sopenharmony_ci struct ceph_hobject_id hoid; 203962306a36Sopenharmony_ci 204062306a36Sopenharmony_ci spg = lookup_spg_mapping(&osd->o_backoff_mappings, &req->r_t.spgid); 204162306a36Sopenharmony_ci if (!spg) 204262306a36Sopenharmony_ci return false; 204362306a36Sopenharmony_ci 204462306a36Sopenharmony_ci hoid_fill_from_target(&hoid, &req->r_t); 204562306a36Sopenharmony_ci backoff = lookup_containing_backoff(&spg->backoffs, &hoid); 204662306a36Sopenharmony_ci if (!backoff) 204762306a36Sopenharmony_ci return false; 
204862306a36Sopenharmony_ci 204962306a36Sopenharmony_ci dout("%s req %p tid %llu backoff osd%d spgid %llu.%xs%d id %llu\n", 205062306a36Sopenharmony_ci __func__, req, req->r_tid, osd->o_osd, backoff->spgid.pgid.pool, 205162306a36Sopenharmony_ci backoff->spgid.pgid.seed, backoff->spgid.shard, backoff->id); 205262306a36Sopenharmony_ci return true; 205362306a36Sopenharmony_ci} 205462306a36Sopenharmony_ci 205562306a36Sopenharmony_ci/* 205662306a36Sopenharmony_ci * Keep get_num_data_items() in sync with this function. 205762306a36Sopenharmony_ci */ 205862306a36Sopenharmony_cistatic void setup_request_data(struct ceph_osd_request *req) 205962306a36Sopenharmony_ci{ 206062306a36Sopenharmony_ci struct ceph_msg *request_msg = req->r_request; 206162306a36Sopenharmony_ci struct ceph_msg *reply_msg = req->r_reply; 206262306a36Sopenharmony_ci struct ceph_osd_req_op *op; 206362306a36Sopenharmony_ci 206462306a36Sopenharmony_ci if (req->r_request->num_data_items || req->r_reply->num_data_items) 206562306a36Sopenharmony_ci return; 206662306a36Sopenharmony_ci 206762306a36Sopenharmony_ci WARN_ON(request_msg->data_length || reply_msg->data_length); 206862306a36Sopenharmony_ci for (op = req->r_ops; op != &req->r_ops[req->r_num_ops]; op++) { 206962306a36Sopenharmony_ci switch (op->op) { 207062306a36Sopenharmony_ci /* request */ 207162306a36Sopenharmony_ci case CEPH_OSD_OP_WRITE: 207262306a36Sopenharmony_ci case CEPH_OSD_OP_WRITEFULL: 207362306a36Sopenharmony_ci WARN_ON(op->indata_len != op->extent.length); 207462306a36Sopenharmony_ci ceph_osdc_msg_data_add(request_msg, 207562306a36Sopenharmony_ci &op->extent.osd_data); 207662306a36Sopenharmony_ci break; 207762306a36Sopenharmony_ci case CEPH_OSD_OP_SETXATTR: 207862306a36Sopenharmony_ci case CEPH_OSD_OP_CMPXATTR: 207962306a36Sopenharmony_ci WARN_ON(op->indata_len != op->xattr.name_len + 208062306a36Sopenharmony_ci op->xattr.value_len); 208162306a36Sopenharmony_ci ceph_osdc_msg_data_add(request_msg, 208262306a36Sopenharmony_ci 
&op->xattr.osd_data); 208362306a36Sopenharmony_ci break; 208462306a36Sopenharmony_ci case CEPH_OSD_OP_NOTIFY_ACK: 208562306a36Sopenharmony_ci ceph_osdc_msg_data_add(request_msg, 208662306a36Sopenharmony_ci &op->notify_ack.request_data); 208762306a36Sopenharmony_ci break; 208862306a36Sopenharmony_ci case CEPH_OSD_OP_COPY_FROM2: 208962306a36Sopenharmony_ci ceph_osdc_msg_data_add(request_msg, 209062306a36Sopenharmony_ci &op->copy_from.osd_data); 209162306a36Sopenharmony_ci break; 209262306a36Sopenharmony_ci 209362306a36Sopenharmony_ci /* reply */ 209462306a36Sopenharmony_ci case CEPH_OSD_OP_STAT: 209562306a36Sopenharmony_ci ceph_osdc_msg_data_add(reply_msg, 209662306a36Sopenharmony_ci &op->raw_data_in); 209762306a36Sopenharmony_ci break; 209862306a36Sopenharmony_ci case CEPH_OSD_OP_READ: 209962306a36Sopenharmony_ci case CEPH_OSD_OP_SPARSE_READ: 210062306a36Sopenharmony_ci ceph_osdc_msg_data_add(reply_msg, 210162306a36Sopenharmony_ci &op->extent.osd_data); 210262306a36Sopenharmony_ci break; 210362306a36Sopenharmony_ci case CEPH_OSD_OP_LIST_WATCHERS: 210462306a36Sopenharmony_ci ceph_osdc_msg_data_add(reply_msg, 210562306a36Sopenharmony_ci &op->list_watchers.response_data); 210662306a36Sopenharmony_ci break; 210762306a36Sopenharmony_ci 210862306a36Sopenharmony_ci /* both */ 210962306a36Sopenharmony_ci case CEPH_OSD_OP_CALL: 211062306a36Sopenharmony_ci WARN_ON(op->indata_len != op->cls.class_len + 211162306a36Sopenharmony_ci op->cls.method_len + 211262306a36Sopenharmony_ci op->cls.indata_len); 211362306a36Sopenharmony_ci ceph_osdc_msg_data_add(request_msg, 211462306a36Sopenharmony_ci &op->cls.request_info); 211562306a36Sopenharmony_ci /* optional, can be NONE */ 211662306a36Sopenharmony_ci ceph_osdc_msg_data_add(request_msg, 211762306a36Sopenharmony_ci &op->cls.request_data); 211862306a36Sopenharmony_ci /* optional, can be NONE */ 211962306a36Sopenharmony_ci ceph_osdc_msg_data_add(reply_msg, 212062306a36Sopenharmony_ci &op->cls.response_data); 212162306a36Sopenharmony_ci 
break; 212262306a36Sopenharmony_ci case CEPH_OSD_OP_NOTIFY: 212362306a36Sopenharmony_ci ceph_osdc_msg_data_add(request_msg, 212462306a36Sopenharmony_ci &op->notify.request_data); 212562306a36Sopenharmony_ci ceph_osdc_msg_data_add(reply_msg, 212662306a36Sopenharmony_ci &op->notify.response_data); 212762306a36Sopenharmony_ci break; 212862306a36Sopenharmony_ci } 212962306a36Sopenharmony_ci } 213062306a36Sopenharmony_ci} 213162306a36Sopenharmony_ci 213262306a36Sopenharmony_cistatic void encode_pgid(void **p, const struct ceph_pg *pgid) 213362306a36Sopenharmony_ci{ 213462306a36Sopenharmony_ci ceph_encode_8(p, 1); 213562306a36Sopenharmony_ci ceph_encode_64(p, pgid->pool); 213662306a36Sopenharmony_ci ceph_encode_32(p, pgid->seed); 213762306a36Sopenharmony_ci ceph_encode_32(p, -1); /* preferred */ 213862306a36Sopenharmony_ci} 213962306a36Sopenharmony_ci 214062306a36Sopenharmony_cistatic void encode_spgid(void **p, const struct ceph_spg *spgid) 214162306a36Sopenharmony_ci{ 214262306a36Sopenharmony_ci ceph_start_encoding(p, 1, 1, CEPH_PGID_ENCODING_LEN + 1); 214362306a36Sopenharmony_ci encode_pgid(p, &spgid->pgid); 214462306a36Sopenharmony_ci ceph_encode_8(p, spgid->shard); 214562306a36Sopenharmony_ci} 214662306a36Sopenharmony_ci 214762306a36Sopenharmony_cistatic void encode_oloc(void **p, void *end, 214862306a36Sopenharmony_ci const struct ceph_object_locator *oloc) 214962306a36Sopenharmony_ci{ 215062306a36Sopenharmony_ci ceph_start_encoding(p, 5, 4, ceph_oloc_encoding_size(oloc)); 215162306a36Sopenharmony_ci ceph_encode_64(p, oloc->pool); 215262306a36Sopenharmony_ci ceph_encode_32(p, -1); /* preferred */ 215362306a36Sopenharmony_ci ceph_encode_32(p, 0); /* key len */ 215462306a36Sopenharmony_ci if (oloc->pool_ns) 215562306a36Sopenharmony_ci ceph_encode_string(p, end, oloc->pool_ns->str, 215662306a36Sopenharmony_ci oloc->pool_ns->len); 215762306a36Sopenharmony_ci else 215862306a36Sopenharmony_ci ceph_encode_32(p, 0); 215962306a36Sopenharmony_ci} 216062306a36Sopenharmony_ci 
216162306a36Sopenharmony_cistatic void encode_request_partial(struct ceph_osd_request *req, 216262306a36Sopenharmony_ci struct ceph_msg *msg) 216362306a36Sopenharmony_ci{ 216462306a36Sopenharmony_ci void *p = msg->front.iov_base; 216562306a36Sopenharmony_ci void *const end = p + msg->front_alloc_len; 216662306a36Sopenharmony_ci u32 data_len = 0; 216762306a36Sopenharmony_ci int i; 216862306a36Sopenharmony_ci 216962306a36Sopenharmony_ci if (req->r_flags & CEPH_OSD_FLAG_WRITE) { 217062306a36Sopenharmony_ci /* snapshots aren't writeable */ 217162306a36Sopenharmony_ci WARN_ON(req->r_snapid != CEPH_NOSNAP); 217262306a36Sopenharmony_ci } else { 217362306a36Sopenharmony_ci WARN_ON(req->r_mtime.tv_sec || req->r_mtime.tv_nsec || 217462306a36Sopenharmony_ci req->r_data_offset || req->r_snapc); 217562306a36Sopenharmony_ci } 217662306a36Sopenharmony_ci 217762306a36Sopenharmony_ci setup_request_data(req); 217862306a36Sopenharmony_ci 217962306a36Sopenharmony_ci encode_spgid(&p, &req->r_t.spgid); /* actual spg */ 218062306a36Sopenharmony_ci ceph_encode_32(&p, req->r_t.pgid.seed); /* raw hash */ 218162306a36Sopenharmony_ci ceph_encode_32(&p, req->r_osdc->osdmap->epoch); 218262306a36Sopenharmony_ci ceph_encode_32(&p, req->r_flags); 218362306a36Sopenharmony_ci 218462306a36Sopenharmony_ci /* reqid */ 218562306a36Sopenharmony_ci ceph_start_encoding(&p, 2, 2, sizeof(struct ceph_osd_reqid)); 218662306a36Sopenharmony_ci memset(p, 0, sizeof(struct ceph_osd_reqid)); 218762306a36Sopenharmony_ci p += sizeof(struct ceph_osd_reqid); 218862306a36Sopenharmony_ci 218962306a36Sopenharmony_ci /* trace */ 219062306a36Sopenharmony_ci memset(p, 0, sizeof(struct ceph_blkin_trace_info)); 219162306a36Sopenharmony_ci p += sizeof(struct ceph_blkin_trace_info); 219262306a36Sopenharmony_ci 219362306a36Sopenharmony_ci ceph_encode_32(&p, 0); /* client_inc, always 0 */ 219462306a36Sopenharmony_ci ceph_encode_timespec64(p, &req->r_mtime); 219562306a36Sopenharmony_ci p += sizeof(struct ceph_timespec); 
219662306a36Sopenharmony_ci 219762306a36Sopenharmony_ci encode_oloc(&p, end, &req->r_t.target_oloc); 219862306a36Sopenharmony_ci ceph_encode_string(&p, end, req->r_t.target_oid.name, 219962306a36Sopenharmony_ci req->r_t.target_oid.name_len); 220062306a36Sopenharmony_ci 220162306a36Sopenharmony_ci /* ops, can imply data */ 220262306a36Sopenharmony_ci ceph_encode_16(&p, req->r_num_ops); 220362306a36Sopenharmony_ci for (i = 0; i < req->r_num_ops; i++) { 220462306a36Sopenharmony_ci data_len += osd_req_encode_op(p, &req->r_ops[i]); 220562306a36Sopenharmony_ci p += sizeof(struct ceph_osd_op); 220662306a36Sopenharmony_ci } 220762306a36Sopenharmony_ci 220862306a36Sopenharmony_ci ceph_encode_64(&p, req->r_snapid); /* snapid */ 220962306a36Sopenharmony_ci if (req->r_snapc) { 221062306a36Sopenharmony_ci ceph_encode_64(&p, req->r_snapc->seq); 221162306a36Sopenharmony_ci ceph_encode_32(&p, req->r_snapc->num_snaps); 221262306a36Sopenharmony_ci for (i = 0; i < req->r_snapc->num_snaps; i++) 221362306a36Sopenharmony_ci ceph_encode_64(&p, req->r_snapc->snaps[i]); 221462306a36Sopenharmony_ci } else { 221562306a36Sopenharmony_ci ceph_encode_64(&p, 0); /* snap_seq */ 221662306a36Sopenharmony_ci ceph_encode_32(&p, 0); /* snaps len */ 221762306a36Sopenharmony_ci } 221862306a36Sopenharmony_ci 221962306a36Sopenharmony_ci ceph_encode_32(&p, req->r_attempts); /* retry_attempt */ 222062306a36Sopenharmony_ci BUG_ON(p > end - 8); /* space for features */ 222162306a36Sopenharmony_ci 222262306a36Sopenharmony_ci msg->hdr.version = cpu_to_le16(8); /* MOSDOp v8 */ 222362306a36Sopenharmony_ci /* front_len is finalized in encode_request_finish() */ 222462306a36Sopenharmony_ci msg->front.iov_len = p - msg->front.iov_base; 222562306a36Sopenharmony_ci msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 222662306a36Sopenharmony_ci msg->hdr.data_len = cpu_to_le32(data_len); 222762306a36Sopenharmony_ci /* 222862306a36Sopenharmony_ci * The header "data_off" is a hint to the receiver allowing it 
222962306a36Sopenharmony_ci * to align received data into its buffers such that there's no 223062306a36Sopenharmony_ci * need to re-copy it before writing it to disk (direct I/O). 223162306a36Sopenharmony_ci */ 223262306a36Sopenharmony_ci msg->hdr.data_off = cpu_to_le16(req->r_data_offset); 223362306a36Sopenharmony_ci 223462306a36Sopenharmony_ci dout("%s req %p msg %p oid %s oid_len %d\n", __func__, req, msg, 223562306a36Sopenharmony_ci req->r_t.target_oid.name, req->r_t.target_oid.name_len); 223662306a36Sopenharmony_ci} 223762306a36Sopenharmony_ci 223862306a36Sopenharmony_cistatic void encode_request_finish(struct ceph_msg *msg) 223962306a36Sopenharmony_ci{ 224062306a36Sopenharmony_ci void *p = msg->front.iov_base; 224162306a36Sopenharmony_ci void *const partial_end = p + msg->front.iov_len; 224262306a36Sopenharmony_ci void *const end = p + msg->front_alloc_len; 224362306a36Sopenharmony_ci 224462306a36Sopenharmony_ci if (CEPH_HAVE_FEATURE(msg->con->peer_features, RESEND_ON_SPLIT)) { 224562306a36Sopenharmony_ci /* luminous OSD -- encode features and be done */ 224662306a36Sopenharmony_ci p = partial_end; 224762306a36Sopenharmony_ci ceph_encode_64(&p, msg->con->peer_features); 224862306a36Sopenharmony_ci } else { 224962306a36Sopenharmony_ci struct { 225062306a36Sopenharmony_ci char spgid[CEPH_ENCODING_START_BLK_LEN + 225162306a36Sopenharmony_ci CEPH_PGID_ENCODING_LEN + 1]; 225262306a36Sopenharmony_ci __le32 hash; 225362306a36Sopenharmony_ci __le32 epoch; 225462306a36Sopenharmony_ci __le32 flags; 225562306a36Sopenharmony_ci char reqid[CEPH_ENCODING_START_BLK_LEN + 225662306a36Sopenharmony_ci sizeof(struct ceph_osd_reqid)]; 225762306a36Sopenharmony_ci char trace[sizeof(struct ceph_blkin_trace_info)]; 225862306a36Sopenharmony_ci __le32 client_inc; 225962306a36Sopenharmony_ci struct ceph_timespec mtime; 226062306a36Sopenharmony_ci } __packed head; 226162306a36Sopenharmony_ci struct ceph_pg pgid; 226262306a36Sopenharmony_ci void *oloc, *oid, *tail; 
226362306a36Sopenharmony_ci int oloc_len, oid_len, tail_len; 226462306a36Sopenharmony_ci int len; 226562306a36Sopenharmony_ci 226662306a36Sopenharmony_ci /* 226762306a36Sopenharmony_ci * Pre-luminous OSD -- reencode v8 into v4 using @head 226862306a36Sopenharmony_ci * as a temporary buffer. Encode the raw PG; the rest 226962306a36Sopenharmony_ci * is just a matter of moving oloc, oid and tail blobs 227062306a36Sopenharmony_ci * around. 227162306a36Sopenharmony_ci */ 227262306a36Sopenharmony_ci memcpy(&head, p, sizeof(head)); 227362306a36Sopenharmony_ci p += sizeof(head); 227462306a36Sopenharmony_ci 227562306a36Sopenharmony_ci oloc = p; 227662306a36Sopenharmony_ci p += CEPH_ENCODING_START_BLK_LEN; 227762306a36Sopenharmony_ci pgid.pool = ceph_decode_64(&p); 227862306a36Sopenharmony_ci p += 4 + 4; /* preferred, key len */ 227962306a36Sopenharmony_ci len = ceph_decode_32(&p); 228062306a36Sopenharmony_ci p += len; /* nspace */ 228162306a36Sopenharmony_ci oloc_len = p - oloc; 228262306a36Sopenharmony_ci 228362306a36Sopenharmony_ci oid = p; 228462306a36Sopenharmony_ci len = ceph_decode_32(&p); 228562306a36Sopenharmony_ci p += len; 228662306a36Sopenharmony_ci oid_len = p - oid; 228762306a36Sopenharmony_ci 228862306a36Sopenharmony_ci tail = p; 228962306a36Sopenharmony_ci tail_len = partial_end - p; 229062306a36Sopenharmony_ci 229162306a36Sopenharmony_ci p = msg->front.iov_base; 229262306a36Sopenharmony_ci ceph_encode_copy(&p, &head.client_inc, sizeof(head.client_inc)); 229362306a36Sopenharmony_ci ceph_encode_copy(&p, &head.epoch, sizeof(head.epoch)); 229462306a36Sopenharmony_ci ceph_encode_copy(&p, &head.flags, sizeof(head.flags)); 229562306a36Sopenharmony_ci ceph_encode_copy(&p, &head.mtime, sizeof(head.mtime)); 229662306a36Sopenharmony_ci 229762306a36Sopenharmony_ci /* reassert_version */ 229862306a36Sopenharmony_ci memset(p, 0, sizeof(struct ceph_eversion)); 229962306a36Sopenharmony_ci p += sizeof(struct ceph_eversion); 230062306a36Sopenharmony_ci 
230162306a36Sopenharmony_ci BUG_ON(p >= oloc); 230262306a36Sopenharmony_ci memmove(p, oloc, oloc_len); 230362306a36Sopenharmony_ci p += oloc_len; 230462306a36Sopenharmony_ci 230562306a36Sopenharmony_ci pgid.seed = le32_to_cpu(head.hash); 230662306a36Sopenharmony_ci encode_pgid(&p, &pgid); /* raw pg */ 230762306a36Sopenharmony_ci 230862306a36Sopenharmony_ci BUG_ON(p >= oid); 230962306a36Sopenharmony_ci memmove(p, oid, oid_len); 231062306a36Sopenharmony_ci p += oid_len; 231162306a36Sopenharmony_ci 231262306a36Sopenharmony_ci /* tail -- ops, snapid, snapc, retry_attempt */ 231362306a36Sopenharmony_ci BUG_ON(p >= tail); 231462306a36Sopenharmony_ci memmove(p, tail, tail_len); 231562306a36Sopenharmony_ci p += tail_len; 231662306a36Sopenharmony_ci 231762306a36Sopenharmony_ci msg->hdr.version = cpu_to_le16(4); /* MOSDOp v4 */ 231862306a36Sopenharmony_ci } 231962306a36Sopenharmony_ci 232062306a36Sopenharmony_ci BUG_ON(p > end); 232162306a36Sopenharmony_ci msg->front.iov_len = p - msg->front.iov_base; 232262306a36Sopenharmony_ci msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 232362306a36Sopenharmony_ci 232462306a36Sopenharmony_ci dout("%s msg %p tid %llu %u+%u+%u v%d\n", __func__, msg, 232562306a36Sopenharmony_ci le64_to_cpu(msg->hdr.tid), le32_to_cpu(msg->hdr.front_len), 232662306a36Sopenharmony_ci le32_to_cpu(msg->hdr.middle_len), le32_to_cpu(msg->hdr.data_len), 232762306a36Sopenharmony_ci le16_to_cpu(msg->hdr.version)); 232862306a36Sopenharmony_ci} 232962306a36Sopenharmony_ci 233062306a36Sopenharmony_ci/* 233162306a36Sopenharmony_ci * @req has to be assigned a tid and registered. 
233262306a36Sopenharmony_ci */ 233362306a36Sopenharmony_cistatic void send_request(struct ceph_osd_request *req) 233462306a36Sopenharmony_ci{ 233562306a36Sopenharmony_ci struct ceph_osd *osd = req->r_osd; 233662306a36Sopenharmony_ci 233762306a36Sopenharmony_ci verify_osd_locked(osd); 233862306a36Sopenharmony_ci WARN_ON(osd->o_osd != req->r_t.osd); 233962306a36Sopenharmony_ci 234062306a36Sopenharmony_ci /* backoff? */ 234162306a36Sopenharmony_ci if (should_plug_request(req)) 234262306a36Sopenharmony_ci return; 234362306a36Sopenharmony_ci 234462306a36Sopenharmony_ci /* 234562306a36Sopenharmony_ci * We may have a previously queued request message hanging 234662306a36Sopenharmony_ci * around. Cancel it to avoid corrupting the msgr. 234762306a36Sopenharmony_ci */ 234862306a36Sopenharmony_ci if (req->r_sent) 234962306a36Sopenharmony_ci ceph_msg_revoke(req->r_request); 235062306a36Sopenharmony_ci 235162306a36Sopenharmony_ci req->r_flags |= CEPH_OSD_FLAG_KNOWN_REDIR; 235262306a36Sopenharmony_ci if (req->r_attempts) 235362306a36Sopenharmony_ci req->r_flags |= CEPH_OSD_FLAG_RETRY; 235462306a36Sopenharmony_ci else 235562306a36Sopenharmony_ci WARN_ON(req->r_flags & CEPH_OSD_FLAG_RETRY); 235662306a36Sopenharmony_ci 235762306a36Sopenharmony_ci encode_request_partial(req, req->r_request); 235862306a36Sopenharmony_ci 235962306a36Sopenharmony_ci dout("%s req %p tid %llu to pgid %llu.%x spgid %llu.%xs%d osd%d e%u flags 0x%x attempt %d\n", 236062306a36Sopenharmony_ci __func__, req, req->r_tid, req->r_t.pgid.pool, req->r_t.pgid.seed, 236162306a36Sopenharmony_ci req->r_t.spgid.pgid.pool, req->r_t.spgid.pgid.seed, 236262306a36Sopenharmony_ci req->r_t.spgid.shard, osd->o_osd, req->r_t.epoch, req->r_flags, 236362306a36Sopenharmony_ci req->r_attempts); 236462306a36Sopenharmony_ci 236562306a36Sopenharmony_ci req->r_t.paused = false; 236662306a36Sopenharmony_ci req->r_stamp = jiffies; 236762306a36Sopenharmony_ci req->r_attempts++; 236862306a36Sopenharmony_ci 236962306a36Sopenharmony_ci 
req->r_sent = osd->o_incarnation; 237062306a36Sopenharmony_ci req->r_request->hdr.tid = cpu_to_le64(req->r_tid); 237162306a36Sopenharmony_ci ceph_con_send(&osd->o_con, ceph_msg_get(req->r_request)); 237262306a36Sopenharmony_ci} 237362306a36Sopenharmony_ci 237462306a36Sopenharmony_cistatic void maybe_request_map(struct ceph_osd_client *osdc) 237562306a36Sopenharmony_ci{ 237662306a36Sopenharmony_ci bool continuous = false; 237762306a36Sopenharmony_ci 237862306a36Sopenharmony_ci verify_osdc_locked(osdc); 237962306a36Sopenharmony_ci WARN_ON(!osdc->osdmap->epoch); 238062306a36Sopenharmony_ci 238162306a36Sopenharmony_ci if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || 238262306a36Sopenharmony_ci ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD) || 238362306a36Sopenharmony_ci ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR)) { 238462306a36Sopenharmony_ci dout("%s osdc %p continuous\n", __func__, osdc); 238562306a36Sopenharmony_ci continuous = true; 238662306a36Sopenharmony_ci } else { 238762306a36Sopenharmony_ci dout("%s osdc %p onetime\n", __func__, osdc); 238862306a36Sopenharmony_ci } 238962306a36Sopenharmony_ci 239062306a36Sopenharmony_ci if (ceph_monc_want_map(&osdc->client->monc, CEPH_SUB_OSDMAP, 239162306a36Sopenharmony_ci osdc->osdmap->epoch + 1, continuous)) 239262306a36Sopenharmony_ci ceph_monc_renew_subs(&osdc->client->monc); 239362306a36Sopenharmony_ci} 239462306a36Sopenharmony_ci 239562306a36Sopenharmony_cistatic void complete_request(struct ceph_osd_request *req, int err); 239662306a36Sopenharmony_cistatic void send_map_check(struct ceph_osd_request *req); 239762306a36Sopenharmony_ci 239862306a36Sopenharmony_cistatic void __submit_request(struct ceph_osd_request *req, bool wrlocked) 239962306a36Sopenharmony_ci{ 240062306a36Sopenharmony_ci struct ceph_osd_client *osdc = req->r_osdc; 240162306a36Sopenharmony_ci struct ceph_osd *osd; 240262306a36Sopenharmony_ci enum calc_target_result ct_res; 240362306a36Sopenharmony_ci int err = 0; 240462306a36Sopenharmony_ci bool 
need_send = false; 240562306a36Sopenharmony_ci bool promoted = false; 240662306a36Sopenharmony_ci 240762306a36Sopenharmony_ci WARN_ON(req->r_tid); 240862306a36Sopenharmony_ci dout("%s req %p wrlocked %d\n", __func__, req, wrlocked); 240962306a36Sopenharmony_ci 241062306a36Sopenharmony_ciagain: 241162306a36Sopenharmony_ci ct_res = calc_target(osdc, &req->r_t, false); 241262306a36Sopenharmony_ci if (ct_res == CALC_TARGET_POOL_DNE && !wrlocked) 241362306a36Sopenharmony_ci goto promote; 241462306a36Sopenharmony_ci 241562306a36Sopenharmony_ci osd = lookup_create_osd(osdc, req->r_t.osd, wrlocked); 241662306a36Sopenharmony_ci if (IS_ERR(osd)) { 241762306a36Sopenharmony_ci WARN_ON(PTR_ERR(osd) != -EAGAIN || wrlocked); 241862306a36Sopenharmony_ci goto promote; 241962306a36Sopenharmony_ci } 242062306a36Sopenharmony_ci 242162306a36Sopenharmony_ci if (osdc->abort_err) { 242262306a36Sopenharmony_ci dout("req %p abort_err %d\n", req, osdc->abort_err); 242362306a36Sopenharmony_ci err = osdc->abort_err; 242462306a36Sopenharmony_ci } else if (osdc->osdmap->epoch < osdc->epoch_barrier) { 242562306a36Sopenharmony_ci dout("req %p epoch %u barrier %u\n", req, osdc->osdmap->epoch, 242662306a36Sopenharmony_ci osdc->epoch_barrier); 242762306a36Sopenharmony_ci req->r_t.paused = true; 242862306a36Sopenharmony_ci maybe_request_map(osdc); 242962306a36Sopenharmony_ci } else if ((req->r_flags & CEPH_OSD_FLAG_WRITE) && 243062306a36Sopenharmony_ci ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR)) { 243162306a36Sopenharmony_ci dout("req %p pausewr\n", req); 243262306a36Sopenharmony_ci req->r_t.paused = true; 243362306a36Sopenharmony_ci maybe_request_map(osdc); 243462306a36Sopenharmony_ci } else if ((req->r_flags & CEPH_OSD_FLAG_READ) && 243562306a36Sopenharmony_ci ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) { 243662306a36Sopenharmony_ci dout("req %p pauserd\n", req); 243762306a36Sopenharmony_ci req->r_t.paused = true; 243862306a36Sopenharmony_ci maybe_request_map(osdc); 243962306a36Sopenharmony_ci } 
else if ((req->r_flags & CEPH_OSD_FLAG_WRITE) && 244062306a36Sopenharmony_ci !(req->r_flags & (CEPH_OSD_FLAG_FULL_TRY | 244162306a36Sopenharmony_ci CEPH_OSD_FLAG_FULL_FORCE)) && 244262306a36Sopenharmony_ci (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || 244362306a36Sopenharmony_ci pool_full(osdc, req->r_t.base_oloc.pool))) { 244462306a36Sopenharmony_ci dout("req %p full/pool_full\n", req); 244562306a36Sopenharmony_ci if (ceph_test_opt(osdc->client, ABORT_ON_FULL)) { 244662306a36Sopenharmony_ci err = -ENOSPC; 244762306a36Sopenharmony_ci } else { 244862306a36Sopenharmony_ci if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) 244962306a36Sopenharmony_ci pr_warn_ratelimited("cluster is full (osdmap FULL)\n"); 245062306a36Sopenharmony_ci else 245162306a36Sopenharmony_ci pr_warn_ratelimited("pool %lld is full or reached quota\n", 245262306a36Sopenharmony_ci req->r_t.base_oloc.pool); 245362306a36Sopenharmony_ci req->r_t.paused = true; 245462306a36Sopenharmony_ci maybe_request_map(osdc); 245562306a36Sopenharmony_ci } 245662306a36Sopenharmony_ci } else if (!osd_homeless(osd)) { 245762306a36Sopenharmony_ci need_send = true; 245862306a36Sopenharmony_ci } else { 245962306a36Sopenharmony_ci maybe_request_map(osdc); 246062306a36Sopenharmony_ci } 246162306a36Sopenharmony_ci 246262306a36Sopenharmony_ci mutex_lock(&osd->lock); 246362306a36Sopenharmony_ci /* 246462306a36Sopenharmony_ci * Assign the tid atomically with send_request() to protect 246562306a36Sopenharmony_ci * multiple writes to the same object from racing with each 246662306a36Sopenharmony_ci * other, resulting in out of order ops on the OSDs. 
246762306a36Sopenharmony_ci */ 246862306a36Sopenharmony_ci req->r_tid = atomic64_inc_return(&osdc->last_tid); 246962306a36Sopenharmony_ci link_request(osd, req); 247062306a36Sopenharmony_ci if (need_send) 247162306a36Sopenharmony_ci send_request(req); 247262306a36Sopenharmony_ci else if (err) 247362306a36Sopenharmony_ci complete_request(req, err); 247462306a36Sopenharmony_ci mutex_unlock(&osd->lock); 247562306a36Sopenharmony_ci 247662306a36Sopenharmony_ci if (!err && ct_res == CALC_TARGET_POOL_DNE) 247762306a36Sopenharmony_ci send_map_check(req); 247862306a36Sopenharmony_ci 247962306a36Sopenharmony_ci if (promoted) 248062306a36Sopenharmony_ci downgrade_write(&osdc->lock); 248162306a36Sopenharmony_ci return; 248262306a36Sopenharmony_ci 248362306a36Sopenharmony_cipromote: 248462306a36Sopenharmony_ci up_read(&osdc->lock); 248562306a36Sopenharmony_ci down_write(&osdc->lock); 248662306a36Sopenharmony_ci wrlocked = true; 248762306a36Sopenharmony_ci promoted = true; 248862306a36Sopenharmony_ci goto again; 248962306a36Sopenharmony_ci} 249062306a36Sopenharmony_ci 249162306a36Sopenharmony_cistatic void account_request(struct ceph_osd_request *req) 249262306a36Sopenharmony_ci{ 249362306a36Sopenharmony_ci WARN_ON(req->r_flags & (CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK)); 249462306a36Sopenharmony_ci WARN_ON(!(req->r_flags & (CEPH_OSD_FLAG_READ | CEPH_OSD_FLAG_WRITE))); 249562306a36Sopenharmony_ci 249662306a36Sopenharmony_ci req->r_flags |= CEPH_OSD_FLAG_ONDISK; 249762306a36Sopenharmony_ci atomic_inc(&req->r_osdc->num_requests); 249862306a36Sopenharmony_ci 249962306a36Sopenharmony_ci req->r_start_stamp = jiffies; 250062306a36Sopenharmony_ci req->r_start_latency = ktime_get(); 250162306a36Sopenharmony_ci} 250262306a36Sopenharmony_ci 250362306a36Sopenharmony_cistatic void submit_request(struct ceph_osd_request *req, bool wrlocked) 250462306a36Sopenharmony_ci{ 250562306a36Sopenharmony_ci ceph_osdc_get_request(req); 250662306a36Sopenharmony_ci account_request(req); 
250762306a36Sopenharmony_ci __submit_request(req, wrlocked); 250862306a36Sopenharmony_ci} 250962306a36Sopenharmony_ci 251062306a36Sopenharmony_cistatic void finish_request(struct ceph_osd_request *req) 251162306a36Sopenharmony_ci{ 251262306a36Sopenharmony_ci struct ceph_osd_client *osdc = req->r_osdc; 251362306a36Sopenharmony_ci 251462306a36Sopenharmony_ci WARN_ON(lookup_request_mc(&osdc->map_checks, req->r_tid)); 251562306a36Sopenharmony_ci dout("%s req %p tid %llu\n", __func__, req, req->r_tid); 251662306a36Sopenharmony_ci 251762306a36Sopenharmony_ci req->r_end_latency = ktime_get(); 251862306a36Sopenharmony_ci 251962306a36Sopenharmony_ci if (req->r_osd) { 252062306a36Sopenharmony_ci ceph_init_sparse_read(&req->r_osd->o_sparse_read); 252162306a36Sopenharmony_ci unlink_request(req->r_osd, req); 252262306a36Sopenharmony_ci } 252362306a36Sopenharmony_ci atomic_dec(&osdc->num_requests); 252462306a36Sopenharmony_ci 252562306a36Sopenharmony_ci /* 252662306a36Sopenharmony_ci * If an OSD has failed or returned and a request has been sent 252762306a36Sopenharmony_ci * twice, it's possible to get a reply and end up here while the 252862306a36Sopenharmony_ci * request message is queued for delivery. We will ignore the 252962306a36Sopenharmony_ci * reply, so not a big deal, but better to try and catch it. 
253062306a36Sopenharmony_ci */ 253162306a36Sopenharmony_ci ceph_msg_revoke(req->r_request); 253262306a36Sopenharmony_ci ceph_msg_revoke_incoming(req->r_reply); 253362306a36Sopenharmony_ci} 253462306a36Sopenharmony_ci 253562306a36Sopenharmony_cistatic void __complete_request(struct ceph_osd_request *req) 253662306a36Sopenharmony_ci{ 253762306a36Sopenharmony_ci dout("%s req %p tid %llu cb %ps result %d\n", __func__, req, 253862306a36Sopenharmony_ci req->r_tid, req->r_callback, req->r_result); 253962306a36Sopenharmony_ci 254062306a36Sopenharmony_ci if (req->r_callback) 254162306a36Sopenharmony_ci req->r_callback(req); 254262306a36Sopenharmony_ci complete_all(&req->r_completion); 254362306a36Sopenharmony_ci ceph_osdc_put_request(req); 254462306a36Sopenharmony_ci} 254562306a36Sopenharmony_ci 254662306a36Sopenharmony_cistatic void complete_request_workfn(struct work_struct *work) 254762306a36Sopenharmony_ci{ 254862306a36Sopenharmony_ci struct ceph_osd_request *req = 254962306a36Sopenharmony_ci container_of(work, struct ceph_osd_request, r_complete_work); 255062306a36Sopenharmony_ci 255162306a36Sopenharmony_ci __complete_request(req); 255262306a36Sopenharmony_ci} 255362306a36Sopenharmony_ci 255462306a36Sopenharmony_ci/* 255562306a36Sopenharmony_ci * This is open-coded in handle_reply(). 
255662306a36Sopenharmony_ci */ 255762306a36Sopenharmony_cistatic void complete_request(struct ceph_osd_request *req, int err) 255862306a36Sopenharmony_ci{ 255962306a36Sopenharmony_ci dout("%s req %p tid %llu err %d\n", __func__, req, req->r_tid, err); 256062306a36Sopenharmony_ci 256162306a36Sopenharmony_ci req->r_result = err; 256262306a36Sopenharmony_ci finish_request(req); 256362306a36Sopenharmony_ci 256462306a36Sopenharmony_ci INIT_WORK(&req->r_complete_work, complete_request_workfn); 256562306a36Sopenharmony_ci queue_work(req->r_osdc->completion_wq, &req->r_complete_work); 256662306a36Sopenharmony_ci} 256762306a36Sopenharmony_ci 256862306a36Sopenharmony_cistatic void cancel_map_check(struct ceph_osd_request *req) 256962306a36Sopenharmony_ci{ 257062306a36Sopenharmony_ci struct ceph_osd_client *osdc = req->r_osdc; 257162306a36Sopenharmony_ci struct ceph_osd_request *lookup_req; 257262306a36Sopenharmony_ci 257362306a36Sopenharmony_ci verify_osdc_wrlocked(osdc); 257462306a36Sopenharmony_ci 257562306a36Sopenharmony_ci lookup_req = lookup_request_mc(&osdc->map_checks, req->r_tid); 257662306a36Sopenharmony_ci if (!lookup_req) 257762306a36Sopenharmony_ci return; 257862306a36Sopenharmony_ci 257962306a36Sopenharmony_ci WARN_ON(lookup_req != req); 258062306a36Sopenharmony_ci erase_request_mc(&osdc->map_checks, req); 258162306a36Sopenharmony_ci ceph_osdc_put_request(req); 258262306a36Sopenharmony_ci} 258362306a36Sopenharmony_ci 258462306a36Sopenharmony_cistatic void cancel_request(struct ceph_osd_request *req) 258562306a36Sopenharmony_ci{ 258662306a36Sopenharmony_ci dout("%s req %p tid %llu\n", __func__, req, req->r_tid); 258762306a36Sopenharmony_ci 258862306a36Sopenharmony_ci cancel_map_check(req); 258962306a36Sopenharmony_ci finish_request(req); 259062306a36Sopenharmony_ci complete_all(&req->r_completion); 259162306a36Sopenharmony_ci ceph_osdc_put_request(req); 259262306a36Sopenharmony_ci} 259362306a36Sopenharmony_ci 259462306a36Sopenharmony_cistatic void 
abort_request(struct ceph_osd_request *req, int err) 259562306a36Sopenharmony_ci{ 259662306a36Sopenharmony_ci dout("%s req %p tid %llu err %d\n", __func__, req, req->r_tid, err); 259762306a36Sopenharmony_ci 259862306a36Sopenharmony_ci cancel_map_check(req); 259962306a36Sopenharmony_ci complete_request(req, err); 260062306a36Sopenharmony_ci} 260162306a36Sopenharmony_ci 260262306a36Sopenharmony_cistatic int abort_fn(struct ceph_osd_request *req, void *arg) 260362306a36Sopenharmony_ci{ 260462306a36Sopenharmony_ci int err = *(int *)arg; 260562306a36Sopenharmony_ci 260662306a36Sopenharmony_ci abort_request(req, err); 260762306a36Sopenharmony_ci return 0; /* continue iteration */ 260862306a36Sopenharmony_ci} 260962306a36Sopenharmony_ci 261062306a36Sopenharmony_ci/* 261162306a36Sopenharmony_ci * Abort all in-flight requests with @err and arrange for all future 261262306a36Sopenharmony_ci * requests to be failed immediately. 261362306a36Sopenharmony_ci */ 261462306a36Sopenharmony_civoid ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err) 261562306a36Sopenharmony_ci{ 261662306a36Sopenharmony_ci dout("%s osdc %p err %d\n", __func__, osdc, err); 261762306a36Sopenharmony_ci down_write(&osdc->lock); 261862306a36Sopenharmony_ci for_each_request(osdc, abort_fn, &err); 261962306a36Sopenharmony_ci osdc->abort_err = err; 262062306a36Sopenharmony_ci up_write(&osdc->lock); 262162306a36Sopenharmony_ci} 262262306a36Sopenharmony_ciEXPORT_SYMBOL(ceph_osdc_abort_requests); 262362306a36Sopenharmony_ci 262462306a36Sopenharmony_civoid ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc) 262562306a36Sopenharmony_ci{ 262662306a36Sopenharmony_ci down_write(&osdc->lock); 262762306a36Sopenharmony_ci osdc->abort_err = 0; 262862306a36Sopenharmony_ci up_write(&osdc->lock); 262962306a36Sopenharmony_ci} 263062306a36Sopenharmony_ciEXPORT_SYMBOL(ceph_osdc_clear_abort_err); 263162306a36Sopenharmony_ci 263262306a36Sopenharmony_cistatic void update_epoch_barrier(struct ceph_osd_client *osdc, 
u32 eb) 263362306a36Sopenharmony_ci{ 263462306a36Sopenharmony_ci if (likely(eb > osdc->epoch_barrier)) { 263562306a36Sopenharmony_ci dout("updating epoch_barrier from %u to %u\n", 263662306a36Sopenharmony_ci osdc->epoch_barrier, eb); 263762306a36Sopenharmony_ci osdc->epoch_barrier = eb; 263862306a36Sopenharmony_ci /* Request map if we're not to the barrier yet */ 263962306a36Sopenharmony_ci if (eb > osdc->osdmap->epoch) 264062306a36Sopenharmony_ci maybe_request_map(osdc); 264162306a36Sopenharmony_ci } 264262306a36Sopenharmony_ci} 264362306a36Sopenharmony_ci 264462306a36Sopenharmony_civoid ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb) 264562306a36Sopenharmony_ci{ 264662306a36Sopenharmony_ci down_read(&osdc->lock); 264762306a36Sopenharmony_ci if (unlikely(eb > osdc->epoch_barrier)) { 264862306a36Sopenharmony_ci up_read(&osdc->lock); 264962306a36Sopenharmony_ci down_write(&osdc->lock); 265062306a36Sopenharmony_ci update_epoch_barrier(osdc, eb); 265162306a36Sopenharmony_ci up_write(&osdc->lock); 265262306a36Sopenharmony_ci } else { 265362306a36Sopenharmony_ci up_read(&osdc->lock); 265462306a36Sopenharmony_ci } 265562306a36Sopenharmony_ci} 265662306a36Sopenharmony_ciEXPORT_SYMBOL(ceph_osdc_update_epoch_barrier); 265762306a36Sopenharmony_ci 265862306a36Sopenharmony_ci/* 265962306a36Sopenharmony_ci * We can end up releasing caps as a result of abort_request(). 266062306a36Sopenharmony_ci * In that case, we probably want to ensure that the cap release message 266162306a36Sopenharmony_ci * has an updated epoch barrier in it, so set the epoch barrier prior to 266262306a36Sopenharmony_ci * aborting the first request. 
266362306a36Sopenharmony_ci */ 266462306a36Sopenharmony_cistatic int abort_on_full_fn(struct ceph_osd_request *req, void *arg) 266562306a36Sopenharmony_ci{ 266662306a36Sopenharmony_ci struct ceph_osd_client *osdc = req->r_osdc; 266762306a36Sopenharmony_ci bool *victims = arg; 266862306a36Sopenharmony_ci 266962306a36Sopenharmony_ci if ((req->r_flags & CEPH_OSD_FLAG_WRITE) && 267062306a36Sopenharmony_ci (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || 267162306a36Sopenharmony_ci pool_full(osdc, req->r_t.base_oloc.pool))) { 267262306a36Sopenharmony_ci if (!*victims) { 267362306a36Sopenharmony_ci update_epoch_barrier(osdc, osdc->osdmap->epoch); 267462306a36Sopenharmony_ci *victims = true; 267562306a36Sopenharmony_ci } 267662306a36Sopenharmony_ci abort_request(req, -ENOSPC); 267762306a36Sopenharmony_ci } 267862306a36Sopenharmony_ci 267962306a36Sopenharmony_ci return 0; /* continue iteration */ 268062306a36Sopenharmony_ci} 268162306a36Sopenharmony_ci 268262306a36Sopenharmony_ci/* 268362306a36Sopenharmony_ci * Drop all pending requests that are stalled waiting on a full condition to 268462306a36Sopenharmony_ci * clear, and complete them with ENOSPC as the return code. Set the 268562306a36Sopenharmony_ci * osdc->epoch_barrier to the latest map epoch that we've seen if any were 268662306a36Sopenharmony_ci * cancelled. 
268762306a36Sopenharmony_ci */ 268862306a36Sopenharmony_cistatic void ceph_osdc_abort_on_full(struct ceph_osd_client *osdc) 268962306a36Sopenharmony_ci{ 269062306a36Sopenharmony_ci bool victims = false; 269162306a36Sopenharmony_ci 269262306a36Sopenharmony_ci if (ceph_test_opt(osdc->client, ABORT_ON_FULL) && 269362306a36Sopenharmony_ci (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || have_pool_full(osdc))) 269462306a36Sopenharmony_ci for_each_request(osdc, abort_on_full_fn, &victims); 269562306a36Sopenharmony_ci} 269662306a36Sopenharmony_ci 269762306a36Sopenharmony_cistatic void check_pool_dne(struct ceph_osd_request *req) 269862306a36Sopenharmony_ci{ 269962306a36Sopenharmony_ci struct ceph_osd_client *osdc = req->r_osdc; 270062306a36Sopenharmony_ci struct ceph_osdmap *map = osdc->osdmap; 270162306a36Sopenharmony_ci 270262306a36Sopenharmony_ci verify_osdc_wrlocked(osdc); 270362306a36Sopenharmony_ci WARN_ON(!map->epoch); 270462306a36Sopenharmony_ci 270562306a36Sopenharmony_ci if (req->r_attempts) { 270662306a36Sopenharmony_ci /* 270762306a36Sopenharmony_ci * We sent a request earlier, which means that 270862306a36Sopenharmony_ci * previously the pool existed, and now it does not 270962306a36Sopenharmony_ci * (i.e., it was deleted). 
271062306a36Sopenharmony_ci */ 271162306a36Sopenharmony_ci req->r_map_dne_bound = map->epoch; 271262306a36Sopenharmony_ci dout("%s req %p tid %llu pool disappeared\n", __func__, req, 271362306a36Sopenharmony_ci req->r_tid); 271462306a36Sopenharmony_ci } else { 271562306a36Sopenharmony_ci dout("%s req %p tid %llu map_dne_bound %u have %u\n", __func__, 271662306a36Sopenharmony_ci req, req->r_tid, req->r_map_dne_bound, map->epoch); 271762306a36Sopenharmony_ci } 271862306a36Sopenharmony_ci 271962306a36Sopenharmony_ci if (req->r_map_dne_bound) { 272062306a36Sopenharmony_ci if (map->epoch >= req->r_map_dne_bound) { 272162306a36Sopenharmony_ci /* we had a new enough map */ 272262306a36Sopenharmony_ci pr_info_ratelimited("tid %llu pool does not exist\n", 272362306a36Sopenharmony_ci req->r_tid); 272462306a36Sopenharmony_ci complete_request(req, -ENOENT); 272562306a36Sopenharmony_ci } 272662306a36Sopenharmony_ci } else { 272762306a36Sopenharmony_ci send_map_check(req); 272862306a36Sopenharmony_ci } 272962306a36Sopenharmony_ci} 273062306a36Sopenharmony_ci 273162306a36Sopenharmony_cistatic void map_check_cb(struct ceph_mon_generic_request *greq) 273262306a36Sopenharmony_ci{ 273362306a36Sopenharmony_ci struct ceph_osd_client *osdc = &greq->monc->client->osdc; 273462306a36Sopenharmony_ci struct ceph_osd_request *req; 273562306a36Sopenharmony_ci u64 tid = greq->private_data; 273662306a36Sopenharmony_ci 273762306a36Sopenharmony_ci WARN_ON(greq->result || !greq->u.newest); 273862306a36Sopenharmony_ci 273962306a36Sopenharmony_ci down_write(&osdc->lock); 274062306a36Sopenharmony_ci req = lookup_request_mc(&osdc->map_checks, tid); 274162306a36Sopenharmony_ci if (!req) { 274262306a36Sopenharmony_ci dout("%s tid %llu dne\n", __func__, tid); 274362306a36Sopenharmony_ci goto out_unlock; 274462306a36Sopenharmony_ci } 274562306a36Sopenharmony_ci 274662306a36Sopenharmony_ci dout("%s req %p tid %llu map_dne_bound %u newest %llu\n", __func__, 274762306a36Sopenharmony_ci req, req->r_tid, 
req->r_map_dne_bound, greq->u.newest); 274862306a36Sopenharmony_ci if (!req->r_map_dne_bound) 274962306a36Sopenharmony_ci req->r_map_dne_bound = greq->u.newest; 275062306a36Sopenharmony_ci erase_request_mc(&osdc->map_checks, req); 275162306a36Sopenharmony_ci check_pool_dne(req); 275262306a36Sopenharmony_ci 275362306a36Sopenharmony_ci ceph_osdc_put_request(req); 275462306a36Sopenharmony_ciout_unlock: 275562306a36Sopenharmony_ci up_write(&osdc->lock); 275662306a36Sopenharmony_ci} 275762306a36Sopenharmony_ci 275862306a36Sopenharmony_cistatic void send_map_check(struct ceph_osd_request *req) 275962306a36Sopenharmony_ci{ 276062306a36Sopenharmony_ci struct ceph_osd_client *osdc = req->r_osdc; 276162306a36Sopenharmony_ci struct ceph_osd_request *lookup_req; 276262306a36Sopenharmony_ci int ret; 276362306a36Sopenharmony_ci 276462306a36Sopenharmony_ci verify_osdc_wrlocked(osdc); 276562306a36Sopenharmony_ci 276662306a36Sopenharmony_ci lookup_req = lookup_request_mc(&osdc->map_checks, req->r_tid); 276762306a36Sopenharmony_ci if (lookup_req) { 276862306a36Sopenharmony_ci WARN_ON(lookup_req != req); 276962306a36Sopenharmony_ci return; 277062306a36Sopenharmony_ci } 277162306a36Sopenharmony_ci 277262306a36Sopenharmony_ci ceph_osdc_get_request(req); 277362306a36Sopenharmony_ci insert_request_mc(&osdc->map_checks, req); 277462306a36Sopenharmony_ci ret = ceph_monc_get_version_async(&osdc->client->monc, "osdmap", 277562306a36Sopenharmony_ci map_check_cb, req->r_tid); 277662306a36Sopenharmony_ci WARN_ON(ret); 277762306a36Sopenharmony_ci} 277862306a36Sopenharmony_ci 277962306a36Sopenharmony_ci/* 278062306a36Sopenharmony_ci * lingering requests, watch/notify v2 infrastructure 278162306a36Sopenharmony_ci */ 278262306a36Sopenharmony_cistatic void linger_release(struct kref *kref) 278362306a36Sopenharmony_ci{ 278462306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq = 278562306a36Sopenharmony_ci container_of(kref, struct ceph_osd_linger_request, kref); 278662306a36Sopenharmony_ci 
278762306a36Sopenharmony_ci dout("%s lreq %p reg_req %p ping_req %p\n", __func__, lreq, 278862306a36Sopenharmony_ci lreq->reg_req, lreq->ping_req); 278962306a36Sopenharmony_ci WARN_ON(!RB_EMPTY_NODE(&lreq->node)); 279062306a36Sopenharmony_ci WARN_ON(!RB_EMPTY_NODE(&lreq->osdc_node)); 279162306a36Sopenharmony_ci WARN_ON(!RB_EMPTY_NODE(&lreq->mc_node)); 279262306a36Sopenharmony_ci WARN_ON(!list_empty(&lreq->scan_item)); 279362306a36Sopenharmony_ci WARN_ON(!list_empty(&lreq->pending_lworks)); 279462306a36Sopenharmony_ci WARN_ON(lreq->osd); 279562306a36Sopenharmony_ci 279662306a36Sopenharmony_ci if (lreq->request_pl) 279762306a36Sopenharmony_ci ceph_pagelist_release(lreq->request_pl); 279862306a36Sopenharmony_ci if (lreq->notify_id_pages) 279962306a36Sopenharmony_ci ceph_release_page_vector(lreq->notify_id_pages, 1); 280062306a36Sopenharmony_ci 280162306a36Sopenharmony_ci ceph_osdc_put_request(lreq->reg_req); 280262306a36Sopenharmony_ci ceph_osdc_put_request(lreq->ping_req); 280362306a36Sopenharmony_ci target_destroy(&lreq->t); 280462306a36Sopenharmony_ci kfree(lreq); 280562306a36Sopenharmony_ci} 280662306a36Sopenharmony_ci 280762306a36Sopenharmony_cistatic void linger_put(struct ceph_osd_linger_request *lreq) 280862306a36Sopenharmony_ci{ 280962306a36Sopenharmony_ci if (lreq) 281062306a36Sopenharmony_ci kref_put(&lreq->kref, linger_release); 281162306a36Sopenharmony_ci} 281262306a36Sopenharmony_ci 281362306a36Sopenharmony_cistatic struct ceph_osd_linger_request * 281462306a36Sopenharmony_cilinger_get(struct ceph_osd_linger_request *lreq) 281562306a36Sopenharmony_ci{ 281662306a36Sopenharmony_ci kref_get(&lreq->kref); 281762306a36Sopenharmony_ci return lreq; 281862306a36Sopenharmony_ci} 281962306a36Sopenharmony_ci 282062306a36Sopenharmony_cistatic struct ceph_osd_linger_request * 282162306a36Sopenharmony_cilinger_alloc(struct ceph_osd_client *osdc) 282262306a36Sopenharmony_ci{ 282362306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq; 282462306a36Sopenharmony_ci 
282562306a36Sopenharmony_ci lreq = kzalloc(sizeof(*lreq), GFP_NOIO); 282662306a36Sopenharmony_ci if (!lreq) 282762306a36Sopenharmony_ci return NULL; 282862306a36Sopenharmony_ci 282962306a36Sopenharmony_ci kref_init(&lreq->kref); 283062306a36Sopenharmony_ci mutex_init(&lreq->lock); 283162306a36Sopenharmony_ci RB_CLEAR_NODE(&lreq->node); 283262306a36Sopenharmony_ci RB_CLEAR_NODE(&lreq->osdc_node); 283362306a36Sopenharmony_ci RB_CLEAR_NODE(&lreq->mc_node); 283462306a36Sopenharmony_ci INIT_LIST_HEAD(&lreq->scan_item); 283562306a36Sopenharmony_ci INIT_LIST_HEAD(&lreq->pending_lworks); 283662306a36Sopenharmony_ci init_completion(&lreq->reg_commit_wait); 283762306a36Sopenharmony_ci init_completion(&lreq->notify_finish_wait); 283862306a36Sopenharmony_ci 283962306a36Sopenharmony_ci lreq->osdc = osdc; 284062306a36Sopenharmony_ci target_init(&lreq->t); 284162306a36Sopenharmony_ci 284262306a36Sopenharmony_ci dout("%s lreq %p\n", __func__, lreq); 284362306a36Sopenharmony_ci return lreq; 284462306a36Sopenharmony_ci} 284562306a36Sopenharmony_ci 284662306a36Sopenharmony_ciDEFINE_RB_INSDEL_FUNCS(linger, struct ceph_osd_linger_request, linger_id, node) 284762306a36Sopenharmony_ciDEFINE_RB_FUNCS(linger_osdc, struct ceph_osd_linger_request, linger_id, osdc_node) 284862306a36Sopenharmony_ciDEFINE_RB_FUNCS(linger_mc, struct ceph_osd_linger_request, linger_id, mc_node) 284962306a36Sopenharmony_ci 285062306a36Sopenharmony_ci/* 285162306a36Sopenharmony_ci * Create linger request <-> OSD session relation. 285262306a36Sopenharmony_ci * 285362306a36Sopenharmony_ci * @lreq has to be registered, @osd may be homeless. 
285462306a36Sopenharmony_ci */ 285562306a36Sopenharmony_cistatic void link_linger(struct ceph_osd *osd, 285662306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq) 285762306a36Sopenharmony_ci{ 285862306a36Sopenharmony_ci verify_osd_locked(osd); 285962306a36Sopenharmony_ci WARN_ON(!lreq->linger_id || lreq->osd); 286062306a36Sopenharmony_ci dout("%s osd %p osd%d lreq %p linger_id %llu\n", __func__, osd, 286162306a36Sopenharmony_ci osd->o_osd, lreq, lreq->linger_id); 286262306a36Sopenharmony_ci 286362306a36Sopenharmony_ci if (!osd_homeless(osd)) 286462306a36Sopenharmony_ci __remove_osd_from_lru(osd); 286562306a36Sopenharmony_ci else 286662306a36Sopenharmony_ci atomic_inc(&osd->o_osdc->num_homeless); 286762306a36Sopenharmony_ci 286862306a36Sopenharmony_ci get_osd(osd); 286962306a36Sopenharmony_ci insert_linger(&osd->o_linger_requests, lreq); 287062306a36Sopenharmony_ci lreq->osd = osd; 287162306a36Sopenharmony_ci} 287262306a36Sopenharmony_ci 287362306a36Sopenharmony_cistatic void unlink_linger(struct ceph_osd *osd, 287462306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq) 287562306a36Sopenharmony_ci{ 287662306a36Sopenharmony_ci verify_osd_locked(osd); 287762306a36Sopenharmony_ci WARN_ON(lreq->osd != osd); 287862306a36Sopenharmony_ci dout("%s osd %p osd%d lreq %p linger_id %llu\n", __func__, osd, 287962306a36Sopenharmony_ci osd->o_osd, lreq, lreq->linger_id); 288062306a36Sopenharmony_ci 288162306a36Sopenharmony_ci lreq->osd = NULL; 288262306a36Sopenharmony_ci erase_linger(&osd->o_linger_requests, lreq); 288362306a36Sopenharmony_ci put_osd(osd); 288462306a36Sopenharmony_ci 288562306a36Sopenharmony_ci if (!osd_homeless(osd)) 288662306a36Sopenharmony_ci maybe_move_osd_to_lru(osd); 288762306a36Sopenharmony_ci else 288862306a36Sopenharmony_ci atomic_dec(&osd->o_osdc->num_homeless); 288962306a36Sopenharmony_ci} 289062306a36Sopenharmony_ci 289162306a36Sopenharmony_cistatic bool __linger_registered(struct ceph_osd_linger_request *lreq) 289262306a36Sopenharmony_ci{ 
289362306a36Sopenharmony_ci verify_osdc_locked(lreq->osdc); 289462306a36Sopenharmony_ci 289562306a36Sopenharmony_ci return !RB_EMPTY_NODE(&lreq->osdc_node); 289662306a36Sopenharmony_ci} 289762306a36Sopenharmony_ci 289862306a36Sopenharmony_cistatic bool linger_registered(struct ceph_osd_linger_request *lreq) 289962306a36Sopenharmony_ci{ 290062306a36Sopenharmony_ci struct ceph_osd_client *osdc = lreq->osdc; 290162306a36Sopenharmony_ci bool registered; 290262306a36Sopenharmony_ci 290362306a36Sopenharmony_ci down_read(&osdc->lock); 290462306a36Sopenharmony_ci registered = __linger_registered(lreq); 290562306a36Sopenharmony_ci up_read(&osdc->lock); 290662306a36Sopenharmony_ci 290762306a36Sopenharmony_ci return registered; 290862306a36Sopenharmony_ci} 290962306a36Sopenharmony_ci 291062306a36Sopenharmony_cistatic void linger_register(struct ceph_osd_linger_request *lreq) 291162306a36Sopenharmony_ci{ 291262306a36Sopenharmony_ci struct ceph_osd_client *osdc = lreq->osdc; 291362306a36Sopenharmony_ci 291462306a36Sopenharmony_ci verify_osdc_wrlocked(osdc); 291562306a36Sopenharmony_ci WARN_ON(lreq->linger_id); 291662306a36Sopenharmony_ci 291762306a36Sopenharmony_ci linger_get(lreq); 291862306a36Sopenharmony_ci lreq->linger_id = ++osdc->last_linger_id; 291962306a36Sopenharmony_ci insert_linger_osdc(&osdc->linger_requests, lreq); 292062306a36Sopenharmony_ci} 292162306a36Sopenharmony_ci 292262306a36Sopenharmony_cistatic void linger_unregister(struct ceph_osd_linger_request *lreq) 292362306a36Sopenharmony_ci{ 292462306a36Sopenharmony_ci struct ceph_osd_client *osdc = lreq->osdc; 292562306a36Sopenharmony_ci 292662306a36Sopenharmony_ci verify_osdc_wrlocked(osdc); 292762306a36Sopenharmony_ci 292862306a36Sopenharmony_ci erase_linger_osdc(&osdc->linger_requests, lreq); 292962306a36Sopenharmony_ci linger_put(lreq); 293062306a36Sopenharmony_ci} 293162306a36Sopenharmony_ci 293262306a36Sopenharmony_cistatic void cancel_linger_request(struct ceph_osd_request *req) 
293362306a36Sopenharmony_ci{ 293462306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq = req->r_priv; 293562306a36Sopenharmony_ci 293662306a36Sopenharmony_ci WARN_ON(!req->r_linger); 293762306a36Sopenharmony_ci cancel_request(req); 293862306a36Sopenharmony_ci linger_put(lreq); 293962306a36Sopenharmony_ci} 294062306a36Sopenharmony_ci 294162306a36Sopenharmony_cistruct linger_work { 294262306a36Sopenharmony_ci struct work_struct work; 294362306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq; 294462306a36Sopenharmony_ci struct list_head pending_item; 294562306a36Sopenharmony_ci unsigned long queued_stamp; 294662306a36Sopenharmony_ci 294762306a36Sopenharmony_ci union { 294862306a36Sopenharmony_ci struct { 294962306a36Sopenharmony_ci u64 notify_id; 295062306a36Sopenharmony_ci u64 notifier_id; 295162306a36Sopenharmony_ci void *payload; /* points into @msg front */ 295262306a36Sopenharmony_ci size_t payload_len; 295362306a36Sopenharmony_ci 295462306a36Sopenharmony_ci struct ceph_msg *msg; /* for ceph_msg_put() */ 295562306a36Sopenharmony_ci } notify; 295662306a36Sopenharmony_ci struct { 295762306a36Sopenharmony_ci int err; 295862306a36Sopenharmony_ci } error; 295962306a36Sopenharmony_ci }; 296062306a36Sopenharmony_ci}; 296162306a36Sopenharmony_ci 296262306a36Sopenharmony_cistatic struct linger_work *lwork_alloc(struct ceph_osd_linger_request *lreq, 296362306a36Sopenharmony_ci work_func_t workfn) 296462306a36Sopenharmony_ci{ 296562306a36Sopenharmony_ci struct linger_work *lwork; 296662306a36Sopenharmony_ci 296762306a36Sopenharmony_ci lwork = kzalloc(sizeof(*lwork), GFP_NOIO); 296862306a36Sopenharmony_ci if (!lwork) 296962306a36Sopenharmony_ci return NULL; 297062306a36Sopenharmony_ci 297162306a36Sopenharmony_ci INIT_WORK(&lwork->work, workfn); 297262306a36Sopenharmony_ci INIT_LIST_HEAD(&lwork->pending_item); 297362306a36Sopenharmony_ci lwork->lreq = linger_get(lreq); 297462306a36Sopenharmony_ci 297562306a36Sopenharmony_ci return lwork; 
297662306a36Sopenharmony_ci} 297762306a36Sopenharmony_ci 297862306a36Sopenharmony_cistatic void lwork_free(struct linger_work *lwork) 297962306a36Sopenharmony_ci{ 298062306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq = lwork->lreq; 298162306a36Sopenharmony_ci 298262306a36Sopenharmony_ci mutex_lock(&lreq->lock); 298362306a36Sopenharmony_ci list_del(&lwork->pending_item); 298462306a36Sopenharmony_ci mutex_unlock(&lreq->lock); 298562306a36Sopenharmony_ci 298662306a36Sopenharmony_ci linger_put(lreq); 298762306a36Sopenharmony_ci kfree(lwork); 298862306a36Sopenharmony_ci} 298962306a36Sopenharmony_ci 299062306a36Sopenharmony_cistatic void lwork_queue(struct linger_work *lwork) 299162306a36Sopenharmony_ci{ 299262306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq = lwork->lreq; 299362306a36Sopenharmony_ci struct ceph_osd_client *osdc = lreq->osdc; 299462306a36Sopenharmony_ci 299562306a36Sopenharmony_ci verify_lreq_locked(lreq); 299662306a36Sopenharmony_ci WARN_ON(!list_empty(&lwork->pending_item)); 299762306a36Sopenharmony_ci 299862306a36Sopenharmony_ci lwork->queued_stamp = jiffies; 299962306a36Sopenharmony_ci list_add_tail(&lwork->pending_item, &lreq->pending_lworks); 300062306a36Sopenharmony_ci queue_work(osdc->notify_wq, &lwork->work); 300162306a36Sopenharmony_ci} 300262306a36Sopenharmony_ci 300362306a36Sopenharmony_cistatic void do_watch_notify(struct work_struct *w) 300462306a36Sopenharmony_ci{ 300562306a36Sopenharmony_ci struct linger_work *lwork = container_of(w, struct linger_work, work); 300662306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq = lwork->lreq; 300762306a36Sopenharmony_ci 300862306a36Sopenharmony_ci if (!linger_registered(lreq)) { 300962306a36Sopenharmony_ci dout("%s lreq %p not registered\n", __func__, lreq); 301062306a36Sopenharmony_ci goto out; 301162306a36Sopenharmony_ci } 301262306a36Sopenharmony_ci 301362306a36Sopenharmony_ci WARN_ON(!lreq->is_watch); 301462306a36Sopenharmony_ci dout("%s lreq %p notify_id %llu 
notifier_id %llu payload_len %zu\n", 301562306a36Sopenharmony_ci __func__, lreq, lwork->notify.notify_id, lwork->notify.notifier_id, 301662306a36Sopenharmony_ci lwork->notify.payload_len); 301762306a36Sopenharmony_ci lreq->wcb(lreq->data, lwork->notify.notify_id, lreq->linger_id, 301862306a36Sopenharmony_ci lwork->notify.notifier_id, lwork->notify.payload, 301962306a36Sopenharmony_ci lwork->notify.payload_len); 302062306a36Sopenharmony_ci 302162306a36Sopenharmony_ciout: 302262306a36Sopenharmony_ci ceph_msg_put(lwork->notify.msg); 302362306a36Sopenharmony_ci lwork_free(lwork); 302462306a36Sopenharmony_ci} 302562306a36Sopenharmony_ci 302662306a36Sopenharmony_cistatic void do_watch_error(struct work_struct *w) 302762306a36Sopenharmony_ci{ 302862306a36Sopenharmony_ci struct linger_work *lwork = container_of(w, struct linger_work, work); 302962306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq = lwork->lreq; 303062306a36Sopenharmony_ci 303162306a36Sopenharmony_ci if (!linger_registered(lreq)) { 303262306a36Sopenharmony_ci dout("%s lreq %p not registered\n", __func__, lreq); 303362306a36Sopenharmony_ci goto out; 303462306a36Sopenharmony_ci } 303562306a36Sopenharmony_ci 303662306a36Sopenharmony_ci dout("%s lreq %p err %d\n", __func__, lreq, lwork->error.err); 303762306a36Sopenharmony_ci lreq->errcb(lreq->data, lreq->linger_id, lwork->error.err); 303862306a36Sopenharmony_ci 303962306a36Sopenharmony_ciout: 304062306a36Sopenharmony_ci lwork_free(lwork); 304162306a36Sopenharmony_ci} 304262306a36Sopenharmony_ci 304362306a36Sopenharmony_cistatic void queue_watch_error(struct ceph_osd_linger_request *lreq) 304462306a36Sopenharmony_ci{ 304562306a36Sopenharmony_ci struct linger_work *lwork; 304662306a36Sopenharmony_ci 304762306a36Sopenharmony_ci lwork = lwork_alloc(lreq, do_watch_error); 304862306a36Sopenharmony_ci if (!lwork) { 304962306a36Sopenharmony_ci pr_err("failed to allocate error-lwork\n"); 305062306a36Sopenharmony_ci return; 305162306a36Sopenharmony_ci } 
305262306a36Sopenharmony_ci 305362306a36Sopenharmony_ci lwork->error.err = lreq->last_error; 305462306a36Sopenharmony_ci lwork_queue(lwork); 305562306a36Sopenharmony_ci} 305662306a36Sopenharmony_ci 305762306a36Sopenharmony_cistatic void linger_reg_commit_complete(struct ceph_osd_linger_request *lreq, 305862306a36Sopenharmony_ci int result) 305962306a36Sopenharmony_ci{ 306062306a36Sopenharmony_ci if (!completion_done(&lreq->reg_commit_wait)) { 306162306a36Sopenharmony_ci lreq->reg_commit_error = (result <= 0 ? result : 0); 306262306a36Sopenharmony_ci complete_all(&lreq->reg_commit_wait); 306362306a36Sopenharmony_ci } 306462306a36Sopenharmony_ci} 306562306a36Sopenharmony_ci 306662306a36Sopenharmony_cistatic void linger_commit_cb(struct ceph_osd_request *req) 306762306a36Sopenharmony_ci{ 306862306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq = req->r_priv; 306962306a36Sopenharmony_ci 307062306a36Sopenharmony_ci mutex_lock(&lreq->lock); 307162306a36Sopenharmony_ci if (req != lreq->reg_req) { 307262306a36Sopenharmony_ci dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n", 307362306a36Sopenharmony_ci __func__, lreq, lreq->linger_id, req, lreq->reg_req); 307462306a36Sopenharmony_ci goto out; 307562306a36Sopenharmony_ci } 307662306a36Sopenharmony_ci 307762306a36Sopenharmony_ci dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq, 307862306a36Sopenharmony_ci lreq->linger_id, req->r_result); 307962306a36Sopenharmony_ci linger_reg_commit_complete(lreq, req->r_result); 308062306a36Sopenharmony_ci lreq->committed = true; 308162306a36Sopenharmony_ci 308262306a36Sopenharmony_ci if (!lreq->is_watch) { 308362306a36Sopenharmony_ci struct ceph_osd_data *osd_data = 308462306a36Sopenharmony_ci osd_req_op_data(req, 0, notify, response_data); 308562306a36Sopenharmony_ci void *p = page_address(osd_data->pages[0]); 308662306a36Sopenharmony_ci 308762306a36Sopenharmony_ci WARN_ON(req->r_ops[0].op != CEPH_OSD_OP_NOTIFY || 308862306a36Sopenharmony_ci osd_data->type != 
CEPH_OSD_DATA_TYPE_PAGES); 308962306a36Sopenharmony_ci 309062306a36Sopenharmony_ci /* make note of the notify_id */ 309162306a36Sopenharmony_ci if (req->r_ops[0].outdata_len >= sizeof(u64)) { 309262306a36Sopenharmony_ci lreq->notify_id = ceph_decode_64(&p); 309362306a36Sopenharmony_ci dout("lreq %p notify_id %llu\n", lreq, 309462306a36Sopenharmony_ci lreq->notify_id); 309562306a36Sopenharmony_ci } else { 309662306a36Sopenharmony_ci dout("lreq %p no notify_id\n", lreq); 309762306a36Sopenharmony_ci } 309862306a36Sopenharmony_ci } 309962306a36Sopenharmony_ci 310062306a36Sopenharmony_ciout: 310162306a36Sopenharmony_ci mutex_unlock(&lreq->lock); 310262306a36Sopenharmony_ci linger_put(lreq); 310362306a36Sopenharmony_ci} 310462306a36Sopenharmony_ci 310562306a36Sopenharmony_cistatic int normalize_watch_error(int err) 310662306a36Sopenharmony_ci{ 310762306a36Sopenharmony_ci /* 310862306a36Sopenharmony_ci * Translate ENOENT -> ENOTCONN so that a delete->disconnection 310962306a36Sopenharmony_ci * notification and a failure to reconnect because we raced with 311062306a36Sopenharmony_ci * the delete appear the same to the user. 
311162306a36Sopenharmony_ci */ 311262306a36Sopenharmony_ci if (err == -ENOENT) 311362306a36Sopenharmony_ci err = -ENOTCONN; 311462306a36Sopenharmony_ci 311562306a36Sopenharmony_ci return err; 311662306a36Sopenharmony_ci} 311762306a36Sopenharmony_ci 311862306a36Sopenharmony_cistatic void linger_reconnect_cb(struct ceph_osd_request *req) 311962306a36Sopenharmony_ci{ 312062306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq = req->r_priv; 312162306a36Sopenharmony_ci 312262306a36Sopenharmony_ci mutex_lock(&lreq->lock); 312362306a36Sopenharmony_ci if (req != lreq->reg_req) { 312462306a36Sopenharmony_ci dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n", 312562306a36Sopenharmony_ci __func__, lreq, lreq->linger_id, req, lreq->reg_req); 312662306a36Sopenharmony_ci goto out; 312762306a36Sopenharmony_ci } 312862306a36Sopenharmony_ci 312962306a36Sopenharmony_ci dout("%s lreq %p linger_id %llu result %d last_error %d\n", __func__, 313062306a36Sopenharmony_ci lreq, lreq->linger_id, req->r_result, lreq->last_error); 313162306a36Sopenharmony_ci if (req->r_result < 0) { 313262306a36Sopenharmony_ci if (!lreq->last_error) { 313362306a36Sopenharmony_ci lreq->last_error = normalize_watch_error(req->r_result); 313462306a36Sopenharmony_ci queue_watch_error(lreq); 313562306a36Sopenharmony_ci } 313662306a36Sopenharmony_ci } 313762306a36Sopenharmony_ci 313862306a36Sopenharmony_ciout: 313962306a36Sopenharmony_ci mutex_unlock(&lreq->lock); 314062306a36Sopenharmony_ci linger_put(lreq); 314162306a36Sopenharmony_ci} 314262306a36Sopenharmony_ci 314362306a36Sopenharmony_cistatic void send_linger(struct ceph_osd_linger_request *lreq) 314462306a36Sopenharmony_ci{ 314562306a36Sopenharmony_ci struct ceph_osd_client *osdc = lreq->osdc; 314662306a36Sopenharmony_ci struct ceph_osd_request *req; 314762306a36Sopenharmony_ci int ret; 314862306a36Sopenharmony_ci 314962306a36Sopenharmony_ci verify_osdc_wrlocked(osdc); 315062306a36Sopenharmony_ci mutex_lock(&lreq->lock); 
315162306a36Sopenharmony_ci dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id); 315262306a36Sopenharmony_ci 315362306a36Sopenharmony_ci if (lreq->reg_req) { 315462306a36Sopenharmony_ci if (lreq->reg_req->r_osd) 315562306a36Sopenharmony_ci cancel_linger_request(lreq->reg_req); 315662306a36Sopenharmony_ci ceph_osdc_put_request(lreq->reg_req); 315762306a36Sopenharmony_ci } 315862306a36Sopenharmony_ci 315962306a36Sopenharmony_ci req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO); 316062306a36Sopenharmony_ci BUG_ON(!req); 316162306a36Sopenharmony_ci 316262306a36Sopenharmony_ci target_copy(&req->r_t, &lreq->t); 316362306a36Sopenharmony_ci req->r_mtime = lreq->mtime; 316462306a36Sopenharmony_ci 316562306a36Sopenharmony_ci if (lreq->is_watch && lreq->committed) { 316662306a36Sopenharmony_ci osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_RECONNECT, 316762306a36Sopenharmony_ci lreq->linger_id, ++lreq->register_gen); 316862306a36Sopenharmony_ci dout("lreq %p reconnect register_gen %u\n", lreq, 316962306a36Sopenharmony_ci req->r_ops[0].watch.gen); 317062306a36Sopenharmony_ci req->r_callback = linger_reconnect_cb; 317162306a36Sopenharmony_ci } else { 317262306a36Sopenharmony_ci if (lreq->is_watch) { 317362306a36Sopenharmony_ci osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_WATCH, 317462306a36Sopenharmony_ci lreq->linger_id, 0); 317562306a36Sopenharmony_ci } else { 317662306a36Sopenharmony_ci lreq->notify_id = 0; 317762306a36Sopenharmony_ci 317862306a36Sopenharmony_ci refcount_inc(&lreq->request_pl->refcnt); 317962306a36Sopenharmony_ci osd_req_op_notify_init(req, 0, lreq->linger_id, 318062306a36Sopenharmony_ci lreq->request_pl); 318162306a36Sopenharmony_ci ceph_osd_data_pages_init( 318262306a36Sopenharmony_ci osd_req_op_data(req, 0, notify, response_data), 318362306a36Sopenharmony_ci lreq->notify_id_pages, PAGE_SIZE, 0, false, false); 318462306a36Sopenharmony_ci } 318562306a36Sopenharmony_ci dout("lreq %p register\n", lreq); 
318662306a36Sopenharmony_ci req->r_callback = linger_commit_cb; 318762306a36Sopenharmony_ci } 318862306a36Sopenharmony_ci 318962306a36Sopenharmony_ci ret = ceph_osdc_alloc_messages(req, GFP_NOIO); 319062306a36Sopenharmony_ci BUG_ON(ret); 319162306a36Sopenharmony_ci 319262306a36Sopenharmony_ci req->r_priv = linger_get(lreq); 319362306a36Sopenharmony_ci req->r_linger = true; 319462306a36Sopenharmony_ci lreq->reg_req = req; 319562306a36Sopenharmony_ci mutex_unlock(&lreq->lock); 319662306a36Sopenharmony_ci 319762306a36Sopenharmony_ci submit_request(req, true); 319862306a36Sopenharmony_ci} 319962306a36Sopenharmony_ci 320062306a36Sopenharmony_cistatic void linger_ping_cb(struct ceph_osd_request *req) 320162306a36Sopenharmony_ci{ 320262306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq = req->r_priv; 320362306a36Sopenharmony_ci 320462306a36Sopenharmony_ci mutex_lock(&lreq->lock); 320562306a36Sopenharmony_ci if (req != lreq->ping_req) { 320662306a36Sopenharmony_ci dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n", 320762306a36Sopenharmony_ci __func__, lreq, lreq->linger_id, req, lreq->ping_req); 320862306a36Sopenharmony_ci goto out; 320962306a36Sopenharmony_ci } 321062306a36Sopenharmony_ci 321162306a36Sopenharmony_ci dout("%s lreq %p linger_id %llu result %d ping_sent %lu last_error %d\n", 321262306a36Sopenharmony_ci __func__, lreq, lreq->linger_id, req->r_result, lreq->ping_sent, 321362306a36Sopenharmony_ci lreq->last_error); 321462306a36Sopenharmony_ci if (lreq->register_gen == req->r_ops[0].watch.gen) { 321562306a36Sopenharmony_ci if (!req->r_result) { 321662306a36Sopenharmony_ci lreq->watch_valid_thru = lreq->ping_sent; 321762306a36Sopenharmony_ci } else if (!lreq->last_error) { 321862306a36Sopenharmony_ci lreq->last_error = normalize_watch_error(req->r_result); 321962306a36Sopenharmony_ci queue_watch_error(lreq); 322062306a36Sopenharmony_ci } 322162306a36Sopenharmony_ci } else { 322262306a36Sopenharmony_ci dout("lreq %p register_gen %u ignoring old 
pong %u\n", lreq, 322362306a36Sopenharmony_ci lreq->register_gen, req->r_ops[0].watch.gen); 322462306a36Sopenharmony_ci } 322562306a36Sopenharmony_ci 322662306a36Sopenharmony_ciout: 322762306a36Sopenharmony_ci mutex_unlock(&lreq->lock); 322862306a36Sopenharmony_ci linger_put(lreq); 322962306a36Sopenharmony_ci} 323062306a36Sopenharmony_ci 323162306a36Sopenharmony_cistatic void send_linger_ping(struct ceph_osd_linger_request *lreq) 323262306a36Sopenharmony_ci{ 323362306a36Sopenharmony_ci struct ceph_osd_client *osdc = lreq->osdc; 323462306a36Sopenharmony_ci struct ceph_osd_request *req; 323562306a36Sopenharmony_ci int ret; 323662306a36Sopenharmony_ci 323762306a36Sopenharmony_ci if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) { 323862306a36Sopenharmony_ci dout("%s PAUSERD\n", __func__); 323962306a36Sopenharmony_ci return; 324062306a36Sopenharmony_ci } 324162306a36Sopenharmony_ci 324262306a36Sopenharmony_ci lreq->ping_sent = jiffies; 324362306a36Sopenharmony_ci dout("%s lreq %p linger_id %llu ping_sent %lu register_gen %u\n", 324462306a36Sopenharmony_ci __func__, lreq, lreq->linger_id, lreq->ping_sent, 324562306a36Sopenharmony_ci lreq->register_gen); 324662306a36Sopenharmony_ci 324762306a36Sopenharmony_ci if (lreq->ping_req) { 324862306a36Sopenharmony_ci if (lreq->ping_req->r_osd) 324962306a36Sopenharmony_ci cancel_linger_request(lreq->ping_req); 325062306a36Sopenharmony_ci ceph_osdc_put_request(lreq->ping_req); 325162306a36Sopenharmony_ci } 325262306a36Sopenharmony_ci 325362306a36Sopenharmony_ci req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO); 325462306a36Sopenharmony_ci BUG_ON(!req); 325562306a36Sopenharmony_ci 325662306a36Sopenharmony_ci target_copy(&req->r_t, &lreq->t); 325762306a36Sopenharmony_ci osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_PING, lreq->linger_id, 325862306a36Sopenharmony_ci lreq->register_gen); 325962306a36Sopenharmony_ci req->r_callback = linger_ping_cb; 326062306a36Sopenharmony_ci 326162306a36Sopenharmony_ci ret = 
ceph_osdc_alloc_messages(req, GFP_NOIO); 326262306a36Sopenharmony_ci BUG_ON(ret); 326362306a36Sopenharmony_ci 326462306a36Sopenharmony_ci req->r_priv = linger_get(lreq); 326562306a36Sopenharmony_ci req->r_linger = true; 326662306a36Sopenharmony_ci lreq->ping_req = req; 326762306a36Sopenharmony_ci 326862306a36Sopenharmony_ci ceph_osdc_get_request(req); 326962306a36Sopenharmony_ci account_request(req); 327062306a36Sopenharmony_ci req->r_tid = atomic64_inc_return(&osdc->last_tid); 327162306a36Sopenharmony_ci link_request(lreq->osd, req); 327262306a36Sopenharmony_ci send_request(req); 327362306a36Sopenharmony_ci} 327462306a36Sopenharmony_ci 327562306a36Sopenharmony_cistatic void linger_submit(struct ceph_osd_linger_request *lreq) 327662306a36Sopenharmony_ci{ 327762306a36Sopenharmony_ci struct ceph_osd_client *osdc = lreq->osdc; 327862306a36Sopenharmony_ci struct ceph_osd *osd; 327962306a36Sopenharmony_ci 328062306a36Sopenharmony_ci down_write(&osdc->lock); 328162306a36Sopenharmony_ci linger_register(lreq); 328262306a36Sopenharmony_ci 328362306a36Sopenharmony_ci calc_target(osdc, &lreq->t, false); 328462306a36Sopenharmony_ci osd = lookup_create_osd(osdc, lreq->t.osd, true); 328562306a36Sopenharmony_ci link_linger(osd, lreq); 328662306a36Sopenharmony_ci 328762306a36Sopenharmony_ci send_linger(lreq); 328862306a36Sopenharmony_ci up_write(&osdc->lock); 328962306a36Sopenharmony_ci} 329062306a36Sopenharmony_ci 329162306a36Sopenharmony_cistatic void cancel_linger_map_check(struct ceph_osd_linger_request *lreq) 329262306a36Sopenharmony_ci{ 329362306a36Sopenharmony_ci struct ceph_osd_client *osdc = lreq->osdc; 329462306a36Sopenharmony_ci struct ceph_osd_linger_request *lookup_lreq; 329562306a36Sopenharmony_ci 329662306a36Sopenharmony_ci verify_osdc_wrlocked(osdc); 329762306a36Sopenharmony_ci 329862306a36Sopenharmony_ci lookup_lreq = lookup_linger_mc(&osdc->linger_map_checks, 329962306a36Sopenharmony_ci lreq->linger_id); 330062306a36Sopenharmony_ci if (!lookup_lreq) 
330162306a36Sopenharmony_ci return; 330262306a36Sopenharmony_ci 330362306a36Sopenharmony_ci WARN_ON(lookup_lreq != lreq); 330462306a36Sopenharmony_ci erase_linger_mc(&osdc->linger_map_checks, lreq); 330562306a36Sopenharmony_ci linger_put(lreq); 330662306a36Sopenharmony_ci} 330762306a36Sopenharmony_ci 330862306a36Sopenharmony_ci/* 330962306a36Sopenharmony_ci * @lreq has to be both registered and linked. 331062306a36Sopenharmony_ci */ 331162306a36Sopenharmony_cistatic void __linger_cancel(struct ceph_osd_linger_request *lreq) 331262306a36Sopenharmony_ci{ 331362306a36Sopenharmony_ci if (lreq->ping_req && lreq->ping_req->r_osd) 331462306a36Sopenharmony_ci cancel_linger_request(lreq->ping_req); 331562306a36Sopenharmony_ci if (lreq->reg_req && lreq->reg_req->r_osd) 331662306a36Sopenharmony_ci cancel_linger_request(lreq->reg_req); 331762306a36Sopenharmony_ci cancel_linger_map_check(lreq); 331862306a36Sopenharmony_ci unlink_linger(lreq->osd, lreq); 331962306a36Sopenharmony_ci linger_unregister(lreq); 332062306a36Sopenharmony_ci} 332162306a36Sopenharmony_ci 332262306a36Sopenharmony_cistatic void linger_cancel(struct ceph_osd_linger_request *lreq) 332362306a36Sopenharmony_ci{ 332462306a36Sopenharmony_ci struct ceph_osd_client *osdc = lreq->osdc; 332562306a36Sopenharmony_ci 332662306a36Sopenharmony_ci down_write(&osdc->lock); 332762306a36Sopenharmony_ci if (__linger_registered(lreq)) 332862306a36Sopenharmony_ci __linger_cancel(lreq); 332962306a36Sopenharmony_ci up_write(&osdc->lock); 333062306a36Sopenharmony_ci} 333162306a36Sopenharmony_ci 333262306a36Sopenharmony_cistatic void send_linger_map_check(struct ceph_osd_linger_request *lreq); 333362306a36Sopenharmony_ci 333462306a36Sopenharmony_cistatic void check_linger_pool_dne(struct ceph_osd_linger_request *lreq) 333562306a36Sopenharmony_ci{ 333662306a36Sopenharmony_ci struct ceph_osd_client *osdc = lreq->osdc; 333762306a36Sopenharmony_ci struct ceph_osdmap *map = osdc->osdmap; 333862306a36Sopenharmony_ci 
333962306a36Sopenharmony_ci verify_osdc_wrlocked(osdc); 334062306a36Sopenharmony_ci WARN_ON(!map->epoch); 334162306a36Sopenharmony_ci 334262306a36Sopenharmony_ci if (lreq->register_gen) { 334362306a36Sopenharmony_ci lreq->map_dne_bound = map->epoch; 334462306a36Sopenharmony_ci dout("%s lreq %p linger_id %llu pool disappeared\n", __func__, 334562306a36Sopenharmony_ci lreq, lreq->linger_id); 334662306a36Sopenharmony_ci } else { 334762306a36Sopenharmony_ci dout("%s lreq %p linger_id %llu map_dne_bound %u have %u\n", 334862306a36Sopenharmony_ci __func__, lreq, lreq->linger_id, lreq->map_dne_bound, 334962306a36Sopenharmony_ci map->epoch); 335062306a36Sopenharmony_ci } 335162306a36Sopenharmony_ci 335262306a36Sopenharmony_ci if (lreq->map_dne_bound) { 335362306a36Sopenharmony_ci if (map->epoch >= lreq->map_dne_bound) { 335462306a36Sopenharmony_ci /* we had a new enough map */ 335562306a36Sopenharmony_ci pr_info("linger_id %llu pool does not exist\n", 335662306a36Sopenharmony_ci lreq->linger_id); 335762306a36Sopenharmony_ci linger_reg_commit_complete(lreq, -ENOENT); 335862306a36Sopenharmony_ci __linger_cancel(lreq); 335962306a36Sopenharmony_ci } 336062306a36Sopenharmony_ci } else { 336162306a36Sopenharmony_ci send_linger_map_check(lreq); 336262306a36Sopenharmony_ci } 336362306a36Sopenharmony_ci} 336462306a36Sopenharmony_ci 336562306a36Sopenharmony_cistatic void linger_map_check_cb(struct ceph_mon_generic_request *greq) 336662306a36Sopenharmony_ci{ 336762306a36Sopenharmony_ci struct ceph_osd_client *osdc = &greq->monc->client->osdc; 336862306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq; 336962306a36Sopenharmony_ci u64 linger_id = greq->private_data; 337062306a36Sopenharmony_ci 337162306a36Sopenharmony_ci WARN_ON(greq->result || !greq->u.newest); 337262306a36Sopenharmony_ci 337362306a36Sopenharmony_ci down_write(&osdc->lock); 337462306a36Sopenharmony_ci lreq = lookup_linger_mc(&osdc->linger_map_checks, linger_id); 337562306a36Sopenharmony_ci if (!lreq) { 
337662306a36Sopenharmony_ci dout("%s linger_id %llu dne\n", __func__, linger_id); 337762306a36Sopenharmony_ci goto out_unlock; 337862306a36Sopenharmony_ci } 337962306a36Sopenharmony_ci 338062306a36Sopenharmony_ci dout("%s lreq %p linger_id %llu map_dne_bound %u newest %llu\n", 338162306a36Sopenharmony_ci __func__, lreq, lreq->linger_id, lreq->map_dne_bound, 338262306a36Sopenharmony_ci greq->u.newest); 338362306a36Sopenharmony_ci if (!lreq->map_dne_bound) 338462306a36Sopenharmony_ci lreq->map_dne_bound = greq->u.newest; 338562306a36Sopenharmony_ci erase_linger_mc(&osdc->linger_map_checks, lreq); 338662306a36Sopenharmony_ci check_linger_pool_dne(lreq); 338762306a36Sopenharmony_ci 338862306a36Sopenharmony_ci linger_put(lreq); 338962306a36Sopenharmony_ciout_unlock: 339062306a36Sopenharmony_ci up_write(&osdc->lock); 339162306a36Sopenharmony_ci} 339262306a36Sopenharmony_ci 339362306a36Sopenharmony_cistatic void send_linger_map_check(struct ceph_osd_linger_request *lreq) 339462306a36Sopenharmony_ci{ 339562306a36Sopenharmony_ci struct ceph_osd_client *osdc = lreq->osdc; 339662306a36Sopenharmony_ci struct ceph_osd_linger_request *lookup_lreq; 339762306a36Sopenharmony_ci int ret; 339862306a36Sopenharmony_ci 339962306a36Sopenharmony_ci verify_osdc_wrlocked(osdc); 340062306a36Sopenharmony_ci 340162306a36Sopenharmony_ci lookup_lreq = lookup_linger_mc(&osdc->linger_map_checks, 340262306a36Sopenharmony_ci lreq->linger_id); 340362306a36Sopenharmony_ci if (lookup_lreq) { 340462306a36Sopenharmony_ci WARN_ON(lookup_lreq != lreq); 340562306a36Sopenharmony_ci return; 340662306a36Sopenharmony_ci } 340762306a36Sopenharmony_ci 340862306a36Sopenharmony_ci linger_get(lreq); 340962306a36Sopenharmony_ci insert_linger_mc(&osdc->linger_map_checks, lreq); 341062306a36Sopenharmony_ci ret = ceph_monc_get_version_async(&osdc->client->monc, "osdmap", 341162306a36Sopenharmony_ci linger_map_check_cb, lreq->linger_id); 341262306a36Sopenharmony_ci WARN_ON(ret); 341362306a36Sopenharmony_ci} 
341462306a36Sopenharmony_ci 341562306a36Sopenharmony_cistatic int linger_reg_commit_wait(struct ceph_osd_linger_request *lreq) 341662306a36Sopenharmony_ci{ 341762306a36Sopenharmony_ci int ret; 341862306a36Sopenharmony_ci 341962306a36Sopenharmony_ci dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id); 342062306a36Sopenharmony_ci ret = wait_for_completion_killable(&lreq->reg_commit_wait); 342162306a36Sopenharmony_ci return ret ?: lreq->reg_commit_error; 342262306a36Sopenharmony_ci} 342362306a36Sopenharmony_ci 342462306a36Sopenharmony_cistatic int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq, 342562306a36Sopenharmony_ci unsigned long timeout) 342662306a36Sopenharmony_ci{ 342762306a36Sopenharmony_ci long left; 342862306a36Sopenharmony_ci 342962306a36Sopenharmony_ci dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id); 343062306a36Sopenharmony_ci left = wait_for_completion_killable_timeout(&lreq->notify_finish_wait, 343162306a36Sopenharmony_ci ceph_timeout_jiffies(timeout)); 343262306a36Sopenharmony_ci if (left <= 0) 343362306a36Sopenharmony_ci left = left ?: -ETIMEDOUT; 343462306a36Sopenharmony_ci else 343562306a36Sopenharmony_ci left = lreq->notify_finish_error; /* completed */ 343662306a36Sopenharmony_ci 343762306a36Sopenharmony_ci return left; 343862306a36Sopenharmony_ci} 343962306a36Sopenharmony_ci 344062306a36Sopenharmony_ci/* 344162306a36Sopenharmony_ci * Timeout callback, called every N seconds. When 1 or more OSD 344262306a36Sopenharmony_ci * requests has been active for more than N seconds, we send a keepalive 344362306a36Sopenharmony_ci * (tag + timestamp) to its OSD to ensure any communications channel 344462306a36Sopenharmony_ci * reset is detected. 
344562306a36Sopenharmony_ci */ 344662306a36Sopenharmony_cistatic void handle_timeout(struct work_struct *work) 344762306a36Sopenharmony_ci{ 344862306a36Sopenharmony_ci struct ceph_osd_client *osdc = 344962306a36Sopenharmony_ci container_of(work, struct ceph_osd_client, timeout_work.work); 345062306a36Sopenharmony_ci struct ceph_options *opts = osdc->client->options; 345162306a36Sopenharmony_ci unsigned long cutoff = jiffies - opts->osd_keepalive_timeout; 345262306a36Sopenharmony_ci unsigned long expiry_cutoff = jiffies - opts->osd_request_timeout; 345362306a36Sopenharmony_ci LIST_HEAD(slow_osds); 345462306a36Sopenharmony_ci struct rb_node *n, *p; 345562306a36Sopenharmony_ci 345662306a36Sopenharmony_ci dout("%s osdc %p\n", __func__, osdc); 345762306a36Sopenharmony_ci down_write(&osdc->lock); 345862306a36Sopenharmony_ci 345962306a36Sopenharmony_ci /* 346062306a36Sopenharmony_ci * ping osds that are a bit slow. this ensures that if there 346162306a36Sopenharmony_ci * is a break in the TCP connection we will notice, and reopen 346262306a36Sopenharmony_ci * a connection with that osd (from the fault callback). 
346362306a36Sopenharmony_ci */ 346462306a36Sopenharmony_ci for (n = rb_first(&osdc->osds); n; n = rb_next(n)) { 346562306a36Sopenharmony_ci struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node); 346662306a36Sopenharmony_ci bool found = false; 346762306a36Sopenharmony_ci 346862306a36Sopenharmony_ci for (p = rb_first(&osd->o_requests); p; ) { 346962306a36Sopenharmony_ci struct ceph_osd_request *req = 347062306a36Sopenharmony_ci rb_entry(p, struct ceph_osd_request, r_node); 347162306a36Sopenharmony_ci 347262306a36Sopenharmony_ci p = rb_next(p); /* abort_request() */ 347362306a36Sopenharmony_ci 347462306a36Sopenharmony_ci if (time_before(req->r_stamp, cutoff)) { 347562306a36Sopenharmony_ci dout(" req %p tid %llu on osd%d is laggy\n", 347662306a36Sopenharmony_ci req, req->r_tid, osd->o_osd); 347762306a36Sopenharmony_ci found = true; 347862306a36Sopenharmony_ci } 347962306a36Sopenharmony_ci if (opts->osd_request_timeout && 348062306a36Sopenharmony_ci time_before(req->r_start_stamp, expiry_cutoff)) { 348162306a36Sopenharmony_ci pr_err_ratelimited("tid %llu on osd%d timeout\n", 348262306a36Sopenharmony_ci req->r_tid, osd->o_osd); 348362306a36Sopenharmony_ci abort_request(req, -ETIMEDOUT); 348462306a36Sopenharmony_ci } 348562306a36Sopenharmony_ci } 348662306a36Sopenharmony_ci for (p = rb_first(&osd->o_linger_requests); p; p = rb_next(p)) { 348762306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq = 348862306a36Sopenharmony_ci rb_entry(p, struct ceph_osd_linger_request, node); 348962306a36Sopenharmony_ci 349062306a36Sopenharmony_ci dout(" lreq %p linger_id %llu is served by osd%d\n", 349162306a36Sopenharmony_ci lreq, lreq->linger_id, osd->o_osd); 349262306a36Sopenharmony_ci found = true; 349362306a36Sopenharmony_ci 349462306a36Sopenharmony_ci mutex_lock(&lreq->lock); 349562306a36Sopenharmony_ci if (lreq->is_watch && lreq->committed && !lreq->last_error) 349662306a36Sopenharmony_ci send_linger_ping(lreq); 349762306a36Sopenharmony_ci mutex_unlock(&lreq->lock); 
349862306a36Sopenharmony_ci } 349962306a36Sopenharmony_ci 350062306a36Sopenharmony_ci if (found) 350162306a36Sopenharmony_ci list_move_tail(&osd->o_keepalive_item, &slow_osds); 350262306a36Sopenharmony_ci } 350362306a36Sopenharmony_ci 350462306a36Sopenharmony_ci if (opts->osd_request_timeout) { 350562306a36Sopenharmony_ci for (p = rb_first(&osdc->homeless_osd.o_requests); p; ) { 350662306a36Sopenharmony_ci struct ceph_osd_request *req = 350762306a36Sopenharmony_ci rb_entry(p, struct ceph_osd_request, r_node); 350862306a36Sopenharmony_ci 350962306a36Sopenharmony_ci p = rb_next(p); /* abort_request() */ 351062306a36Sopenharmony_ci 351162306a36Sopenharmony_ci if (time_before(req->r_start_stamp, expiry_cutoff)) { 351262306a36Sopenharmony_ci pr_err_ratelimited("tid %llu on osd%d timeout\n", 351362306a36Sopenharmony_ci req->r_tid, osdc->homeless_osd.o_osd); 351462306a36Sopenharmony_ci abort_request(req, -ETIMEDOUT); 351562306a36Sopenharmony_ci } 351662306a36Sopenharmony_ci } 351762306a36Sopenharmony_ci } 351862306a36Sopenharmony_ci 351962306a36Sopenharmony_ci if (atomic_read(&osdc->num_homeless) || !list_empty(&slow_osds)) 352062306a36Sopenharmony_ci maybe_request_map(osdc); 352162306a36Sopenharmony_ci 352262306a36Sopenharmony_ci while (!list_empty(&slow_osds)) { 352362306a36Sopenharmony_ci struct ceph_osd *osd = list_first_entry(&slow_osds, 352462306a36Sopenharmony_ci struct ceph_osd, 352562306a36Sopenharmony_ci o_keepalive_item); 352662306a36Sopenharmony_ci list_del_init(&osd->o_keepalive_item); 352762306a36Sopenharmony_ci ceph_con_keepalive(&osd->o_con); 352862306a36Sopenharmony_ci } 352962306a36Sopenharmony_ci 353062306a36Sopenharmony_ci up_write(&osdc->lock); 353162306a36Sopenharmony_ci schedule_delayed_work(&osdc->timeout_work, 353262306a36Sopenharmony_ci osdc->client->options->osd_keepalive_timeout); 353362306a36Sopenharmony_ci} 353462306a36Sopenharmony_ci 353562306a36Sopenharmony_cistatic void handle_osds_timeout(struct work_struct *work) 
353662306a36Sopenharmony_ci{ 353762306a36Sopenharmony_ci struct ceph_osd_client *osdc = 353862306a36Sopenharmony_ci container_of(work, struct ceph_osd_client, 353962306a36Sopenharmony_ci osds_timeout_work.work); 354062306a36Sopenharmony_ci unsigned long delay = osdc->client->options->osd_idle_ttl / 4; 354162306a36Sopenharmony_ci struct ceph_osd *osd, *nosd; 354262306a36Sopenharmony_ci 354362306a36Sopenharmony_ci dout("%s osdc %p\n", __func__, osdc); 354462306a36Sopenharmony_ci down_write(&osdc->lock); 354562306a36Sopenharmony_ci list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) { 354662306a36Sopenharmony_ci if (time_before(jiffies, osd->lru_ttl)) 354762306a36Sopenharmony_ci break; 354862306a36Sopenharmony_ci 354962306a36Sopenharmony_ci WARN_ON(!RB_EMPTY_ROOT(&osd->o_requests)); 355062306a36Sopenharmony_ci WARN_ON(!RB_EMPTY_ROOT(&osd->o_linger_requests)); 355162306a36Sopenharmony_ci close_osd(osd); 355262306a36Sopenharmony_ci } 355362306a36Sopenharmony_ci 355462306a36Sopenharmony_ci up_write(&osdc->lock); 355562306a36Sopenharmony_ci schedule_delayed_work(&osdc->osds_timeout_work, 355662306a36Sopenharmony_ci round_jiffies_relative(delay)); 355762306a36Sopenharmony_ci} 355862306a36Sopenharmony_ci 355962306a36Sopenharmony_cistatic int ceph_oloc_decode(void **p, void *end, 356062306a36Sopenharmony_ci struct ceph_object_locator *oloc) 356162306a36Sopenharmony_ci{ 356262306a36Sopenharmony_ci u8 struct_v, struct_cv; 356362306a36Sopenharmony_ci u32 len; 356462306a36Sopenharmony_ci void *struct_end; 356562306a36Sopenharmony_ci int ret = 0; 356662306a36Sopenharmony_ci 356762306a36Sopenharmony_ci ceph_decode_need(p, end, 1 + 1 + 4, e_inval); 356862306a36Sopenharmony_ci struct_v = ceph_decode_8(p); 356962306a36Sopenharmony_ci struct_cv = ceph_decode_8(p); 357062306a36Sopenharmony_ci if (struct_v < 3) { 357162306a36Sopenharmony_ci pr_warn("got v %d < 3 cv %d of ceph_object_locator\n", 357262306a36Sopenharmony_ci struct_v, struct_cv); 357362306a36Sopenharmony_ci goto 
e_inval; 357462306a36Sopenharmony_ci } 357562306a36Sopenharmony_ci if (struct_cv > 6) { 357662306a36Sopenharmony_ci pr_warn("got v %d cv %d > 6 of ceph_object_locator\n", 357762306a36Sopenharmony_ci struct_v, struct_cv); 357862306a36Sopenharmony_ci goto e_inval; 357962306a36Sopenharmony_ci } 358062306a36Sopenharmony_ci len = ceph_decode_32(p); 358162306a36Sopenharmony_ci ceph_decode_need(p, end, len, e_inval); 358262306a36Sopenharmony_ci struct_end = *p + len; 358362306a36Sopenharmony_ci 358462306a36Sopenharmony_ci oloc->pool = ceph_decode_64(p); 358562306a36Sopenharmony_ci *p += 4; /* skip preferred */ 358662306a36Sopenharmony_ci 358762306a36Sopenharmony_ci len = ceph_decode_32(p); 358862306a36Sopenharmony_ci if (len > 0) { 358962306a36Sopenharmony_ci pr_warn("ceph_object_locator::key is set\n"); 359062306a36Sopenharmony_ci goto e_inval; 359162306a36Sopenharmony_ci } 359262306a36Sopenharmony_ci 359362306a36Sopenharmony_ci if (struct_v >= 5) { 359462306a36Sopenharmony_ci bool changed = false; 359562306a36Sopenharmony_ci 359662306a36Sopenharmony_ci len = ceph_decode_32(p); 359762306a36Sopenharmony_ci if (len > 0) { 359862306a36Sopenharmony_ci ceph_decode_need(p, end, len, e_inval); 359962306a36Sopenharmony_ci if (!oloc->pool_ns || 360062306a36Sopenharmony_ci ceph_compare_string(oloc->pool_ns, *p, len)) 360162306a36Sopenharmony_ci changed = true; 360262306a36Sopenharmony_ci *p += len; 360362306a36Sopenharmony_ci } else { 360462306a36Sopenharmony_ci if (oloc->pool_ns) 360562306a36Sopenharmony_ci changed = true; 360662306a36Sopenharmony_ci } 360762306a36Sopenharmony_ci if (changed) { 360862306a36Sopenharmony_ci /* redirect changes namespace */ 360962306a36Sopenharmony_ci pr_warn("ceph_object_locator::nspace is changed\n"); 361062306a36Sopenharmony_ci goto e_inval; 361162306a36Sopenharmony_ci } 361262306a36Sopenharmony_ci } 361362306a36Sopenharmony_ci 361462306a36Sopenharmony_ci if (struct_v >= 6) { 361562306a36Sopenharmony_ci s64 hash = ceph_decode_64(p); 
361662306a36Sopenharmony_ci if (hash != -1) { 361762306a36Sopenharmony_ci pr_warn("ceph_object_locator::hash is set\n"); 361862306a36Sopenharmony_ci goto e_inval; 361962306a36Sopenharmony_ci } 362062306a36Sopenharmony_ci } 362162306a36Sopenharmony_ci 362262306a36Sopenharmony_ci /* skip the rest */ 362362306a36Sopenharmony_ci *p = struct_end; 362462306a36Sopenharmony_ciout: 362562306a36Sopenharmony_ci return ret; 362662306a36Sopenharmony_ci 362762306a36Sopenharmony_cie_inval: 362862306a36Sopenharmony_ci ret = -EINVAL; 362962306a36Sopenharmony_ci goto out; 363062306a36Sopenharmony_ci} 363162306a36Sopenharmony_ci 363262306a36Sopenharmony_cistatic int ceph_redirect_decode(void **p, void *end, 363362306a36Sopenharmony_ci struct ceph_request_redirect *redir) 363462306a36Sopenharmony_ci{ 363562306a36Sopenharmony_ci u8 struct_v, struct_cv; 363662306a36Sopenharmony_ci u32 len; 363762306a36Sopenharmony_ci void *struct_end; 363862306a36Sopenharmony_ci int ret; 363962306a36Sopenharmony_ci 364062306a36Sopenharmony_ci ceph_decode_need(p, end, 1 + 1 + 4, e_inval); 364162306a36Sopenharmony_ci struct_v = ceph_decode_8(p); 364262306a36Sopenharmony_ci struct_cv = ceph_decode_8(p); 364362306a36Sopenharmony_ci if (struct_cv > 1) { 364462306a36Sopenharmony_ci pr_warn("got v %d cv %d > 1 of ceph_request_redirect\n", 364562306a36Sopenharmony_ci struct_v, struct_cv); 364662306a36Sopenharmony_ci goto e_inval; 364762306a36Sopenharmony_ci } 364862306a36Sopenharmony_ci len = ceph_decode_32(p); 364962306a36Sopenharmony_ci ceph_decode_need(p, end, len, e_inval); 365062306a36Sopenharmony_ci struct_end = *p + len; 365162306a36Sopenharmony_ci 365262306a36Sopenharmony_ci ret = ceph_oloc_decode(p, end, &redir->oloc); 365362306a36Sopenharmony_ci if (ret) 365462306a36Sopenharmony_ci goto out; 365562306a36Sopenharmony_ci 365662306a36Sopenharmony_ci len = ceph_decode_32(p); 365762306a36Sopenharmony_ci if (len > 0) { 365862306a36Sopenharmony_ci pr_warn("ceph_request_redirect::object_name is set\n"); 
365962306a36Sopenharmony_ci goto e_inval; 366062306a36Sopenharmony_ci } 366162306a36Sopenharmony_ci 366262306a36Sopenharmony_ci /* skip the rest */ 366362306a36Sopenharmony_ci *p = struct_end; 366462306a36Sopenharmony_ciout: 366562306a36Sopenharmony_ci return ret; 366662306a36Sopenharmony_ci 366762306a36Sopenharmony_cie_inval: 366862306a36Sopenharmony_ci ret = -EINVAL; 366962306a36Sopenharmony_ci goto out; 367062306a36Sopenharmony_ci} 367162306a36Sopenharmony_ci 367262306a36Sopenharmony_cistruct MOSDOpReply { 367362306a36Sopenharmony_ci struct ceph_pg pgid; 367462306a36Sopenharmony_ci u64 flags; 367562306a36Sopenharmony_ci int result; 367662306a36Sopenharmony_ci u32 epoch; 367762306a36Sopenharmony_ci int num_ops; 367862306a36Sopenharmony_ci u32 outdata_len[CEPH_OSD_MAX_OPS]; 367962306a36Sopenharmony_ci s32 rval[CEPH_OSD_MAX_OPS]; 368062306a36Sopenharmony_ci int retry_attempt; 368162306a36Sopenharmony_ci struct ceph_eversion replay_version; 368262306a36Sopenharmony_ci u64 user_version; 368362306a36Sopenharmony_ci struct ceph_request_redirect redirect; 368462306a36Sopenharmony_ci}; 368562306a36Sopenharmony_ci 368662306a36Sopenharmony_cistatic int decode_MOSDOpReply(const struct ceph_msg *msg, struct MOSDOpReply *m) 368762306a36Sopenharmony_ci{ 368862306a36Sopenharmony_ci void *p = msg->front.iov_base; 368962306a36Sopenharmony_ci void *const end = p + msg->front.iov_len; 369062306a36Sopenharmony_ci u16 version = le16_to_cpu(msg->hdr.version); 369162306a36Sopenharmony_ci struct ceph_eversion bad_replay_version; 369262306a36Sopenharmony_ci u8 decode_redir; 369362306a36Sopenharmony_ci u32 len; 369462306a36Sopenharmony_ci int ret; 369562306a36Sopenharmony_ci int i; 369662306a36Sopenharmony_ci 369762306a36Sopenharmony_ci ceph_decode_32_safe(&p, end, len, e_inval); 369862306a36Sopenharmony_ci ceph_decode_need(&p, end, len, e_inval); 369962306a36Sopenharmony_ci p += len; /* skip oid */ 370062306a36Sopenharmony_ci 370162306a36Sopenharmony_ci ret = ceph_decode_pgid(&p, end, 
&m->pgid); 370262306a36Sopenharmony_ci if (ret) 370362306a36Sopenharmony_ci return ret; 370462306a36Sopenharmony_ci 370562306a36Sopenharmony_ci ceph_decode_64_safe(&p, end, m->flags, e_inval); 370662306a36Sopenharmony_ci ceph_decode_32_safe(&p, end, m->result, e_inval); 370762306a36Sopenharmony_ci ceph_decode_need(&p, end, sizeof(bad_replay_version), e_inval); 370862306a36Sopenharmony_ci memcpy(&bad_replay_version, p, sizeof(bad_replay_version)); 370962306a36Sopenharmony_ci p += sizeof(bad_replay_version); 371062306a36Sopenharmony_ci ceph_decode_32_safe(&p, end, m->epoch, e_inval); 371162306a36Sopenharmony_ci 371262306a36Sopenharmony_ci ceph_decode_32_safe(&p, end, m->num_ops, e_inval); 371362306a36Sopenharmony_ci if (m->num_ops > ARRAY_SIZE(m->outdata_len)) 371462306a36Sopenharmony_ci goto e_inval; 371562306a36Sopenharmony_ci 371662306a36Sopenharmony_ci ceph_decode_need(&p, end, m->num_ops * sizeof(struct ceph_osd_op), 371762306a36Sopenharmony_ci e_inval); 371862306a36Sopenharmony_ci for (i = 0; i < m->num_ops; i++) { 371962306a36Sopenharmony_ci struct ceph_osd_op *op = p; 372062306a36Sopenharmony_ci 372162306a36Sopenharmony_ci m->outdata_len[i] = le32_to_cpu(op->payload_len); 372262306a36Sopenharmony_ci p += sizeof(*op); 372362306a36Sopenharmony_ci } 372462306a36Sopenharmony_ci 372562306a36Sopenharmony_ci ceph_decode_32_safe(&p, end, m->retry_attempt, e_inval); 372662306a36Sopenharmony_ci for (i = 0; i < m->num_ops; i++) 372762306a36Sopenharmony_ci ceph_decode_32_safe(&p, end, m->rval[i], e_inval); 372862306a36Sopenharmony_ci 372962306a36Sopenharmony_ci if (version >= 5) { 373062306a36Sopenharmony_ci ceph_decode_need(&p, end, sizeof(m->replay_version), e_inval); 373162306a36Sopenharmony_ci memcpy(&m->replay_version, p, sizeof(m->replay_version)); 373262306a36Sopenharmony_ci p += sizeof(m->replay_version); 373362306a36Sopenharmony_ci ceph_decode_64_safe(&p, end, m->user_version, e_inval); 373462306a36Sopenharmony_ci } else { 373562306a36Sopenharmony_ci 
m->replay_version = bad_replay_version; /* struct */ 373662306a36Sopenharmony_ci m->user_version = le64_to_cpu(m->replay_version.version); 373762306a36Sopenharmony_ci } 373862306a36Sopenharmony_ci 373962306a36Sopenharmony_ci if (version >= 6) { 374062306a36Sopenharmony_ci if (version >= 7) 374162306a36Sopenharmony_ci ceph_decode_8_safe(&p, end, decode_redir, e_inval); 374262306a36Sopenharmony_ci else 374362306a36Sopenharmony_ci decode_redir = 1; 374462306a36Sopenharmony_ci } else { 374562306a36Sopenharmony_ci decode_redir = 0; 374662306a36Sopenharmony_ci } 374762306a36Sopenharmony_ci 374862306a36Sopenharmony_ci if (decode_redir) { 374962306a36Sopenharmony_ci ret = ceph_redirect_decode(&p, end, &m->redirect); 375062306a36Sopenharmony_ci if (ret) 375162306a36Sopenharmony_ci return ret; 375262306a36Sopenharmony_ci } else { 375362306a36Sopenharmony_ci ceph_oloc_init(&m->redirect.oloc); 375462306a36Sopenharmony_ci } 375562306a36Sopenharmony_ci 375662306a36Sopenharmony_ci return 0; 375762306a36Sopenharmony_ci 375862306a36Sopenharmony_cie_inval: 375962306a36Sopenharmony_ci return -EINVAL; 376062306a36Sopenharmony_ci} 376162306a36Sopenharmony_ci 376262306a36Sopenharmony_ci/* 376362306a36Sopenharmony_ci * Handle MOSDOpReply. Set ->r_result and call the callback if it is 376462306a36Sopenharmony_ci * specified. 
376562306a36Sopenharmony_ci */ 376662306a36Sopenharmony_cistatic void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg) 376762306a36Sopenharmony_ci{ 376862306a36Sopenharmony_ci struct ceph_osd_client *osdc = osd->o_osdc; 376962306a36Sopenharmony_ci struct ceph_osd_request *req; 377062306a36Sopenharmony_ci struct MOSDOpReply m; 377162306a36Sopenharmony_ci u64 tid = le64_to_cpu(msg->hdr.tid); 377262306a36Sopenharmony_ci u32 data_len = 0; 377362306a36Sopenharmony_ci int ret; 377462306a36Sopenharmony_ci int i; 377562306a36Sopenharmony_ci 377662306a36Sopenharmony_ci dout("%s msg %p tid %llu\n", __func__, msg, tid); 377762306a36Sopenharmony_ci 377862306a36Sopenharmony_ci down_read(&osdc->lock); 377962306a36Sopenharmony_ci if (!osd_registered(osd)) { 378062306a36Sopenharmony_ci dout("%s osd%d unknown\n", __func__, osd->o_osd); 378162306a36Sopenharmony_ci goto out_unlock_osdc; 378262306a36Sopenharmony_ci } 378362306a36Sopenharmony_ci WARN_ON(osd->o_osd != le64_to_cpu(msg->hdr.src.num)); 378462306a36Sopenharmony_ci 378562306a36Sopenharmony_ci mutex_lock(&osd->lock); 378662306a36Sopenharmony_ci req = lookup_request(&osd->o_requests, tid); 378762306a36Sopenharmony_ci if (!req) { 378862306a36Sopenharmony_ci dout("%s osd%d tid %llu unknown\n", __func__, osd->o_osd, tid); 378962306a36Sopenharmony_ci goto out_unlock_session; 379062306a36Sopenharmony_ci } 379162306a36Sopenharmony_ci 379262306a36Sopenharmony_ci m.redirect.oloc.pool_ns = req->r_t.target_oloc.pool_ns; 379362306a36Sopenharmony_ci ret = decode_MOSDOpReply(msg, &m); 379462306a36Sopenharmony_ci m.redirect.oloc.pool_ns = NULL; 379562306a36Sopenharmony_ci if (ret) { 379662306a36Sopenharmony_ci pr_err("failed to decode MOSDOpReply for tid %llu: %d\n", 379762306a36Sopenharmony_ci req->r_tid, ret); 379862306a36Sopenharmony_ci ceph_msg_dump(msg); 379962306a36Sopenharmony_ci goto fail_request; 380062306a36Sopenharmony_ci } 380162306a36Sopenharmony_ci dout("%s req %p tid %llu flags 0x%llx pgid %llu.%x epoch %u attempt %d 
v %u'%llu uv %llu\n", 380262306a36Sopenharmony_ci __func__, req, req->r_tid, m.flags, m.pgid.pool, m.pgid.seed, 380362306a36Sopenharmony_ci m.epoch, m.retry_attempt, le32_to_cpu(m.replay_version.epoch), 380462306a36Sopenharmony_ci le64_to_cpu(m.replay_version.version), m.user_version); 380562306a36Sopenharmony_ci 380662306a36Sopenharmony_ci if (m.retry_attempt >= 0) { 380762306a36Sopenharmony_ci if (m.retry_attempt != req->r_attempts - 1) { 380862306a36Sopenharmony_ci dout("req %p tid %llu retry_attempt %d != %d, ignoring\n", 380962306a36Sopenharmony_ci req, req->r_tid, m.retry_attempt, 381062306a36Sopenharmony_ci req->r_attempts - 1); 381162306a36Sopenharmony_ci goto out_unlock_session; 381262306a36Sopenharmony_ci } 381362306a36Sopenharmony_ci } else { 381462306a36Sopenharmony_ci WARN_ON(1); /* MOSDOpReply v4 is assumed */ 381562306a36Sopenharmony_ci } 381662306a36Sopenharmony_ci 381762306a36Sopenharmony_ci if (!ceph_oloc_empty(&m.redirect.oloc)) { 381862306a36Sopenharmony_ci dout("req %p tid %llu redirect pool %lld\n", req, req->r_tid, 381962306a36Sopenharmony_ci m.redirect.oloc.pool); 382062306a36Sopenharmony_ci unlink_request(osd, req); 382162306a36Sopenharmony_ci mutex_unlock(&osd->lock); 382262306a36Sopenharmony_ci 382362306a36Sopenharmony_ci /* 382462306a36Sopenharmony_ci * Not ceph_oloc_copy() - changing pool_ns is not 382562306a36Sopenharmony_ci * supported. 
382662306a36Sopenharmony_ci */ 382762306a36Sopenharmony_ci req->r_t.target_oloc.pool = m.redirect.oloc.pool; 382862306a36Sopenharmony_ci req->r_flags |= CEPH_OSD_FLAG_REDIRECTED | 382962306a36Sopenharmony_ci CEPH_OSD_FLAG_IGNORE_OVERLAY | 383062306a36Sopenharmony_ci CEPH_OSD_FLAG_IGNORE_CACHE; 383162306a36Sopenharmony_ci req->r_tid = 0; 383262306a36Sopenharmony_ci __submit_request(req, false); 383362306a36Sopenharmony_ci goto out_unlock_osdc; 383462306a36Sopenharmony_ci } 383562306a36Sopenharmony_ci 383662306a36Sopenharmony_ci if (m.result == -EAGAIN) { 383762306a36Sopenharmony_ci dout("req %p tid %llu EAGAIN\n", req, req->r_tid); 383862306a36Sopenharmony_ci unlink_request(osd, req); 383962306a36Sopenharmony_ci mutex_unlock(&osd->lock); 384062306a36Sopenharmony_ci 384162306a36Sopenharmony_ci /* 384262306a36Sopenharmony_ci * The object is missing on the replica or not (yet) 384362306a36Sopenharmony_ci * readable. Clear pgid to force a resend to the primary 384462306a36Sopenharmony_ci * via legacy_change. 
384562306a36Sopenharmony_ci */ 384662306a36Sopenharmony_ci req->r_t.pgid.pool = 0; 384762306a36Sopenharmony_ci req->r_t.pgid.seed = 0; 384862306a36Sopenharmony_ci WARN_ON(!req->r_t.used_replica); 384962306a36Sopenharmony_ci req->r_flags &= ~(CEPH_OSD_FLAG_BALANCE_READS | 385062306a36Sopenharmony_ci CEPH_OSD_FLAG_LOCALIZE_READS); 385162306a36Sopenharmony_ci req->r_tid = 0; 385262306a36Sopenharmony_ci __submit_request(req, false); 385362306a36Sopenharmony_ci goto out_unlock_osdc; 385462306a36Sopenharmony_ci } 385562306a36Sopenharmony_ci 385662306a36Sopenharmony_ci if (m.num_ops != req->r_num_ops) { 385762306a36Sopenharmony_ci pr_err("num_ops %d != %d for tid %llu\n", m.num_ops, 385862306a36Sopenharmony_ci req->r_num_ops, req->r_tid); 385962306a36Sopenharmony_ci goto fail_request; 386062306a36Sopenharmony_ci } 386162306a36Sopenharmony_ci for (i = 0; i < req->r_num_ops; i++) { 386262306a36Sopenharmony_ci dout(" req %p tid %llu op %d rval %d len %u\n", req, 386362306a36Sopenharmony_ci req->r_tid, i, m.rval[i], m.outdata_len[i]); 386462306a36Sopenharmony_ci req->r_ops[i].rval = m.rval[i]; 386562306a36Sopenharmony_ci req->r_ops[i].outdata_len = m.outdata_len[i]; 386662306a36Sopenharmony_ci data_len += m.outdata_len[i]; 386762306a36Sopenharmony_ci } 386862306a36Sopenharmony_ci if (data_len != le32_to_cpu(msg->hdr.data_len)) { 386962306a36Sopenharmony_ci pr_err("sum of lens %u != %u for tid %llu\n", data_len, 387062306a36Sopenharmony_ci le32_to_cpu(msg->hdr.data_len), req->r_tid); 387162306a36Sopenharmony_ci goto fail_request; 387262306a36Sopenharmony_ci } 387362306a36Sopenharmony_ci dout("%s req %p tid %llu result %d data_len %u\n", __func__, 387462306a36Sopenharmony_ci req, req->r_tid, m.result, data_len); 387562306a36Sopenharmony_ci 387662306a36Sopenharmony_ci /* 387762306a36Sopenharmony_ci * Since we only ever request ONDISK, we should only ever get 387862306a36Sopenharmony_ci * one (type of) reply back. 
387962306a36Sopenharmony_ci */ 388062306a36Sopenharmony_ci WARN_ON(!(m.flags & CEPH_OSD_FLAG_ONDISK)); 388162306a36Sopenharmony_ci req->r_version = m.user_version; 388262306a36Sopenharmony_ci req->r_result = m.result ?: data_len; 388362306a36Sopenharmony_ci finish_request(req); 388462306a36Sopenharmony_ci mutex_unlock(&osd->lock); 388562306a36Sopenharmony_ci up_read(&osdc->lock); 388662306a36Sopenharmony_ci 388762306a36Sopenharmony_ci __complete_request(req); 388862306a36Sopenharmony_ci return; 388962306a36Sopenharmony_ci 389062306a36Sopenharmony_cifail_request: 389162306a36Sopenharmony_ci complete_request(req, -EIO); 389262306a36Sopenharmony_ciout_unlock_session: 389362306a36Sopenharmony_ci mutex_unlock(&osd->lock); 389462306a36Sopenharmony_ciout_unlock_osdc: 389562306a36Sopenharmony_ci up_read(&osdc->lock); 389662306a36Sopenharmony_ci} 389762306a36Sopenharmony_ci 389862306a36Sopenharmony_cistatic void set_pool_was_full(struct ceph_osd_client *osdc) 389962306a36Sopenharmony_ci{ 390062306a36Sopenharmony_ci struct rb_node *n; 390162306a36Sopenharmony_ci 390262306a36Sopenharmony_ci for (n = rb_first(&osdc->osdmap->pg_pools); n; n = rb_next(n)) { 390362306a36Sopenharmony_ci struct ceph_pg_pool_info *pi = 390462306a36Sopenharmony_ci rb_entry(n, struct ceph_pg_pool_info, node); 390562306a36Sopenharmony_ci 390662306a36Sopenharmony_ci pi->was_full = __pool_full(pi); 390762306a36Sopenharmony_ci } 390862306a36Sopenharmony_ci} 390962306a36Sopenharmony_ci 391062306a36Sopenharmony_cistatic bool pool_cleared_full(struct ceph_osd_client *osdc, s64 pool_id) 391162306a36Sopenharmony_ci{ 391262306a36Sopenharmony_ci struct ceph_pg_pool_info *pi; 391362306a36Sopenharmony_ci 391462306a36Sopenharmony_ci pi = ceph_pg_pool_by_id(osdc->osdmap, pool_id); 391562306a36Sopenharmony_ci if (!pi) 391662306a36Sopenharmony_ci return false; 391762306a36Sopenharmony_ci 391862306a36Sopenharmony_ci return pi->was_full && !__pool_full(pi); 391962306a36Sopenharmony_ci} 392062306a36Sopenharmony_ci 
392162306a36Sopenharmony_cistatic enum calc_target_result 392262306a36Sopenharmony_cirecalc_linger_target(struct ceph_osd_linger_request *lreq) 392362306a36Sopenharmony_ci{ 392462306a36Sopenharmony_ci struct ceph_osd_client *osdc = lreq->osdc; 392562306a36Sopenharmony_ci enum calc_target_result ct_res; 392662306a36Sopenharmony_ci 392762306a36Sopenharmony_ci ct_res = calc_target(osdc, &lreq->t, true); 392862306a36Sopenharmony_ci if (ct_res == CALC_TARGET_NEED_RESEND) { 392962306a36Sopenharmony_ci struct ceph_osd *osd; 393062306a36Sopenharmony_ci 393162306a36Sopenharmony_ci osd = lookup_create_osd(osdc, lreq->t.osd, true); 393262306a36Sopenharmony_ci if (osd != lreq->osd) { 393362306a36Sopenharmony_ci unlink_linger(lreq->osd, lreq); 393462306a36Sopenharmony_ci link_linger(osd, lreq); 393562306a36Sopenharmony_ci } 393662306a36Sopenharmony_ci } 393762306a36Sopenharmony_ci 393862306a36Sopenharmony_ci return ct_res; 393962306a36Sopenharmony_ci} 394062306a36Sopenharmony_ci 394162306a36Sopenharmony_ci/* 394262306a36Sopenharmony_ci * Requeue requests whose mapping to an OSD has changed. 
394362306a36Sopenharmony_ci */ 394462306a36Sopenharmony_cistatic void scan_requests(struct ceph_osd *osd, 394562306a36Sopenharmony_ci bool force_resend, 394662306a36Sopenharmony_ci bool cleared_full, 394762306a36Sopenharmony_ci bool check_pool_cleared_full, 394862306a36Sopenharmony_ci struct rb_root *need_resend, 394962306a36Sopenharmony_ci struct list_head *need_resend_linger) 395062306a36Sopenharmony_ci{ 395162306a36Sopenharmony_ci struct ceph_osd_client *osdc = osd->o_osdc; 395262306a36Sopenharmony_ci struct rb_node *n; 395362306a36Sopenharmony_ci bool force_resend_writes; 395462306a36Sopenharmony_ci 395562306a36Sopenharmony_ci for (n = rb_first(&osd->o_linger_requests); n; ) { 395662306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq = 395762306a36Sopenharmony_ci rb_entry(n, struct ceph_osd_linger_request, node); 395862306a36Sopenharmony_ci enum calc_target_result ct_res; 395962306a36Sopenharmony_ci 396062306a36Sopenharmony_ci n = rb_next(n); /* recalc_linger_target() */ 396162306a36Sopenharmony_ci 396262306a36Sopenharmony_ci dout("%s lreq %p linger_id %llu\n", __func__, lreq, 396362306a36Sopenharmony_ci lreq->linger_id); 396462306a36Sopenharmony_ci ct_res = recalc_linger_target(lreq); 396562306a36Sopenharmony_ci switch (ct_res) { 396662306a36Sopenharmony_ci case CALC_TARGET_NO_ACTION: 396762306a36Sopenharmony_ci force_resend_writes = cleared_full || 396862306a36Sopenharmony_ci (check_pool_cleared_full && 396962306a36Sopenharmony_ci pool_cleared_full(osdc, lreq->t.base_oloc.pool)); 397062306a36Sopenharmony_ci if (!force_resend && !force_resend_writes) 397162306a36Sopenharmony_ci break; 397262306a36Sopenharmony_ci 397362306a36Sopenharmony_ci fallthrough; 397462306a36Sopenharmony_ci case CALC_TARGET_NEED_RESEND: 397562306a36Sopenharmony_ci cancel_linger_map_check(lreq); 397662306a36Sopenharmony_ci /* 397762306a36Sopenharmony_ci * scan_requests() for the previous epoch(s) 397862306a36Sopenharmony_ci * may have already added it to the list, since 
397962306a36Sopenharmony_ci * it's not unlinked here. 398062306a36Sopenharmony_ci */ 398162306a36Sopenharmony_ci if (list_empty(&lreq->scan_item)) 398262306a36Sopenharmony_ci list_add_tail(&lreq->scan_item, need_resend_linger); 398362306a36Sopenharmony_ci break; 398462306a36Sopenharmony_ci case CALC_TARGET_POOL_DNE: 398562306a36Sopenharmony_ci list_del_init(&lreq->scan_item); 398662306a36Sopenharmony_ci check_linger_pool_dne(lreq); 398762306a36Sopenharmony_ci break; 398862306a36Sopenharmony_ci } 398962306a36Sopenharmony_ci } 399062306a36Sopenharmony_ci 399162306a36Sopenharmony_ci for (n = rb_first(&osd->o_requests); n; ) { 399262306a36Sopenharmony_ci struct ceph_osd_request *req = 399362306a36Sopenharmony_ci rb_entry(n, struct ceph_osd_request, r_node); 399462306a36Sopenharmony_ci enum calc_target_result ct_res; 399562306a36Sopenharmony_ci 399662306a36Sopenharmony_ci n = rb_next(n); /* unlink_request(), check_pool_dne() */ 399762306a36Sopenharmony_ci 399862306a36Sopenharmony_ci dout("%s req %p tid %llu\n", __func__, req, req->r_tid); 399962306a36Sopenharmony_ci ct_res = calc_target(osdc, &req->r_t, false); 400062306a36Sopenharmony_ci switch (ct_res) { 400162306a36Sopenharmony_ci case CALC_TARGET_NO_ACTION: 400262306a36Sopenharmony_ci force_resend_writes = cleared_full || 400362306a36Sopenharmony_ci (check_pool_cleared_full && 400462306a36Sopenharmony_ci pool_cleared_full(osdc, req->r_t.base_oloc.pool)); 400562306a36Sopenharmony_ci if (!force_resend && 400662306a36Sopenharmony_ci (!(req->r_flags & CEPH_OSD_FLAG_WRITE) || 400762306a36Sopenharmony_ci !force_resend_writes)) 400862306a36Sopenharmony_ci break; 400962306a36Sopenharmony_ci 401062306a36Sopenharmony_ci fallthrough; 401162306a36Sopenharmony_ci case CALC_TARGET_NEED_RESEND: 401262306a36Sopenharmony_ci cancel_map_check(req); 401362306a36Sopenharmony_ci unlink_request(osd, req); 401462306a36Sopenharmony_ci insert_request(need_resend, req); 401562306a36Sopenharmony_ci break; 401662306a36Sopenharmony_ci case 
CALC_TARGET_POOL_DNE: 401762306a36Sopenharmony_ci check_pool_dne(req); 401862306a36Sopenharmony_ci break; 401962306a36Sopenharmony_ci } 402062306a36Sopenharmony_ci } 402162306a36Sopenharmony_ci} 402262306a36Sopenharmony_ci 402362306a36Sopenharmony_cistatic int handle_one_map(struct ceph_osd_client *osdc, 402462306a36Sopenharmony_ci void *p, void *end, bool incremental, 402562306a36Sopenharmony_ci struct rb_root *need_resend, 402662306a36Sopenharmony_ci struct list_head *need_resend_linger) 402762306a36Sopenharmony_ci{ 402862306a36Sopenharmony_ci struct ceph_osdmap *newmap; 402962306a36Sopenharmony_ci struct rb_node *n; 403062306a36Sopenharmony_ci bool skipped_map = false; 403162306a36Sopenharmony_ci bool was_full; 403262306a36Sopenharmony_ci 403362306a36Sopenharmony_ci was_full = ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL); 403462306a36Sopenharmony_ci set_pool_was_full(osdc); 403562306a36Sopenharmony_ci 403662306a36Sopenharmony_ci if (incremental) 403762306a36Sopenharmony_ci newmap = osdmap_apply_incremental(&p, end, 403862306a36Sopenharmony_ci ceph_msgr2(osdc->client), 403962306a36Sopenharmony_ci osdc->osdmap); 404062306a36Sopenharmony_ci else 404162306a36Sopenharmony_ci newmap = ceph_osdmap_decode(&p, end, ceph_msgr2(osdc->client)); 404262306a36Sopenharmony_ci if (IS_ERR(newmap)) 404362306a36Sopenharmony_ci return PTR_ERR(newmap); 404462306a36Sopenharmony_ci 404562306a36Sopenharmony_ci if (newmap != osdc->osdmap) { 404662306a36Sopenharmony_ci /* 404762306a36Sopenharmony_ci * Preserve ->was_full before destroying the old map. 404862306a36Sopenharmony_ci * For pools that weren't in the old map, ->was_full 404962306a36Sopenharmony_ci * should be false. 
405062306a36Sopenharmony_ci */ 405162306a36Sopenharmony_ci for (n = rb_first(&newmap->pg_pools); n; n = rb_next(n)) { 405262306a36Sopenharmony_ci struct ceph_pg_pool_info *pi = 405362306a36Sopenharmony_ci rb_entry(n, struct ceph_pg_pool_info, node); 405462306a36Sopenharmony_ci struct ceph_pg_pool_info *old_pi; 405562306a36Sopenharmony_ci 405662306a36Sopenharmony_ci old_pi = ceph_pg_pool_by_id(osdc->osdmap, pi->id); 405762306a36Sopenharmony_ci if (old_pi) 405862306a36Sopenharmony_ci pi->was_full = old_pi->was_full; 405962306a36Sopenharmony_ci else 406062306a36Sopenharmony_ci WARN_ON(pi->was_full); 406162306a36Sopenharmony_ci } 406262306a36Sopenharmony_ci 406362306a36Sopenharmony_ci if (osdc->osdmap->epoch && 406462306a36Sopenharmony_ci osdc->osdmap->epoch + 1 < newmap->epoch) { 406562306a36Sopenharmony_ci WARN_ON(incremental); 406662306a36Sopenharmony_ci skipped_map = true; 406762306a36Sopenharmony_ci } 406862306a36Sopenharmony_ci 406962306a36Sopenharmony_ci ceph_osdmap_destroy(osdc->osdmap); 407062306a36Sopenharmony_ci osdc->osdmap = newmap; 407162306a36Sopenharmony_ci } 407262306a36Sopenharmony_ci 407362306a36Sopenharmony_ci was_full &= !ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL); 407462306a36Sopenharmony_ci scan_requests(&osdc->homeless_osd, skipped_map, was_full, true, 407562306a36Sopenharmony_ci need_resend, need_resend_linger); 407662306a36Sopenharmony_ci 407762306a36Sopenharmony_ci for (n = rb_first(&osdc->osds); n; ) { 407862306a36Sopenharmony_ci struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node); 407962306a36Sopenharmony_ci 408062306a36Sopenharmony_ci n = rb_next(n); /* close_osd() */ 408162306a36Sopenharmony_ci 408262306a36Sopenharmony_ci scan_requests(osd, skipped_map, was_full, true, need_resend, 408362306a36Sopenharmony_ci need_resend_linger); 408462306a36Sopenharmony_ci if (!ceph_osd_is_up(osdc->osdmap, osd->o_osd) || 408562306a36Sopenharmony_ci memcmp(&osd->o_con.peer_addr, 408662306a36Sopenharmony_ci ceph_osd_addr(osdc->osdmap, osd->o_osd), 
408762306a36Sopenharmony_ci sizeof(struct ceph_entity_addr))) 408862306a36Sopenharmony_ci close_osd(osd); 408962306a36Sopenharmony_ci } 409062306a36Sopenharmony_ci 409162306a36Sopenharmony_ci return 0; 409262306a36Sopenharmony_ci} 409362306a36Sopenharmony_ci 409462306a36Sopenharmony_cistatic void kick_requests(struct ceph_osd_client *osdc, 409562306a36Sopenharmony_ci struct rb_root *need_resend, 409662306a36Sopenharmony_ci struct list_head *need_resend_linger) 409762306a36Sopenharmony_ci{ 409862306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq, *nlreq; 409962306a36Sopenharmony_ci enum calc_target_result ct_res; 410062306a36Sopenharmony_ci struct rb_node *n; 410162306a36Sopenharmony_ci 410262306a36Sopenharmony_ci /* make sure need_resend targets reflect latest map */ 410362306a36Sopenharmony_ci for (n = rb_first(need_resend); n; ) { 410462306a36Sopenharmony_ci struct ceph_osd_request *req = 410562306a36Sopenharmony_ci rb_entry(n, struct ceph_osd_request, r_node); 410662306a36Sopenharmony_ci 410762306a36Sopenharmony_ci n = rb_next(n); 410862306a36Sopenharmony_ci 410962306a36Sopenharmony_ci if (req->r_t.epoch < osdc->osdmap->epoch) { 411062306a36Sopenharmony_ci ct_res = calc_target(osdc, &req->r_t, false); 411162306a36Sopenharmony_ci if (ct_res == CALC_TARGET_POOL_DNE) { 411262306a36Sopenharmony_ci erase_request(need_resend, req); 411362306a36Sopenharmony_ci check_pool_dne(req); 411462306a36Sopenharmony_ci } 411562306a36Sopenharmony_ci } 411662306a36Sopenharmony_ci } 411762306a36Sopenharmony_ci 411862306a36Sopenharmony_ci for (n = rb_first(need_resend); n; ) { 411962306a36Sopenharmony_ci struct ceph_osd_request *req = 412062306a36Sopenharmony_ci rb_entry(n, struct ceph_osd_request, r_node); 412162306a36Sopenharmony_ci struct ceph_osd *osd; 412262306a36Sopenharmony_ci 412362306a36Sopenharmony_ci n = rb_next(n); 412462306a36Sopenharmony_ci erase_request(need_resend, req); /* before link_request() */ 412562306a36Sopenharmony_ci 412662306a36Sopenharmony_ci osd = 
lookup_create_osd(osdc, req->r_t.osd, true); 412762306a36Sopenharmony_ci link_request(osd, req); 412862306a36Sopenharmony_ci if (!req->r_linger) { 412962306a36Sopenharmony_ci if (!osd_homeless(osd) && !req->r_t.paused) 413062306a36Sopenharmony_ci send_request(req); 413162306a36Sopenharmony_ci } else { 413262306a36Sopenharmony_ci cancel_linger_request(req); 413362306a36Sopenharmony_ci } 413462306a36Sopenharmony_ci } 413562306a36Sopenharmony_ci 413662306a36Sopenharmony_ci list_for_each_entry_safe(lreq, nlreq, need_resend_linger, scan_item) { 413762306a36Sopenharmony_ci if (!osd_homeless(lreq->osd)) 413862306a36Sopenharmony_ci send_linger(lreq); 413962306a36Sopenharmony_ci 414062306a36Sopenharmony_ci list_del_init(&lreq->scan_item); 414162306a36Sopenharmony_ci } 414262306a36Sopenharmony_ci} 414362306a36Sopenharmony_ci 414462306a36Sopenharmony_ci/* 414562306a36Sopenharmony_ci * Process updated osd map. 414662306a36Sopenharmony_ci * 414762306a36Sopenharmony_ci * The message contains any number of incremental and full maps, normally 414862306a36Sopenharmony_ci * indicating some sort of topology change in the cluster. Kick requests 414962306a36Sopenharmony_ci * off to different OSDs as needed. 
415062306a36Sopenharmony_ci */ 415162306a36Sopenharmony_civoid ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) 415262306a36Sopenharmony_ci{ 415362306a36Sopenharmony_ci void *p = msg->front.iov_base; 415462306a36Sopenharmony_ci void *const end = p + msg->front.iov_len; 415562306a36Sopenharmony_ci u32 nr_maps, maplen; 415662306a36Sopenharmony_ci u32 epoch; 415762306a36Sopenharmony_ci struct ceph_fsid fsid; 415862306a36Sopenharmony_ci struct rb_root need_resend = RB_ROOT; 415962306a36Sopenharmony_ci LIST_HEAD(need_resend_linger); 416062306a36Sopenharmony_ci bool handled_incremental = false; 416162306a36Sopenharmony_ci bool was_pauserd, was_pausewr; 416262306a36Sopenharmony_ci bool pauserd, pausewr; 416362306a36Sopenharmony_ci int err; 416462306a36Sopenharmony_ci 416562306a36Sopenharmony_ci dout("%s have %u\n", __func__, osdc->osdmap->epoch); 416662306a36Sopenharmony_ci down_write(&osdc->lock); 416762306a36Sopenharmony_ci 416862306a36Sopenharmony_ci /* verify fsid */ 416962306a36Sopenharmony_ci ceph_decode_need(&p, end, sizeof(fsid), bad); 417062306a36Sopenharmony_ci ceph_decode_copy(&p, &fsid, sizeof(fsid)); 417162306a36Sopenharmony_ci if (ceph_check_fsid(osdc->client, &fsid) < 0) 417262306a36Sopenharmony_ci goto bad; 417362306a36Sopenharmony_ci 417462306a36Sopenharmony_ci was_pauserd = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD); 417562306a36Sopenharmony_ci was_pausewr = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR) || 417662306a36Sopenharmony_ci ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || 417762306a36Sopenharmony_ci have_pool_full(osdc); 417862306a36Sopenharmony_ci 417962306a36Sopenharmony_ci /* incremental maps */ 418062306a36Sopenharmony_ci ceph_decode_32_safe(&p, end, nr_maps, bad); 418162306a36Sopenharmony_ci dout(" %d inc maps\n", nr_maps); 418262306a36Sopenharmony_ci while (nr_maps > 0) { 418362306a36Sopenharmony_ci ceph_decode_need(&p, end, 2*sizeof(u32), bad); 418462306a36Sopenharmony_ci epoch = ceph_decode_32(&p); 
418562306a36Sopenharmony_ci maplen = ceph_decode_32(&p); 418662306a36Sopenharmony_ci ceph_decode_need(&p, end, maplen, bad); 418762306a36Sopenharmony_ci if (osdc->osdmap->epoch && 418862306a36Sopenharmony_ci osdc->osdmap->epoch + 1 == epoch) { 418962306a36Sopenharmony_ci dout("applying incremental map %u len %d\n", 419062306a36Sopenharmony_ci epoch, maplen); 419162306a36Sopenharmony_ci err = handle_one_map(osdc, p, p + maplen, true, 419262306a36Sopenharmony_ci &need_resend, &need_resend_linger); 419362306a36Sopenharmony_ci if (err) 419462306a36Sopenharmony_ci goto bad; 419562306a36Sopenharmony_ci handled_incremental = true; 419662306a36Sopenharmony_ci } else { 419762306a36Sopenharmony_ci dout("ignoring incremental map %u len %d\n", 419862306a36Sopenharmony_ci epoch, maplen); 419962306a36Sopenharmony_ci } 420062306a36Sopenharmony_ci p += maplen; 420162306a36Sopenharmony_ci nr_maps--; 420262306a36Sopenharmony_ci } 420362306a36Sopenharmony_ci if (handled_incremental) 420462306a36Sopenharmony_ci goto done; 420562306a36Sopenharmony_ci 420662306a36Sopenharmony_ci /* full maps */ 420762306a36Sopenharmony_ci ceph_decode_32_safe(&p, end, nr_maps, bad); 420862306a36Sopenharmony_ci dout(" %d full maps\n", nr_maps); 420962306a36Sopenharmony_ci while (nr_maps) { 421062306a36Sopenharmony_ci ceph_decode_need(&p, end, 2*sizeof(u32), bad); 421162306a36Sopenharmony_ci epoch = ceph_decode_32(&p); 421262306a36Sopenharmony_ci maplen = ceph_decode_32(&p); 421362306a36Sopenharmony_ci ceph_decode_need(&p, end, maplen, bad); 421462306a36Sopenharmony_ci if (nr_maps > 1) { 421562306a36Sopenharmony_ci dout("skipping non-latest full map %u len %d\n", 421662306a36Sopenharmony_ci epoch, maplen); 421762306a36Sopenharmony_ci } else if (osdc->osdmap->epoch >= epoch) { 421862306a36Sopenharmony_ci dout("skipping full map %u len %d, " 421962306a36Sopenharmony_ci "older than our %u\n", epoch, maplen, 422062306a36Sopenharmony_ci osdc->osdmap->epoch); 422162306a36Sopenharmony_ci } else { 
422262306a36Sopenharmony_ci dout("taking full map %u len %d\n", epoch, maplen); 422362306a36Sopenharmony_ci err = handle_one_map(osdc, p, p + maplen, false, 422462306a36Sopenharmony_ci &need_resend, &need_resend_linger); 422562306a36Sopenharmony_ci if (err) 422662306a36Sopenharmony_ci goto bad; 422762306a36Sopenharmony_ci } 422862306a36Sopenharmony_ci p += maplen; 422962306a36Sopenharmony_ci nr_maps--; 423062306a36Sopenharmony_ci } 423162306a36Sopenharmony_ci 423262306a36Sopenharmony_cidone: 423362306a36Sopenharmony_ci /* 423462306a36Sopenharmony_ci * subscribe to subsequent osdmap updates if full to ensure 423562306a36Sopenharmony_ci * we find out when we are no longer full and stop returning 423662306a36Sopenharmony_ci * ENOSPC. 423762306a36Sopenharmony_ci */ 423862306a36Sopenharmony_ci pauserd = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD); 423962306a36Sopenharmony_ci pausewr = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR) || 424062306a36Sopenharmony_ci ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || 424162306a36Sopenharmony_ci have_pool_full(osdc); 424262306a36Sopenharmony_ci if (was_pauserd || was_pausewr || pauserd || pausewr || 424362306a36Sopenharmony_ci osdc->osdmap->epoch < osdc->epoch_barrier) 424462306a36Sopenharmony_ci maybe_request_map(osdc); 424562306a36Sopenharmony_ci 424662306a36Sopenharmony_ci kick_requests(osdc, &need_resend, &need_resend_linger); 424762306a36Sopenharmony_ci 424862306a36Sopenharmony_ci ceph_osdc_abort_on_full(osdc); 424962306a36Sopenharmony_ci ceph_monc_got_map(&osdc->client->monc, CEPH_SUB_OSDMAP, 425062306a36Sopenharmony_ci osdc->osdmap->epoch); 425162306a36Sopenharmony_ci up_write(&osdc->lock); 425262306a36Sopenharmony_ci wake_up_all(&osdc->client->auth_wq); 425362306a36Sopenharmony_ci return; 425462306a36Sopenharmony_ci 425562306a36Sopenharmony_cibad: 425662306a36Sopenharmony_ci pr_err("osdc handle_map corrupt msg\n"); 425762306a36Sopenharmony_ci ceph_msg_dump(msg); 425862306a36Sopenharmony_ci up_write(&osdc->lock); 
425962306a36Sopenharmony_ci} 426062306a36Sopenharmony_ci 426162306a36Sopenharmony_ci/* 426262306a36Sopenharmony_ci * Resubmit requests pending on the given osd. 426362306a36Sopenharmony_ci */ 426462306a36Sopenharmony_cistatic void kick_osd_requests(struct ceph_osd *osd) 426562306a36Sopenharmony_ci{ 426662306a36Sopenharmony_ci struct rb_node *n; 426762306a36Sopenharmony_ci 426862306a36Sopenharmony_ci clear_backoffs(osd); 426962306a36Sopenharmony_ci 427062306a36Sopenharmony_ci for (n = rb_first(&osd->o_requests); n; ) { 427162306a36Sopenharmony_ci struct ceph_osd_request *req = 427262306a36Sopenharmony_ci rb_entry(n, struct ceph_osd_request, r_node); 427362306a36Sopenharmony_ci 427462306a36Sopenharmony_ci n = rb_next(n); /* cancel_linger_request() */ 427562306a36Sopenharmony_ci 427662306a36Sopenharmony_ci if (!req->r_linger) { 427762306a36Sopenharmony_ci if (!req->r_t.paused) 427862306a36Sopenharmony_ci send_request(req); 427962306a36Sopenharmony_ci } else { 428062306a36Sopenharmony_ci cancel_linger_request(req); 428162306a36Sopenharmony_ci } 428262306a36Sopenharmony_ci } 428362306a36Sopenharmony_ci for (n = rb_first(&osd->o_linger_requests); n; n = rb_next(n)) { 428462306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq = 428562306a36Sopenharmony_ci rb_entry(n, struct ceph_osd_linger_request, node); 428662306a36Sopenharmony_ci 428762306a36Sopenharmony_ci send_linger(lreq); 428862306a36Sopenharmony_ci } 428962306a36Sopenharmony_ci} 429062306a36Sopenharmony_ci 429162306a36Sopenharmony_ci/* 429262306a36Sopenharmony_ci * If the osd connection drops, we need to resubmit all requests. 
429362306a36Sopenharmony_ci */ 429462306a36Sopenharmony_cistatic void osd_fault(struct ceph_connection *con) 429562306a36Sopenharmony_ci{ 429662306a36Sopenharmony_ci struct ceph_osd *osd = con->private; 429762306a36Sopenharmony_ci struct ceph_osd_client *osdc = osd->o_osdc; 429862306a36Sopenharmony_ci 429962306a36Sopenharmony_ci dout("%s osd %p osd%d\n", __func__, osd, osd->o_osd); 430062306a36Sopenharmony_ci 430162306a36Sopenharmony_ci down_write(&osdc->lock); 430262306a36Sopenharmony_ci if (!osd_registered(osd)) { 430362306a36Sopenharmony_ci dout("%s osd%d unknown\n", __func__, osd->o_osd); 430462306a36Sopenharmony_ci goto out_unlock; 430562306a36Sopenharmony_ci } 430662306a36Sopenharmony_ci 430762306a36Sopenharmony_ci if (!reopen_osd(osd)) 430862306a36Sopenharmony_ci kick_osd_requests(osd); 430962306a36Sopenharmony_ci maybe_request_map(osdc); 431062306a36Sopenharmony_ci 431162306a36Sopenharmony_ciout_unlock: 431262306a36Sopenharmony_ci up_write(&osdc->lock); 431362306a36Sopenharmony_ci} 431462306a36Sopenharmony_ci 431562306a36Sopenharmony_cistruct MOSDBackoff { 431662306a36Sopenharmony_ci struct ceph_spg spgid; 431762306a36Sopenharmony_ci u32 map_epoch; 431862306a36Sopenharmony_ci u8 op; 431962306a36Sopenharmony_ci u64 id; 432062306a36Sopenharmony_ci struct ceph_hobject_id *begin; 432162306a36Sopenharmony_ci struct ceph_hobject_id *end; 432262306a36Sopenharmony_ci}; 432362306a36Sopenharmony_ci 432462306a36Sopenharmony_cistatic int decode_MOSDBackoff(const struct ceph_msg *msg, struct MOSDBackoff *m) 432562306a36Sopenharmony_ci{ 432662306a36Sopenharmony_ci void *p = msg->front.iov_base; 432762306a36Sopenharmony_ci void *const end = p + msg->front.iov_len; 432862306a36Sopenharmony_ci u8 struct_v; 432962306a36Sopenharmony_ci u32 struct_len; 433062306a36Sopenharmony_ci int ret; 433162306a36Sopenharmony_ci 433262306a36Sopenharmony_ci ret = ceph_start_decoding(&p, end, 1, "spg_t", &struct_v, &struct_len); 433362306a36Sopenharmony_ci if (ret) 
433462306a36Sopenharmony_ci return ret; 433562306a36Sopenharmony_ci 433662306a36Sopenharmony_ci ret = ceph_decode_pgid(&p, end, &m->spgid.pgid); 433762306a36Sopenharmony_ci if (ret) 433862306a36Sopenharmony_ci return ret; 433962306a36Sopenharmony_ci 434062306a36Sopenharmony_ci ceph_decode_8_safe(&p, end, m->spgid.shard, e_inval); 434162306a36Sopenharmony_ci ceph_decode_32_safe(&p, end, m->map_epoch, e_inval); 434262306a36Sopenharmony_ci ceph_decode_8_safe(&p, end, m->op, e_inval); 434362306a36Sopenharmony_ci ceph_decode_64_safe(&p, end, m->id, e_inval); 434462306a36Sopenharmony_ci 434562306a36Sopenharmony_ci m->begin = kzalloc(sizeof(*m->begin), GFP_NOIO); 434662306a36Sopenharmony_ci if (!m->begin) 434762306a36Sopenharmony_ci return -ENOMEM; 434862306a36Sopenharmony_ci 434962306a36Sopenharmony_ci ret = decode_hoid(&p, end, m->begin); 435062306a36Sopenharmony_ci if (ret) { 435162306a36Sopenharmony_ci free_hoid(m->begin); 435262306a36Sopenharmony_ci return ret; 435362306a36Sopenharmony_ci } 435462306a36Sopenharmony_ci 435562306a36Sopenharmony_ci m->end = kzalloc(sizeof(*m->end), GFP_NOIO); 435662306a36Sopenharmony_ci if (!m->end) { 435762306a36Sopenharmony_ci free_hoid(m->begin); 435862306a36Sopenharmony_ci return -ENOMEM; 435962306a36Sopenharmony_ci } 436062306a36Sopenharmony_ci 436162306a36Sopenharmony_ci ret = decode_hoid(&p, end, m->end); 436262306a36Sopenharmony_ci if (ret) { 436362306a36Sopenharmony_ci free_hoid(m->begin); 436462306a36Sopenharmony_ci free_hoid(m->end); 436562306a36Sopenharmony_ci return ret; 436662306a36Sopenharmony_ci } 436762306a36Sopenharmony_ci 436862306a36Sopenharmony_ci return 0; 436962306a36Sopenharmony_ci 437062306a36Sopenharmony_cie_inval: 437162306a36Sopenharmony_ci return -EINVAL; 437262306a36Sopenharmony_ci} 437362306a36Sopenharmony_ci 437462306a36Sopenharmony_cistatic struct ceph_msg *create_backoff_message( 437562306a36Sopenharmony_ci const struct ceph_osd_backoff *backoff, 437662306a36Sopenharmony_ci u32 map_epoch) 
437762306a36Sopenharmony_ci{ 437862306a36Sopenharmony_ci struct ceph_msg *msg; 437962306a36Sopenharmony_ci void *p, *end; 438062306a36Sopenharmony_ci int msg_size; 438162306a36Sopenharmony_ci 438262306a36Sopenharmony_ci msg_size = CEPH_ENCODING_START_BLK_LEN + 438362306a36Sopenharmony_ci CEPH_PGID_ENCODING_LEN + 1; /* spgid */ 438462306a36Sopenharmony_ci msg_size += 4 + 1 + 8; /* map_epoch, op, id */ 438562306a36Sopenharmony_ci msg_size += CEPH_ENCODING_START_BLK_LEN + 438662306a36Sopenharmony_ci hoid_encoding_size(backoff->begin); 438762306a36Sopenharmony_ci msg_size += CEPH_ENCODING_START_BLK_LEN + 438862306a36Sopenharmony_ci hoid_encoding_size(backoff->end); 438962306a36Sopenharmony_ci 439062306a36Sopenharmony_ci msg = ceph_msg_new(CEPH_MSG_OSD_BACKOFF, msg_size, GFP_NOIO, true); 439162306a36Sopenharmony_ci if (!msg) 439262306a36Sopenharmony_ci return NULL; 439362306a36Sopenharmony_ci 439462306a36Sopenharmony_ci p = msg->front.iov_base; 439562306a36Sopenharmony_ci end = p + msg->front_alloc_len; 439662306a36Sopenharmony_ci 439762306a36Sopenharmony_ci encode_spgid(&p, &backoff->spgid); 439862306a36Sopenharmony_ci ceph_encode_32(&p, map_epoch); 439962306a36Sopenharmony_ci ceph_encode_8(&p, CEPH_OSD_BACKOFF_OP_ACK_BLOCK); 440062306a36Sopenharmony_ci ceph_encode_64(&p, backoff->id); 440162306a36Sopenharmony_ci encode_hoid(&p, end, backoff->begin); 440262306a36Sopenharmony_ci encode_hoid(&p, end, backoff->end); 440362306a36Sopenharmony_ci BUG_ON(p != end); 440462306a36Sopenharmony_ci 440562306a36Sopenharmony_ci msg->front.iov_len = p - msg->front.iov_base; 440662306a36Sopenharmony_ci msg->hdr.version = cpu_to_le16(1); /* MOSDBackoff v1 */ 440762306a36Sopenharmony_ci msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 440862306a36Sopenharmony_ci 440962306a36Sopenharmony_ci return msg; 441062306a36Sopenharmony_ci} 441162306a36Sopenharmony_ci 441262306a36Sopenharmony_cistatic void handle_backoff_block(struct ceph_osd *osd, struct MOSDBackoff *m) 
441362306a36Sopenharmony_ci{ 441462306a36Sopenharmony_ci struct ceph_spg_mapping *spg; 441562306a36Sopenharmony_ci struct ceph_osd_backoff *backoff; 441662306a36Sopenharmony_ci struct ceph_msg *msg; 441762306a36Sopenharmony_ci 441862306a36Sopenharmony_ci dout("%s osd%d spgid %llu.%xs%d id %llu\n", __func__, osd->o_osd, 441962306a36Sopenharmony_ci m->spgid.pgid.pool, m->spgid.pgid.seed, m->spgid.shard, m->id); 442062306a36Sopenharmony_ci 442162306a36Sopenharmony_ci spg = lookup_spg_mapping(&osd->o_backoff_mappings, &m->spgid); 442262306a36Sopenharmony_ci if (!spg) { 442362306a36Sopenharmony_ci spg = alloc_spg_mapping(); 442462306a36Sopenharmony_ci if (!spg) { 442562306a36Sopenharmony_ci pr_err("%s failed to allocate spg\n", __func__); 442662306a36Sopenharmony_ci return; 442762306a36Sopenharmony_ci } 442862306a36Sopenharmony_ci spg->spgid = m->spgid; /* struct */ 442962306a36Sopenharmony_ci insert_spg_mapping(&osd->o_backoff_mappings, spg); 443062306a36Sopenharmony_ci } 443162306a36Sopenharmony_ci 443262306a36Sopenharmony_ci backoff = alloc_backoff(); 443362306a36Sopenharmony_ci if (!backoff) { 443462306a36Sopenharmony_ci pr_err("%s failed to allocate backoff\n", __func__); 443562306a36Sopenharmony_ci return; 443662306a36Sopenharmony_ci } 443762306a36Sopenharmony_ci backoff->spgid = m->spgid; /* struct */ 443862306a36Sopenharmony_ci backoff->id = m->id; 443962306a36Sopenharmony_ci backoff->begin = m->begin; 444062306a36Sopenharmony_ci m->begin = NULL; /* backoff now owns this */ 444162306a36Sopenharmony_ci backoff->end = m->end; 444262306a36Sopenharmony_ci m->end = NULL; /* ditto */ 444362306a36Sopenharmony_ci 444462306a36Sopenharmony_ci insert_backoff(&spg->backoffs, backoff); 444562306a36Sopenharmony_ci insert_backoff_by_id(&osd->o_backoffs_by_id, backoff); 444662306a36Sopenharmony_ci 444762306a36Sopenharmony_ci /* 444862306a36Sopenharmony_ci * Ack with original backoff's epoch so that the OSD can 444962306a36Sopenharmony_ci * discard this if there was a PG split. 
445062306a36Sopenharmony_ci */ 445162306a36Sopenharmony_ci msg = create_backoff_message(backoff, m->map_epoch); 445262306a36Sopenharmony_ci if (!msg) { 445362306a36Sopenharmony_ci pr_err("%s failed to allocate msg\n", __func__); 445462306a36Sopenharmony_ci return; 445562306a36Sopenharmony_ci } 445662306a36Sopenharmony_ci ceph_con_send(&osd->o_con, msg); 445762306a36Sopenharmony_ci} 445862306a36Sopenharmony_ci 445962306a36Sopenharmony_cistatic bool target_contained_by(const struct ceph_osd_request_target *t, 446062306a36Sopenharmony_ci const struct ceph_hobject_id *begin, 446162306a36Sopenharmony_ci const struct ceph_hobject_id *end) 446262306a36Sopenharmony_ci{ 446362306a36Sopenharmony_ci struct ceph_hobject_id hoid; 446462306a36Sopenharmony_ci int cmp; 446562306a36Sopenharmony_ci 446662306a36Sopenharmony_ci hoid_fill_from_target(&hoid, t); 446762306a36Sopenharmony_ci cmp = hoid_compare(&hoid, begin); 446862306a36Sopenharmony_ci return !cmp || (cmp > 0 && hoid_compare(&hoid, end) < 0); 446962306a36Sopenharmony_ci} 447062306a36Sopenharmony_ci 447162306a36Sopenharmony_cistatic void handle_backoff_unblock(struct ceph_osd *osd, 447262306a36Sopenharmony_ci const struct MOSDBackoff *m) 447362306a36Sopenharmony_ci{ 447462306a36Sopenharmony_ci struct ceph_spg_mapping *spg; 447562306a36Sopenharmony_ci struct ceph_osd_backoff *backoff; 447662306a36Sopenharmony_ci struct rb_node *n; 447762306a36Sopenharmony_ci 447862306a36Sopenharmony_ci dout("%s osd%d spgid %llu.%xs%d id %llu\n", __func__, osd->o_osd, 447962306a36Sopenharmony_ci m->spgid.pgid.pool, m->spgid.pgid.seed, m->spgid.shard, m->id); 448062306a36Sopenharmony_ci 448162306a36Sopenharmony_ci backoff = lookup_backoff_by_id(&osd->o_backoffs_by_id, m->id); 448262306a36Sopenharmony_ci if (!backoff) { 448362306a36Sopenharmony_ci pr_err("%s osd%d spgid %llu.%xs%d id %llu backoff dne\n", 448462306a36Sopenharmony_ci __func__, osd->o_osd, m->spgid.pgid.pool, 448562306a36Sopenharmony_ci m->spgid.pgid.seed, m->spgid.shard, m->id); 
448662306a36Sopenharmony_ci return; 448762306a36Sopenharmony_ci } 448862306a36Sopenharmony_ci 448962306a36Sopenharmony_ci if (hoid_compare(backoff->begin, m->begin) && 449062306a36Sopenharmony_ci hoid_compare(backoff->end, m->end)) { 449162306a36Sopenharmony_ci pr_err("%s osd%d spgid %llu.%xs%d id %llu bad range?\n", 449262306a36Sopenharmony_ci __func__, osd->o_osd, m->spgid.pgid.pool, 449362306a36Sopenharmony_ci m->spgid.pgid.seed, m->spgid.shard, m->id); 449462306a36Sopenharmony_ci /* unblock it anyway... */ 449562306a36Sopenharmony_ci } 449662306a36Sopenharmony_ci 449762306a36Sopenharmony_ci spg = lookup_spg_mapping(&osd->o_backoff_mappings, &backoff->spgid); 449862306a36Sopenharmony_ci BUG_ON(!spg); 449962306a36Sopenharmony_ci 450062306a36Sopenharmony_ci erase_backoff(&spg->backoffs, backoff); 450162306a36Sopenharmony_ci erase_backoff_by_id(&osd->o_backoffs_by_id, backoff); 450262306a36Sopenharmony_ci free_backoff(backoff); 450362306a36Sopenharmony_ci 450462306a36Sopenharmony_ci if (RB_EMPTY_ROOT(&spg->backoffs)) { 450562306a36Sopenharmony_ci erase_spg_mapping(&osd->o_backoff_mappings, spg); 450662306a36Sopenharmony_ci free_spg_mapping(spg); 450762306a36Sopenharmony_ci } 450862306a36Sopenharmony_ci 450962306a36Sopenharmony_ci for (n = rb_first(&osd->o_requests); n; n = rb_next(n)) { 451062306a36Sopenharmony_ci struct ceph_osd_request *req = 451162306a36Sopenharmony_ci rb_entry(n, struct ceph_osd_request, r_node); 451262306a36Sopenharmony_ci 451362306a36Sopenharmony_ci if (!ceph_spg_compare(&req->r_t.spgid, &m->spgid)) { 451462306a36Sopenharmony_ci /* 451562306a36Sopenharmony_ci * Match against @m, not @backoff -- the PG may 451662306a36Sopenharmony_ci * have split on the OSD. 451762306a36Sopenharmony_ci */ 451862306a36Sopenharmony_ci if (target_contained_by(&req->r_t, m->begin, m->end)) { 451962306a36Sopenharmony_ci /* 452062306a36Sopenharmony_ci * If no other installed backoff applies, 452162306a36Sopenharmony_ci * resend. 
452262306a36Sopenharmony_ci */ 452362306a36Sopenharmony_ci send_request(req); 452462306a36Sopenharmony_ci } 452562306a36Sopenharmony_ci } 452662306a36Sopenharmony_ci } 452762306a36Sopenharmony_ci} 452862306a36Sopenharmony_ci 452962306a36Sopenharmony_cistatic void handle_backoff(struct ceph_osd *osd, struct ceph_msg *msg) 453062306a36Sopenharmony_ci{ 453162306a36Sopenharmony_ci struct ceph_osd_client *osdc = osd->o_osdc; 453262306a36Sopenharmony_ci struct MOSDBackoff m; 453362306a36Sopenharmony_ci int ret; 453462306a36Sopenharmony_ci 453562306a36Sopenharmony_ci down_read(&osdc->lock); 453662306a36Sopenharmony_ci if (!osd_registered(osd)) { 453762306a36Sopenharmony_ci dout("%s osd%d unknown\n", __func__, osd->o_osd); 453862306a36Sopenharmony_ci up_read(&osdc->lock); 453962306a36Sopenharmony_ci return; 454062306a36Sopenharmony_ci } 454162306a36Sopenharmony_ci WARN_ON(osd->o_osd != le64_to_cpu(msg->hdr.src.num)); 454262306a36Sopenharmony_ci 454362306a36Sopenharmony_ci mutex_lock(&osd->lock); 454462306a36Sopenharmony_ci ret = decode_MOSDBackoff(msg, &m); 454562306a36Sopenharmony_ci if (ret) { 454662306a36Sopenharmony_ci pr_err("failed to decode MOSDBackoff: %d\n", ret); 454762306a36Sopenharmony_ci ceph_msg_dump(msg); 454862306a36Sopenharmony_ci goto out_unlock; 454962306a36Sopenharmony_ci } 455062306a36Sopenharmony_ci 455162306a36Sopenharmony_ci switch (m.op) { 455262306a36Sopenharmony_ci case CEPH_OSD_BACKOFF_OP_BLOCK: 455362306a36Sopenharmony_ci handle_backoff_block(osd, &m); 455462306a36Sopenharmony_ci break; 455562306a36Sopenharmony_ci case CEPH_OSD_BACKOFF_OP_UNBLOCK: 455662306a36Sopenharmony_ci handle_backoff_unblock(osd, &m); 455762306a36Sopenharmony_ci break; 455862306a36Sopenharmony_ci default: 455962306a36Sopenharmony_ci pr_err("%s osd%d unknown op %d\n", __func__, osd->o_osd, m.op); 456062306a36Sopenharmony_ci } 456162306a36Sopenharmony_ci 456262306a36Sopenharmony_ci free_hoid(m.begin); 456362306a36Sopenharmony_ci free_hoid(m.end); 456462306a36Sopenharmony_ci 
456562306a36Sopenharmony_ciout_unlock: 456662306a36Sopenharmony_ci mutex_unlock(&osd->lock); 456762306a36Sopenharmony_ci up_read(&osdc->lock); 456862306a36Sopenharmony_ci} 456962306a36Sopenharmony_ci 457062306a36Sopenharmony_ci/* 457162306a36Sopenharmony_ci * Process osd watch notifications 457262306a36Sopenharmony_ci */ 457362306a36Sopenharmony_cistatic void handle_watch_notify(struct ceph_osd_client *osdc, 457462306a36Sopenharmony_ci struct ceph_msg *msg) 457562306a36Sopenharmony_ci{ 457662306a36Sopenharmony_ci void *p = msg->front.iov_base; 457762306a36Sopenharmony_ci void *const end = p + msg->front.iov_len; 457862306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq; 457962306a36Sopenharmony_ci struct linger_work *lwork; 458062306a36Sopenharmony_ci u8 proto_ver, opcode; 458162306a36Sopenharmony_ci u64 cookie, notify_id; 458262306a36Sopenharmony_ci u64 notifier_id = 0; 458362306a36Sopenharmony_ci s32 return_code = 0; 458462306a36Sopenharmony_ci void *payload = NULL; 458562306a36Sopenharmony_ci u32 payload_len = 0; 458662306a36Sopenharmony_ci 458762306a36Sopenharmony_ci ceph_decode_8_safe(&p, end, proto_ver, bad); 458862306a36Sopenharmony_ci ceph_decode_8_safe(&p, end, opcode, bad); 458962306a36Sopenharmony_ci ceph_decode_64_safe(&p, end, cookie, bad); 459062306a36Sopenharmony_ci p += 8; /* skip ver */ 459162306a36Sopenharmony_ci ceph_decode_64_safe(&p, end, notify_id, bad); 459262306a36Sopenharmony_ci 459362306a36Sopenharmony_ci if (proto_ver >= 1) { 459462306a36Sopenharmony_ci ceph_decode_32_safe(&p, end, payload_len, bad); 459562306a36Sopenharmony_ci ceph_decode_need(&p, end, payload_len, bad); 459662306a36Sopenharmony_ci payload = p; 459762306a36Sopenharmony_ci p += payload_len; 459862306a36Sopenharmony_ci } 459962306a36Sopenharmony_ci 460062306a36Sopenharmony_ci if (le16_to_cpu(msg->hdr.version) >= 2) 460162306a36Sopenharmony_ci ceph_decode_32_safe(&p, end, return_code, bad); 460262306a36Sopenharmony_ci 460362306a36Sopenharmony_ci if 
(le16_to_cpu(msg->hdr.version) >= 3) 460462306a36Sopenharmony_ci ceph_decode_64_safe(&p, end, notifier_id, bad); 460562306a36Sopenharmony_ci 460662306a36Sopenharmony_ci down_read(&osdc->lock); 460762306a36Sopenharmony_ci lreq = lookup_linger_osdc(&osdc->linger_requests, cookie); 460862306a36Sopenharmony_ci if (!lreq) { 460962306a36Sopenharmony_ci dout("%s opcode %d cookie %llu dne\n", __func__, opcode, 461062306a36Sopenharmony_ci cookie); 461162306a36Sopenharmony_ci goto out_unlock_osdc; 461262306a36Sopenharmony_ci } 461362306a36Sopenharmony_ci 461462306a36Sopenharmony_ci mutex_lock(&lreq->lock); 461562306a36Sopenharmony_ci dout("%s opcode %d cookie %llu lreq %p is_watch %d\n", __func__, 461662306a36Sopenharmony_ci opcode, cookie, lreq, lreq->is_watch); 461762306a36Sopenharmony_ci if (opcode == CEPH_WATCH_EVENT_DISCONNECT) { 461862306a36Sopenharmony_ci if (!lreq->last_error) { 461962306a36Sopenharmony_ci lreq->last_error = -ENOTCONN; 462062306a36Sopenharmony_ci queue_watch_error(lreq); 462162306a36Sopenharmony_ci } 462262306a36Sopenharmony_ci } else if (!lreq->is_watch) { 462362306a36Sopenharmony_ci /* CEPH_WATCH_EVENT_NOTIFY_COMPLETE */ 462462306a36Sopenharmony_ci if (lreq->notify_id && lreq->notify_id != notify_id) { 462562306a36Sopenharmony_ci dout("lreq %p notify_id %llu != %llu, ignoring\n", lreq, 462662306a36Sopenharmony_ci lreq->notify_id, notify_id); 462762306a36Sopenharmony_ci } else if (!completion_done(&lreq->notify_finish_wait)) { 462862306a36Sopenharmony_ci struct ceph_msg_data *data = 462962306a36Sopenharmony_ci msg->num_data_items ? 
&msg->data[0] : NULL; 463062306a36Sopenharmony_ci 463162306a36Sopenharmony_ci if (data) { 463262306a36Sopenharmony_ci if (lreq->preply_pages) { 463362306a36Sopenharmony_ci WARN_ON(data->type != 463462306a36Sopenharmony_ci CEPH_MSG_DATA_PAGES); 463562306a36Sopenharmony_ci *lreq->preply_pages = data->pages; 463662306a36Sopenharmony_ci *lreq->preply_len = data->length; 463762306a36Sopenharmony_ci data->own_pages = false; 463862306a36Sopenharmony_ci } 463962306a36Sopenharmony_ci } 464062306a36Sopenharmony_ci lreq->notify_finish_error = return_code; 464162306a36Sopenharmony_ci complete_all(&lreq->notify_finish_wait); 464262306a36Sopenharmony_ci } 464362306a36Sopenharmony_ci } else { 464462306a36Sopenharmony_ci /* CEPH_WATCH_EVENT_NOTIFY */ 464562306a36Sopenharmony_ci lwork = lwork_alloc(lreq, do_watch_notify); 464662306a36Sopenharmony_ci if (!lwork) { 464762306a36Sopenharmony_ci pr_err("failed to allocate notify-lwork\n"); 464862306a36Sopenharmony_ci goto out_unlock_lreq; 464962306a36Sopenharmony_ci } 465062306a36Sopenharmony_ci 465162306a36Sopenharmony_ci lwork->notify.notify_id = notify_id; 465262306a36Sopenharmony_ci lwork->notify.notifier_id = notifier_id; 465362306a36Sopenharmony_ci lwork->notify.payload = payload; 465462306a36Sopenharmony_ci lwork->notify.payload_len = payload_len; 465562306a36Sopenharmony_ci lwork->notify.msg = ceph_msg_get(msg); 465662306a36Sopenharmony_ci lwork_queue(lwork); 465762306a36Sopenharmony_ci } 465862306a36Sopenharmony_ci 465962306a36Sopenharmony_ciout_unlock_lreq: 466062306a36Sopenharmony_ci mutex_unlock(&lreq->lock); 466162306a36Sopenharmony_ciout_unlock_osdc: 466262306a36Sopenharmony_ci up_read(&osdc->lock); 466362306a36Sopenharmony_ci return; 466462306a36Sopenharmony_ci 466562306a36Sopenharmony_cibad: 466662306a36Sopenharmony_ci pr_err("osdc handle_watch_notify corrupt msg\n"); 466762306a36Sopenharmony_ci} 466862306a36Sopenharmony_ci 466962306a36Sopenharmony_ci/* 467062306a36Sopenharmony_ci * Register request, send initial 
attempt. 467162306a36Sopenharmony_ci */ 467262306a36Sopenharmony_civoid ceph_osdc_start_request(struct ceph_osd_client *osdc, 467362306a36Sopenharmony_ci struct ceph_osd_request *req) 467462306a36Sopenharmony_ci{ 467562306a36Sopenharmony_ci down_read(&osdc->lock); 467662306a36Sopenharmony_ci submit_request(req, false); 467762306a36Sopenharmony_ci up_read(&osdc->lock); 467862306a36Sopenharmony_ci} 467962306a36Sopenharmony_ciEXPORT_SYMBOL(ceph_osdc_start_request); 468062306a36Sopenharmony_ci 468162306a36Sopenharmony_ci/* 468262306a36Sopenharmony_ci * Unregister request. If @req was registered, it isn't completed: 468362306a36Sopenharmony_ci * r_result isn't set and __complete_request() isn't invoked. 468462306a36Sopenharmony_ci * 468562306a36Sopenharmony_ci * If @req wasn't registered, this call may have raced with 468662306a36Sopenharmony_ci * handle_reply(), in which case r_result would already be set and 468762306a36Sopenharmony_ci * __complete_request() would be getting invoked, possibly even 468862306a36Sopenharmony_ci * concurrently with this call. 
468962306a36Sopenharmony_ci */ 469062306a36Sopenharmony_civoid ceph_osdc_cancel_request(struct ceph_osd_request *req) 469162306a36Sopenharmony_ci{ 469262306a36Sopenharmony_ci struct ceph_osd_client *osdc = req->r_osdc; 469362306a36Sopenharmony_ci 469462306a36Sopenharmony_ci down_write(&osdc->lock); 469562306a36Sopenharmony_ci if (req->r_osd) 469662306a36Sopenharmony_ci cancel_request(req); 469762306a36Sopenharmony_ci up_write(&osdc->lock); 469862306a36Sopenharmony_ci} 469962306a36Sopenharmony_ciEXPORT_SYMBOL(ceph_osdc_cancel_request); 470062306a36Sopenharmony_ci 470162306a36Sopenharmony_ci/* 470262306a36Sopenharmony_ci * @timeout: in jiffies, 0 means "wait forever" 470362306a36Sopenharmony_ci */ 470462306a36Sopenharmony_cistatic int wait_request_timeout(struct ceph_osd_request *req, 470562306a36Sopenharmony_ci unsigned long timeout) 470662306a36Sopenharmony_ci{ 470762306a36Sopenharmony_ci long left; 470862306a36Sopenharmony_ci 470962306a36Sopenharmony_ci dout("%s req %p tid %llu\n", __func__, req, req->r_tid); 471062306a36Sopenharmony_ci left = wait_for_completion_killable_timeout(&req->r_completion, 471162306a36Sopenharmony_ci ceph_timeout_jiffies(timeout)); 471262306a36Sopenharmony_ci if (left <= 0) { 471362306a36Sopenharmony_ci left = left ?: -ETIMEDOUT; 471462306a36Sopenharmony_ci ceph_osdc_cancel_request(req); 471562306a36Sopenharmony_ci } else { 471662306a36Sopenharmony_ci left = req->r_result; /* completed */ 471762306a36Sopenharmony_ci } 471862306a36Sopenharmony_ci 471962306a36Sopenharmony_ci return left; 472062306a36Sopenharmony_ci} 472162306a36Sopenharmony_ci 472262306a36Sopenharmony_ci/* 472362306a36Sopenharmony_ci * wait for a request to complete 472462306a36Sopenharmony_ci */ 472562306a36Sopenharmony_ciint ceph_osdc_wait_request(struct ceph_osd_client *osdc, 472662306a36Sopenharmony_ci struct ceph_osd_request *req) 472762306a36Sopenharmony_ci{ 472862306a36Sopenharmony_ci return wait_request_timeout(req, 0); 472962306a36Sopenharmony_ci} 
473062306a36Sopenharmony_ciEXPORT_SYMBOL(ceph_osdc_wait_request); 473162306a36Sopenharmony_ci 473262306a36Sopenharmony_ci/* 473362306a36Sopenharmony_ci * sync - wait for all in-flight requests to flush. avoid starvation. 473462306a36Sopenharmony_ci */ 473562306a36Sopenharmony_civoid ceph_osdc_sync(struct ceph_osd_client *osdc) 473662306a36Sopenharmony_ci{ 473762306a36Sopenharmony_ci struct rb_node *n, *p; 473862306a36Sopenharmony_ci u64 last_tid = atomic64_read(&osdc->last_tid); 473962306a36Sopenharmony_ci 474062306a36Sopenharmony_ciagain: 474162306a36Sopenharmony_ci down_read(&osdc->lock); 474262306a36Sopenharmony_ci for (n = rb_first(&osdc->osds); n; n = rb_next(n)) { 474362306a36Sopenharmony_ci struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node); 474462306a36Sopenharmony_ci 474562306a36Sopenharmony_ci mutex_lock(&osd->lock); 474662306a36Sopenharmony_ci for (p = rb_first(&osd->o_requests); p; p = rb_next(p)) { 474762306a36Sopenharmony_ci struct ceph_osd_request *req = 474862306a36Sopenharmony_ci rb_entry(p, struct ceph_osd_request, r_node); 474962306a36Sopenharmony_ci 475062306a36Sopenharmony_ci if (req->r_tid > last_tid) 475162306a36Sopenharmony_ci break; 475262306a36Sopenharmony_ci 475362306a36Sopenharmony_ci if (!(req->r_flags & CEPH_OSD_FLAG_WRITE)) 475462306a36Sopenharmony_ci continue; 475562306a36Sopenharmony_ci 475662306a36Sopenharmony_ci ceph_osdc_get_request(req); 475762306a36Sopenharmony_ci mutex_unlock(&osd->lock); 475862306a36Sopenharmony_ci up_read(&osdc->lock); 475962306a36Sopenharmony_ci dout("%s waiting on req %p tid %llu last_tid %llu\n", 476062306a36Sopenharmony_ci __func__, req, req->r_tid, last_tid); 476162306a36Sopenharmony_ci wait_for_completion(&req->r_completion); 476262306a36Sopenharmony_ci ceph_osdc_put_request(req); 476362306a36Sopenharmony_ci goto again; 476462306a36Sopenharmony_ci } 476562306a36Sopenharmony_ci 476662306a36Sopenharmony_ci mutex_unlock(&osd->lock); 476762306a36Sopenharmony_ci } 476862306a36Sopenharmony_ci 
476962306a36Sopenharmony_ci up_read(&osdc->lock); 477062306a36Sopenharmony_ci dout("%s done last_tid %llu\n", __func__, last_tid); 477162306a36Sopenharmony_ci} 477262306a36Sopenharmony_ciEXPORT_SYMBOL(ceph_osdc_sync); 477362306a36Sopenharmony_ci 477462306a36Sopenharmony_ci/* 477562306a36Sopenharmony_ci * Returns a handle, caller owns a ref. 477662306a36Sopenharmony_ci */ 477762306a36Sopenharmony_cistruct ceph_osd_linger_request * 477862306a36Sopenharmony_ciceph_osdc_watch(struct ceph_osd_client *osdc, 477962306a36Sopenharmony_ci struct ceph_object_id *oid, 478062306a36Sopenharmony_ci struct ceph_object_locator *oloc, 478162306a36Sopenharmony_ci rados_watchcb2_t wcb, 478262306a36Sopenharmony_ci rados_watcherrcb_t errcb, 478362306a36Sopenharmony_ci void *data) 478462306a36Sopenharmony_ci{ 478562306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq; 478662306a36Sopenharmony_ci int ret; 478762306a36Sopenharmony_ci 478862306a36Sopenharmony_ci lreq = linger_alloc(osdc); 478962306a36Sopenharmony_ci if (!lreq) 479062306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 479162306a36Sopenharmony_ci 479262306a36Sopenharmony_ci lreq->is_watch = true; 479362306a36Sopenharmony_ci lreq->wcb = wcb; 479462306a36Sopenharmony_ci lreq->errcb = errcb; 479562306a36Sopenharmony_ci lreq->data = data; 479662306a36Sopenharmony_ci lreq->watch_valid_thru = jiffies; 479762306a36Sopenharmony_ci 479862306a36Sopenharmony_ci ceph_oid_copy(&lreq->t.base_oid, oid); 479962306a36Sopenharmony_ci ceph_oloc_copy(&lreq->t.base_oloc, oloc); 480062306a36Sopenharmony_ci lreq->t.flags = CEPH_OSD_FLAG_WRITE; 480162306a36Sopenharmony_ci ktime_get_real_ts64(&lreq->mtime); 480262306a36Sopenharmony_ci 480362306a36Sopenharmony_ci linger_submit(lreq); 480462306a36Sopenharmony_ci ret = linger_reg_commit_wait(lreq); 480562306a36Sopenharmony_ci if (ret) { 480662306a36Sopenharmony_ci linger_cancel(lreq); 480762306a36Sopenharmony_ci goto err_put_lreq; 480862306a36Sopenharmony_ci } 480962306a36Sopenharmony_ci 
481062306a36Sopenharmony_ci return lreq; 481162306a36Sopenharmony_ci 481262306a36Sopenharmony_cierr_put_lreq: 481362306a36Sopenharmony_ci linger_put(lreq); 481462306a36Sopenharmony_ci return ERR_PTR(ret); 481562306a36Sopenharmony_ci} 481662306a36Sopenharmony_ciEXPORT_SYMBOL(ceph_osdc_watch); 481762306a36Sopenharmony_ci 481862306a36Sopenharmony_ci/* 481962306a36Sopenharmony_ci * Releases a ref. 482062306a36Sopenharmony_ci * 482162306a36Sopenharmony_ci * Times out after mount_timeout to preserve rbd unmap behaviour 482262306a36Sopenharmony_ci * introduced in 2894e1d76974 ("rbd: timeout watch teardown on unmap 482362306a36Sopenharmony_ci * with mount_timeout"). 482462306a36Sopenharmony_ci */ 482562306a36Sopenharmony_ciint ceph_osdc_unwatch(struct ceph_osd_client *osdc, 482662306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq) 482762306a36Sopenharmony_ci{ 482862306a36Sopenharmony_ci struct ceph_options *opts = osdc->client->options; 482962306a36Sopenharmony_ci struct ceph_osd_request *req; 483062306a36Sopenharmony_ci int ret; 483162306a36Sopenharmony_ci 483262306a36Sopenharmony_ci req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO); 483362306a36Sopenharmony_ci if (!req) 483462306a36Sopenharmony_ci return -ENOMEM; 483562306a36Sopenharmony_ci 483662306a36Sopenharmony_ci ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid); 483762306a36Sopenharmony_ci ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); 483862306a36Sopenharmony_ci req->r_flags = CEPH_OSD_FLAG_WRITE; 483962306a36Sopenharmony_ci ktime_get_real_ts64(&req->r_mtime); 484062306a36Sopenharmony_ci osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_UNWATCH, 484162306a36Sopenharmony_ci lreq->linger_id, 0); 484262306a36Sopenharmony_ci 484362306a36Sopenharmony_ci ret = ceph_osdc_alloc_messages(req, GFP_NOIO); 484462306a36Sopenharmony_ci if (ret) 484562306a36Sopenharmony_ci goto out_put_req; 484662306a36Sopenharmony_ci 484762306a36Sopenharmony_ci ceph_osdc_start_request(osdc, req); 
484862306a36Sopenharmony_ci linger_cancel(lreq); 484962306a36Sopenharmony_ci linger_put(lreq); 485062306a36Sopenharmony_ci ret = wait_request_timeout(req, opts->mount_timeout); 485162306a36Sopenharmony_ci 485262306a36Sopenharmony_ciout_put_req: 485362306a36Sopenharmony_ci ceph_osdc_put_request(req); 485462306a36Sopenharmony_ci return ret; 485562306a36Sopenharmony_ci} 485662306a36Sopenharmony_ciEXPORT_SYMBOL(ceph_osdc_unwatch); 485762306a36Sopenharmony_ci 485862306a36Sopenharmony_cistatic int osd_req_op_notify_ack_init(struct ceph_osd_request *req, int which, 485962306a36Sopenharmony_ci u64 notify_id, u64 cookie, void *payload, 486062306a36Sopenharmony_ci u32 payload_len) 486162306a36Sopenharmony_ci{ 486262306a36Sopenharmony_ci struct ceph_osd_req_op *op; 486362306a36Sopenharmony_ci struct ceph_pagelist *pl; 486462306a36Sopenharmony_ci int ret; 486562306a36Sopenharmony_ci 486662306a36Sopenharmony_ci op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY_ACK, 0); 486762306a36Sopenharmony_ci 486862306a36Sopenharmony_ci pl = ceph_pagelist_alloc(GFP_NOIO); 486962306a36Sopenharmony_ci if (!pl) 487062306a36Sopenharmony_ci return -ENOMEM; 487162306a36Sopenharmony_ci 487262306a36Sopenharmony_ci ret = ceph_pagelist_encode_64(pl, notify_id); 487362306a36Sopenharmony_ci ret |= ceph_pagelist_encode_64(pl, cookie); 487462306a36Sopenharmony_ci if (payload) { 487562306a36Sopenharmony_ci ret |= ceph_pagelist_encode_32(pl, payload_len); 487662306a36Sopenharmony_ci ret |= ceph_pagelist_append(pl, payload, payload_len); 487762306a36Sopenharmony_ci } else { 487862306a36Sopenharmony_ci ret |= ceph_pagelist_encode_32(pl, 0); 487962306a36Sopenharmony_ci } 488062306a36Sopenharmony_ci if (ret) { 488162306a36Sopenharmony_ci ceph_pagelist_release(pl); 488262306a36Sopenharmony_ci return -ENOMEM; 488362306a36Sopenharmony_ci } 488462306a36Sopenharmony_ci 488562306a36Sopenharmony_ci ceph_osd_data_pagelist_init(&op->notify_ack.request_data, pl); 488662306a36Sopenharmony_ci op->indata_len = 
pl->length; 488762306a36Sopenharmony_ci return 0; 488862306a36Sopenharmony_ci} 488962306a36Sopenharmony_ci 489062306a36Sopenharmony_ciint ceph_osdc_notify_ack(struct ceph_osd_client *osdc, 489162306a36Sopenharmony_ci struct ceph_object_id *oid, 489262306a36Sopenharmony_ci struct ceph_object_locator *oloc, 489362306a36Sopenharmony_ci u64 notify_id, 489462306a36Sopenharmony_ci u64 cookie, 489562306a36Sopenharmony_ci void *payload, 489662306a36Sopenharmony_ci u32 payload_len) 489762306a36Sopenharmony_ci{ 489862306a36Sopenharmony_ci struct ceph_osd_request *req; 489962306a36Sopenharmony_ci int ret; 490062306a36Sopenharmony_ci 490162306a36Sopenharmony_ci req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO); 490262306a36Sopenharmony_ci if (!req) 490362306a36Sopenharmony_ci return -ENOMEM; 490462306a36Sopenharmony_ci 490562306a36Sopenharmony_ci ceph_oid_copy(&req->r_base_oid, oid); 490662306a36Sopenharmony_ci ceph_oloc_copy(&req->r_base_oloc, oloc); 490762306a36Sopenharmony_ci req->r_flags = CEPH_OSD_FLAG_READ; 490862306a36Sopenharmony_ci 490962306a36Sopenharmony_ci ret = osd_req_op_notify_ack_init(req, 0, notify_id, cookie, payload, 491062306a36Sopenharmony_ci payload_len); 491162306a36Sopenharmony_ci if (ret) 491262306a36Sopenharmony_ci goto out_put_req; 491362306a36Sopenharmony_ci 491462306a36Sopenharmony_ci ret = ceph_osdc_alloc_messages(req, GFP_NOIO); 491562306a36Sopenharmony_ci if (ret) 491662306a36Sopenharmony_ci goto out_put_req; 491762306a36Sopenharmony_ci 491862306a36Sopenharmony_ci ceph_osdc_start_request(osdc, req); 491962306a36Sopenharmony_ci ret = ceph_osdc_wait_request(osdc, req); 492062306a36Sopenharmony_ci 492162306a36Sopenharmony_ciout_put_req: 492262306a36Sopenharmony_ci ceph_osdc_put_request(req); 492362306a36Sopenharmony_ci return ret; 492462306a36Sopenharmony_ci} 492562306a36Sopenharmony_ciEXPORT_SYMBOL(ceph_osdc_notify_ack); 492662306a36Sopenharmony_ci 492762306a36Sopenharmony_ci/* 492862306a36Sopenharmony_ci * @timeout: in seconds 
492962306a36Sopenharmony_ci * 493062306a36Sopenharmony_ci * @preply_{pages,len} are initialized both on success and error. 493162306a36Sopenharmony_ci * The caller is responsible for: 493262306a36Sopenharmony_ci * 493362306a36Sopenharmony_ci * ceph_release_page_vector(reply_pages, calc_pages_for(0, reply_len)) 493462306a36Sopenharmony_ci */ 493562306a36Sopenharmony_ciint ceph_osdc_notify(struct ceph_osd_client *osdc, 493662306a36Sopenharmony_ci struct ceph_object_id *oid, 493762306a36Sopenharmony_ci struct ceph_object_locator *oloc, 493862306a36Sopenharmony_ci void *payload, 493962306a36Sopenharmony_ci u32 payload_len, 494062306a36Sopenharmony_ci u32 timeout, 494162306a36Sopenharmony_ci struct page ***preply_pages, 494262306a36Sopenharmony_ci size_t *preply_len) 494362306a36Sopenharmony_ci{ 494462306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq; 494562306a36Sopenharmony_ci int ret; 494662306a36Sopenharmony_ci 494762306a36Sopenharmony_ci WARN_ON(!timeout); 494862306a36Sopenharmony_ci if (preply_pages) { 494962306a36Sopenharmony_ci *preply_pages = NULL; 495062306a36Sopenharmony_ci *preply_len = 0; 495162306a36Sopenharmony_ci } 495262306a36Sopenharmony_ci 495362306a36Sopenharmony_ci lreq = linger_alloc(osdc); 495462306a36Sopenharmony_ci if (!lreq) 495562306a36Sopenharmony_ci return -ENOMEM; 495662306a36Sopenharmony_ci 495762306a36Sopenharmony_ci lreq->request_pl = ceph_pagelist_alloc(GFP_NOIO); 495862306a36Sopenharmony_ci if (!lreq->request_pl) { 495962306a36Sopenharmony_ci ret = -ENOMEM; 496062306a36Sopenharmony_ci goto out_put_lreq; 496162306a36Sopenharmony_ci } 496262306a36Sopenharmony_ci 496362306a36Sopenharmony_ci ret = ceph_pagelist_encode_32(lreq->request_pl, 1); /* prot_ver */ 496462306a36Sopenharmony_ci ret |= ceph_pagelist_encode_32(lreq->request_pl, timeout); 496562306a36Sopenharmony_ci ret |= ceph_pagelist_encode_32(lreq->request_pl, payload_len); 496662306a36Sopenharmony_ci ret |= ceph_pagelist_append(lreq->request_pl, payload, payload_len); 
496762306a36Sopenharmony_ci if (ret) { 496862306a36Sopenharmony_ci ret = -ENOMEM; 496962306a36Sopenharmony_ci goto out_put_lreq; 497062306a36Sopenharmony_ci } 497162306a36Sopenharmony_ci 497262306a36Sopenharmony_ci /* for notify_id */ 497362306a36Sopenharmony_ci lreq->notify_id_pages = ceph_alloc_page_vector(1, GFP_NOIO); 497462306a36Sopenharmony_ci if (IS_ERR(lreq->notify_id_pages)) { 497562306a36Sopenharmony_ci ret = PTR_ERR(lreq->notify_id_pages); 497662306a36Sopenharmony_ci lreq->notify_id_pages = NULL; 497762306a36Sopenharmony_ci goto out_put_lreq; 497862306a36Sopenharmony_ci } 497962306a36Sopenharmony_ci 498062306a36Sopenharmony_ci lreq->preply_pages = preply_pages; 498162306a36Sopenharmony_ci lreq->preply_len = preply_len; 498262306a36Sopenharmony_ci 498362306a36Sopenharmony_ci ceph_oid_copy(&lreq->t.base_oid, oid); 498462306a36Sopenharmony_ci ceph_oloc_copy(&lreq->t.base_oloc, oloc); 498562306a36Sopenharmony_ci lreq->t.flags = CEPH_OSD_FLAG_READ; 498662306a36Sopenharmony_ci 498762306a36Sopenharmony_ci linger_submit(lreq); 498862306a36Sopenharmony_ci ret = linger_reg_commit_wait(lreq); 498962306a36Sopenharmony_ci if (!ret) 499062306a36Sopenharmony_ci ret = linger_notify_finish_wait(lreq, 499162306a36Sopenharmony_ci msecs_to_jiffies(2 * timeout * MSEC_PER_SEC)); 499262306a36Sopenharmony_ci else 499362306a36Sopenharmony_ci dout("lreq %p failed to initiate notify %d\n", lreq, ret); 499462306a36Sopenharmony_ci 499562306a36Sopenharmony_ci linger_cancel(lreq); 499662306a36Sopenharmony_ciout_put_lreq: 499762306a36Sopenharmony_ci linger_put(lreq); 499862306a36Sopenharmony_ci return ret; 499962306a36Sopenharmony_ci} 500062306a36Sopenharmony_ciEXPORT_SYMBOL(ceph_osdc_notify); 500162306a36Sopenharmony_ci 500262306a36Sopenharmony_ci/* 500362306a36Sopenharmony_ci * Return the number of milliseconds since the watch was last 500462306a36Sopenharmony_ci * confirmed, or an error. 
If there is an error, the watch is no 500562306a36Sopenharmony_ci * longer valid, and should be destroyed with ceph_osdc_unwatch(). 500662306a36Sopenharmony_ci */ 500762306a36Sopenharmony_ciint ceph_osdc_watch_check(struct ceph_osd_client *osdc, 500862306a36Sopenharmony_ci struct ceph_osd_linger_request *lreq) 500962306a36Sopenharmony_ci{ 501062306a36Sopenharmony_ci unsigned long stamp, age; 501162306a36Sopenharmony_ci int ret; 501262306a36Sopenharmony_ci 501362306a36Sopenharmony_ci down_read(&osdc->lock); 501462306a36Sopenharmony_ci mutex_lock(&lreq->lock); 501562306a36Sopenharmony_ci stamp = lreq->watch_valid_thru; 501662306a36Sopenharmony_ci if (!list_empty(&lreq->pending_lworks)) { 501762306a36Sopenharmony_ci struct linger_work *lwork = 501862306a36Sopenharmony_ci list_first_entry(&lreq->pending_lworks, 501962306a36Sopenharmony_ci struct linger_work, 502062306a36Sopenharmony_ci pending_item); 502162306a36Sopenharmony_ci 502262306a36Sopenharmony_ci if (time_before(lwork->queued_stamp, stamp)) 502362306a36Sopenharmony_ci stamp = lwork->queued_stamp; 502462306a36Sopenharmony_ci } 502562306a36Sopenharmony_ci age = jiffies - stamp; 502662306a36Sopenharmony_ci dout("%s lreq %p linger_id %llu age %lu last_error %d\n", __func__, 502762306a36Sopenharmony_ci lreq, lreq->linger_id, age, lreq->last_error); 502862306a36Sopenharmony_ci /* we are truncating to msecs, so return a safe upper bound */ 502962306a36Sopenharmony_ci ret = lreq->last_error ?: 1 + jiffies_to_msecs(age); 503062306a36Sopenharmony_ci 503162306a36Sopenharmony_ci mutex_unlock(&lreq->lock); 503262306a36Sopenharmony_ci up_read(&osdc->lock); 503362306a36Sopenharmony_ci return ret; 503462306a36Sopenharmony_ci} 503562306a36Sopenharmony_ci 503662306a36Sopenharmony_cistatic int decode_watcher(void **p, void *end, struct ceph_watch_item *item) 503762306a36Sopenharmony_ci{ 503862306a36Sopenharmony_ci u8 struct_v; 503962306a36Sopenharmony_ci u32 struct_len; 504062306a36Sopenharmony_ci int ret; 
504162306a36Sopenharmony_ci 504262306a36Sopenharmony_ci ret = ceph_start_decoding(p, end, 2, "watch_item_t", 504362306a36Sopenharmony_ci &struct_v, &struct_len); 504462306a36Sopenharmony_ci if (ret) 504562306a36Sopenharmony_ci goto bad; 504662306a36Sopenharmony_ci 504762306a36Sopenharmony_ci ret = -EINVAL; 504862306a36Sopenharmony_ci ceph_decode_copy_safe(p, end, &item->name, sizeof(item->name), bad); 504962306a36Sopenharmony_ci ceph_decode_64_safe(p, end, item->cookie, bad); 505062306a36Sopenharmony_ci ceph_decode_skip_32(p, end, bad); /* skip timeout seconds */ 505162306a36Sopenharmony_ci 505262306a36Sopenharmony_ci if (struct_v >= 2) { 505362306a36Sopenharmony_ci ret = ceph_decode_entity_addr(p, end, &item->addr); 505462306a36Sopenharmony_ci if (ret) 505562306a36Sopenharmony_ci goto bad; 505662306a36Sopenharmony_ci } else { 505762306a36Sopenharmony_ci ret = 0; 505862306a36Sopenharmony_ci } 505962306a36Sopenharmony_ci 506062306a36Sopenharmony_ci dout("%s %s%llu cookie %llu addr %s\n", __func__, 506162306a36Sopenharmony_ci ENTITY_NAME(item->name), item->cookie, 506262306a36Sopenharmony_ci ceph_pr_addr(&item->addr)); 506362306a36Sopenharmony_cibad: 506462306a36Sopenharmony_ci return ret; 506562306a36Sopenharmony_ci} 506662306a36Sopenharmony_ci 506762306a36Sopenharmony_cistatic int decode_watchers(void **p, void *end, 506862306a36Sopenharmony_ci struct ceph_watch_item **watchers, 506962306a36Sopenharmony_ci u32 *num_watchers) 507062306a36Sopenharmony_ci{ 507162306a36Sopenharmony_ci u8 struct_v; 507262306a36Sopenharmony_ci u32 struct_len; 507362306a36Sopenharmony_ci int i; 507462306a36Sopenharmony_ci int ret; 507562306a36Sopenharmony_ci 507662306a36Sopenharmony_ci ret = ceph_start_decoding(p, end, 1, "obj_list_watch_response_t", 507762306a36Sopenharmony_ci &struct_v, &struct_len); 507862306a36Sopenharmony_ci if (ret) 507962306a36Sopenharmony_ci return ret; 508062306a36Sopenharmony_ci 508162306a36Sopenharmony_ci *num_watchers = ceph_decode_32(p); 
508262306a36Sopenharmony_ci *watchers = kcalloc(*num_watchers, sizeof(**watchers), GFP_NOIO); 508362306a36Sopenharmony_ci if (!*watchers) 508462306a36Sopenharmony_ci return -ENOMEM; 508562306a36Sopenharmony_ci 508662306a36Sopenharmony_ci for (i = 0; i < *num_watchers; i++) { 508762306a36Sopenharmony_ci ret = decode_watcher(p, end, *watchers + i); 508862306a36Sopenharmony_ci if (ret) { 508962306a36Sopenharmony_ci kfree(*watchers); 509062306a36Sopenharmony_ci return ret; 509162306a36Sopenharmony_ci } 509262306a36Sopenharmony_ci } 509362306a36Sopenharmony_ci 509462306a36Sopenharmony_ci return 0; 509562306a36Sopenharmony_ci} 509662306a36Sopenharmony_ci 509762306a36Sopenharmony_ci/* 509862306a36Sopenharmony_ci * On success, the caller is responsible for: 509962306a36Sopenharmony_ci * 510062306a36Sopenharmony_ci * kfree(watchers); 510162306a36Sopenharmony_ci */ 510262306a36Sopenharmony_ciint ceph_osdc_list_watchers(struct ceph_osd_client *osdc, 510362306a36Sopenharmony_ci struct ceph_object_id *oid, 510462306a36Sopenharmony_ci struct ceph_object_locator *oloc, 510562306a36Sopenharmony_ci struct ceph_watch_item **watchers, 510662306a36Sopenharmony_ci u32 *num_watchers) 510762306a36Sopenharmony_ci{ 510862306a36Sopenharmony_ci struct ceph_osd_request *req; 510962306a36Sopenharmony_ci struct page **pages; 511062306a36Sopenharmony_ci int ret; 511162306a36Sopenharmony_ci 511262306a36Sopenharmony_ci req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO); 511362306a36Sopenharmony_ci if (!req) 511462306a36Sopenharmony_ci return -ENOMEM; 511562306a36Sopenharmony_ci 511662306a36Sopenharmony_ci ceph_oid_copy(&req->r_base_oid, oid); 511762306a36Sopenharmony_ci ceph_oloc_copy(&req->r_base_oloc, oloc); 511862306a36Sopenharmony_ci req->r_flags = CEPH_OSD_FLAG_READ; 511962306a36Sopenharmony_ci 512062306a36Sopenharmony_ci pages = ceph_alloc_page_vector(1, GFP_NOIO); 512162306a36Sopenharmony_ci if (IS_ERR(pages)) { 512262306a36Sopenharmony_ci ret = PTR_ERR(pages); 
512362306a36Sopenharmony_ci goto out_put_req; 512462306a36Sopenharmony_ci } 512562306a36Sopenharmony_ci 512662306a36Sopenharmony_ci osd_req_op_init(req, 0, CEPH_OSD_OP_LIST_WATCHERS, 0); 512762306a36Sopenharmony_ci ceph_osd_data_pages_init(osd_req_op_data(req, 0, list_watchers, 512862306a36Sopenharmony_ci response_data), 512962306a36Sopenharmony_ci pages, PAGE_SIZE, 0, false, true); 513062306a36Sopenharmony_ci 513162306a36Sopenharmony_ci ret = ceph_osdc_alloc_messages(req, GFP_NOIO); 513262306a36Sopenharmony_ci if (ret) 513362306a36Sopenharmony_ci goto out_put_req; 513462306a36Sopenharmony_ci 513562306a36Sopenharmony_ci ceph_osdc_start_request(osdc, req); 513662306a36Sopenharmony_ci ret = ceph_osdc_wait_request(osdc, req); 513762306a36Sopenharmony_ci if (ret >= 0) { 513862306a36Sopenharmony_ci void *p = page_address(pages[0]); 513962306a36Sopenharmony_ci void *const end = p + req->r_ops[0].outdata_len; 514062306a36Sopenharmony_ci 514162306a36Sopenharmony_ci ret = decode_watchers(&p, end, watchers, num_watchers); 514262306a36Sopenharmony_ci } 514362306a36Sopenharmony_ci 514462306a36Sopenharmony_ciout_put_req: 514562306a36Sopenharmony_ci ceph_osdc_put_request(req); 514662306a36Sopenharmony_ci return ret; 514762306a36Sopenharmony_ci} 514862306a36Sopenharmony_ciEXPORT_SYMBOL(ceph_osdc_list_watchers); 514962306a36Sopenharmony_ci 515062306a36Sopenharmony_ci/* 515162306a36Sopenharmony_ci * Call all pending notify callbacks - for use after a watch is 515262306a36Sopenharmony_ci * unregistered, to make sure no more callbacks for it will be invoked 515362306a36Sopenharmony_ci */ 515462306a36Sopenharmony_civoid ceph_osdc_flush_notifies(struct ceph_osd_client *osdc) 515562306a36Sopenharmony_ci{ 515662306a36Sopenharmony_ci dout("%s osdc %p\n", __func__, osdc); 515762306a36Sopenharmony_ci flush_workqueue(osdc->notify_wq); 515862306a36Sopenharmony_ci} 515962306a36Sopenharmony_ciEXPORT_SYMBOL(ceph_osdc_flush_notifies); 516062306a36Sopenharmony_ci 516162306a36Sopenharmony_civoid 
ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc) 516262306a36Sopenharmony_ci{ 516362306a36Sopenharmony_ci down_read(&osdc->lock); 516462306a36Sopenharmony_ci maybe_request_map(osdc); 516562306a36Sopenharmony_ci up_read(&osdc->lock); 516662306a36Sopenharmony_ci} 516762306a36Sopenharmony_ciEXPORT_SYMBOL(ceph_osdc_maybe_request_map); 516862306a36Sopenharmony_ci 516962306a36Sopenharmony_ci/* 517062306a36Sopenharmony_ci * Execute an OSD class method on an object. 517162306a36Sopenharmony_ci * 517262306a36Sopenharmony_ci * @flags: CEPH_OSD_FLAG_* 517362306a36Sopenharmony_ci * @resp_len: in/out param for reply length 517462306a36Sopenharmony_ci */ 517562306a36Sopenharmony_ciint ceph_osdc_call(struct ceph_osd_client *osdc, 517662306a36Sopenharmony_ci struct ceph_object_id *oid, 517762306a36Sopenharmony_ci struct ceph_object_locator *oloc, 517862306a36Sopenharmony_ci const char *class, const char *method, 517962306a36Sopenharmony_ci unsigned int flags, 518062306a36Sopenharmony_ci struct page *req_page, size_t req_len, 518162306a36Sopenharmony_ci struct page **resp_pages, size_t *resp_len) 518262306a36Sopenharmony_ci{ 518362306a36Sopenharmony_ci struct ceph_osd_request *req; 518462306a36Sopenharmony_ci int ret; 518562306a36Sopenharmony_ci 518662306a36Sopenharmony_ci if (req_len > PAGE_SIZE) 518762306a36Sopenharmony_ci return -E2BIG; 518862306a36Sopenharmony_ci 518962306a36Sopenharmony_ci req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO); 519062306a36Sopenharmony_ci if (!req) 519162306a36Sopenharmony_ci return -ENOMEM; 519262306a36Sopenharmony_ci 519362306a36Sopenharmony_ci ceph_oid_copy(&req->r_base_oid, oid); 519462306a36Sopenharmony_ci ceph_oloc_copy(&req->r_base_oloc, oloc); 519562306a36Sopenharmony_ci req->r_flags = flags; 519662306a36Sopenharmony_ci 519762306a36Sopenharmony_ci ret = osd_req_op_cls_init(req, 0, class, method); 519862306a36Sopenharmony_ci if (ret) 519962306a36Sopenharmony_ci goto out_put_req; 520062306a36Sopenharmony_ci 
520162306a36Sopenharmony_ci if (req_page) 520262306a36Sopenharmony_ci osd_req_op_cls_request_data_pages(req, 0, &req_page, req_len, 520362306a36Sopenharmony_ci 0, false, false); 520462306a36Sopenharmony_ci if (resp_pages) 520562306a36Sopenharmony_ci osd_req_op_cls_response_data_pages(req, 0, resp_pages, 520662306a36Sopenharmony_ci *resp_len, 0, false, false); 520762306a36Sopenharmony_ci 520862306a36Sopenharmony_ci ret = ceph_osdc_alloc_messages(req, GFP_NOIO); 520962306a36Sopenharmony_ci if (ret) 521062306a36Sopenharmony_ci goto out_put_req; 521162306a36Sopenharmony_ci 521262306a36Sopenharmony_ci ceph_osdc_start_request(osdc, req); 521362306a36Sopenharmony_ci ret = ceph_osdc_wait_request(osdc, req); 521462306a36Sopenharmony_ci if (ret >= 0) { 521562306a36Sopenharmony_ci ret = req->r_ops[0].rval; 521662306a36Sopenharmony_ci if (resp_pages) 521762306a36Sopenharmony_ci *resp_len = req->r_ops[0].outdata_len; 521862306a36Sopenharmony_ci } 521962306a36Sopenharmony_ci 522062306a36Sopenharmony_ciout_put_req: 522162306a36Sopenharmony_ci ceph_osdc_put_request(req); 522262306a36Sopenharmony_ci return ret; 522362306a36Sopenharmony_ci} 522462306a36Sopenharmony_ciEXPORT_SYMBOL(ceph_osdc_call); 522562306a36Sopenharmony_ci 522662306a36Sopenharmony_ci/* 522762306a36Sopenharmony_ci * reset all osd connections 522862306a36Sopenharmony_ci */ 522962306a36Sopenharmony_civoid ceph_osdc_reopen_osds(struct ceph_osd_client *osdc) 523062306a36Sopenharmony_ci{ 523162306a36Sopenharmony_ci struct rb_node *n; 523262306a36Sopenharmony_ci 523362306a36Sopenharmony_ci down_write(&osdc->lock); 523462306a36Sopenharmony_ci for (n = rb_first(&osdc->osds); n; ) { 523562306a36Sopenharmony_ci struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node); 523662306a36Sopenharmony_ci 523762306a36Sopenharmony_ci n = rb_next(n); 523862306a36Sopenharmony_ci if (!reopen_osd(osd)) 523962306a36Sopenharmony_ci kick_osd_requests(osd); 524062306a36Sopenharmony_ci } 524162306a36Sopenharmony_ci up_write(&osdc->lock); 
524262306a36Sopenharmony_ci} 524362306a36Sopenharmony_ci 524462306a36Sopenharmony_ci/* 524562306a36Sopenharmony_ci * init, shutdown 524662306a36Sopenharmony_ci */ 524762306a36Sopenharmony_ciint ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) 524862306a36Sopenharmony_ci{ 524962306a36Sopenharmony_ci int err; 525062306a36Sopenharmony_ci 525162306a36Sopenharmony_ci dout("init\n"); 525262306a36Sopenharmony_ci osdc->client = client; 525362306a36Sopenharmony_ci init_rwsem(&osdc->lock); 525462306a36Sopenharmony_ci osdc->osds = RB_ROOT; 525562306a36Sopenharmony_ci INIT_LIST_HEAD(&osdc->osd_lru); 525662306a36Sopenharmony_ci spin_lock_init(&osdc->osd_lru_lock); 525762306a36Sopenharmony_ci osd_init(&osdc->homeless_osd); 525862306a36Sopenharmony_ci osdc->homeless_osd.o_osdc = osdc; 525962306a36Sopenharmony_ci osdc->homeless_osd.o_osd = CEPH_HOMELESS_OSD; 526062306a36Sopenharmony_ci osdc->last_linger_id = CEPH_LINGER_ID_START; 526162306a36Sopenharmony_ci osdc->linger_requests = RB_ROOT; 526262306a36Sopenharmony_ci osdc->map_checks = RB_ROOT; 526362306a36Sopenharmony_ci osdc->linger_map_checks = RB_ROOT; 526462306a36Sopenharmony_ci INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout); 526562306a36Sopenharmony_ci INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout); 526662306a36Sopenharmony_ci 526762306a36Sopenharmony_ci err = -ENOMEM; 526862306a36Sopenharmony_ci osdc->osdmap = ceph_osdmap_alloc(); 526962306a36Sopenharmony_ci if (!osdc->osdmap) 527062306a36Sopenharmony_ci goto out; 527162306a36Sopenharmony_ci 527262306a36Sopenharmony_ci osdc->req_mempool = mempool_create_slab_pool(10, 527362306a36Sopenharmony_ci ceph_osd_request_cache); 527462306a36Sopenharmony_ci if (!osdc->req_mempool) 527562306a36Sopenharmony_ci goto out_map; 527662306a36Sopenharmony_ci 527762306a36Sopenharmony_ci err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP, 527862306a36Sopenharmony_ci PAGE_SIZE, CEPH_OSD_SLAB_OPS, 10, "osd_op"); 527962306a36Sopenharmony_ci 
if (err < 0) 528062306a36Sopenharmony_ci goto out_mempool; 528162306a36Sopenharmony_ci err = ceph_msgpool_init(&osdc->msgpool_op_reply, CEPH_MSG_OSD_OPREPLY, 528262306a36Sopenharmony_ci PAGE_SIZE, CEPH_OSD_SLAB_OPS, 10, 528362306a36Sopenharmony_ci "osd_op_reply"); 528462306a36Sopenharmony_ci if (err < 0) 528562306a36Sopenharmony_ci goto out_msgpool; 528662306a36Sopenharmony_ci 528762306a36Sopenharmony_ci err = -ENOMEM; 528862306a36Sopenharmony_ci osdc->notify_wq = create_singlethread_workqueue("ceph-watch-notify"); 528962306a36Sopenharmony_ci if (!osdc->notify_wq) 529062306a36Sopenharmony_ci goto out_msgpool_reply; 529162306a36Sopenharmony_ci 529262306a36Sopenharmony_ci osdc->completion_wq = create_singlethread_workqueue("ceph-completion"); 529362306a36Sopenharmony_ci if (!osdc->completion_wq) 529462306a36Sopenharmony_ci goto out_notify_wq; 529562306a36Sopenharmony_ci 529662306a36Sopenharmony_ci schedule_delayed_work(&osdc->timeout_work, 529762306a36Sopenharmony_ci osdc->client->options->osd_keepalive_timeout); 529862306a36Sopenharmony_ci schedule_delayed_work(&osdc->osds_timeout_work, 529962306a36Sopenharmony_ci round_jiffies_relative(osdc->client->options->osd_idle_ttl)); 530062306a36Sopenharmony_ci 530162306a36Sopenharmony_ci return 0; 530262306a36Sopenharmony_ci 530362306a36Sopenharmony_ciout_notify_wq: 530462306a36Sopenharmony_ci destroy_workqueue(osdc->notify_wq); 530562306a36Sopenharmony_ciout_msgpool_reply: 530662306a36Sopenharmony_ci ceph_msgpool_destroy(&osdc->msgpool_op_reply); 530762306a36Sopenharmony_ciout_msgpool: 530862306a36Sopenharmony_ci ceph_msgpool_destroy(&osdc->msgpool_op); 530962306a36Sopenharmony_ciout_mempool: 531062306a36Sopenharmony_ci mempool_destroy(osdc->req_mempool); 531162306a36Sopenharmony_ciout_map: 531262306a36Sopenharmony_ci ceph_osdmap_destroy(osdc->osdmap); 531362306a36Sopenharmony_ciout: 531462306a36Sopenharmony_ci return err; 531562306a36Sopenharmony_ci} 531662306a36Sopenharmony_ci 531762306a36Sopenharmony_civoid 
ceph_osdc_stop(struct ceph_osd_client *osdc) 531862306a36Sopenharmony_ci{ 531962306a36Sopenharmony_ci destroy_workqueue(osdc->completion_wq); 532062306a36Sopenharmony_ci destroy_workqueue(osdc->notify_wq); 532162306a36Sopenharmony_ci cancel_delayed_work_sync(&osdc->timeout_work); 532262306a36Sopenharmony_ci cancel_delayed_work_sync(&osdc->osds_timeout_work); 532362306a36Sopenharmony_ci 532462306a36Sopenharmony_ci down_write(&osdc->lock); 532562306a36Sopenharmony_ci while (!RB_EMPTY_ROOT(&osdc->osds)) { 532662306a36Sopenharmony_ci struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds), 532762306a36Sopenharmony_ci struct ceph_osd, o_node); 532862306a36Sopenharmony_ci close_osd(osd); 532962306a36Sopenharmony_ci } 533062306a36Sopenharmony_ci up_write(&osdc->lock); 533162306a36Sopenharmony_ci WARN_ON(refcount_read(&osdc->homeless_osd.o_ref) != 1); 533262306a36Sopenharmony_ci osd_cleanup(&osdc->homeless_osd); 533362306a36Sopenharmony_ci 533462306a36Sopenharmony_ci WARN_ON(!list_empty(&osdc->osd_lru)); 533562306a36Sopenharmony_ci WARN_ON(!RB_EMPTY_ROOT(&osdc->linger_requests)); 533662306a36Sopenharmony_ci WARN_ON(!RB_EMPTY_ROOT(&osdc->map_checks)); 533762306a36Sopenharmony_ci WARN_ON(!RB_EMPTY_ROOT(&osdc->linger_map_checks)); 533862306a36Sopenharmony_ci WARN_ON(atomic_read(&osdc->num_requests)); 533962306a36Sopenharmony_ci WARN_ON(atomic_read(&osdc->num_homeless)); 534062306a36Sopenharmony_ci 534162306a36Sopenharmony_ci ceph_osdmap_destroy(osdc->osdmap); 534262306a36Sopenharmony_ci mempool_destroy(osdc->req_mempool); 534362306a36Sopenharmony_ci ceph_msgpool_destroy(&osdc->msgpool_op); 534462306a36Sopenharmony_ci ceph_msgpool_destroy(&osdc->msgpool_op_reply); 534562306a36Sopenharmony_ci} 534662306a36Sopenharmony_ci 534762306a36Sopenharmony_ciint osd_req_op_copy_from_init(struct ceph_osd_request *req, 534862306a36Sopenharmony_ci u64 src_snapid, u64 src_version, 534962306a36Sopenharmony_ci struct ceph_object_id *src_oid, 535062306a36Sopenharmony_ci struct ceph_object_locator 
*src_oloc, 535162306a36Sopenharmony_ci u32 src_fadvise_flags, 535262306a36Sopenharmony_ci u32 dst_fadvise_flags, 535362306a36Sopenharmony_ci u32 truncate_seq, u64 truncate_size, 535462306a36Sopenharmony_ci u8 copy_from_flags) 535562306a36Sopenharmony_ci{ 535662306a36Sopenharmony_ci struct ceph_osd_req_op *op; 535762306a36Sopenharmony_ci struct page **pages; 535862306a36Sopenharmony_ci void *p, *end; 535962306a36Sopenharmony_ci 536062306a36Sopenharmony_ci pages = ceph_alloc_page_vector(1, GFP_KERNEL); 536162306a36Sopenharmony_ci if (IS_ERR(pages)) 536262306a36Sopenharmony_ci return PTR_ERR(pages); 536362306a36Sopenharmony_ci 536462306a36Sopenharmony_ci op = osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM2, 536562306a36Sopenharmony_ci dst_fadvise_flags); 536662306a36Sopenharmony_ci op->copy_from.snapid = src_snapid; 536762306a36Sopenharmony_ci op->copy_from.src_version = src_version; 536862306a36Sopenharmony_ci op->copy_from.flags = copy_from_flags; 536962306a36Sopenharmony_ci op->copy_from.src_fadvise_flags = src_fadvise_flags; 537062306a36Sopenharmony_ci 537162306a36Sopenharmony_ci p = page_address(pages[0]); 537262306a36Sopenharmony_ci end = p + PAGE_SIZE; 537362306a36Sopenharmony_ci ceph_encode_string(&p, end, src_oid->name, src_oid->name_len); 537462306a36Sopenharmony_ci encode_oloc(&p, end, src_oloc); 537562306a36Sopenharmony_ci ceph_encode_32(&p, truncate_seq); 537662306a36Sopenharmony_ci ceph_encode_64(&p, truncate_size); 537762306a36Sopenharmony_ci op->indata_len = PAGE_SIZE - (end - p); 537862306a36Sopenharmony_ci 537962306a36Sopenharmony_ci ceph_osd_data_pages_init(&op->copy_from.osd_data, pages, 538062306a36Sopenharmony_ci op->indata_len, 0, false, true); 538162306a36Sopenharmony_ci return 0; 538262306a36Sopenharmony_ci} 538362306a36Sopenharmony_ciEXPORT_SYMBOL(osd_req_op_copy_from_init); 538462306a36Sopenharmony_ci 538562306a36Sopenharmony_ciint __init ceph_osdc_setup(void) 538662306a36Sopenharmony_ci{ 538762306a36Sopenharmony_ci size_t size = 
sizeof(struct ceph_osd_request) + 538862306a36Sopenharmony_ci CEPH_OSD_SLAB_OPS * sizeof(struct ceph_osd_req_op); 538962306a36Sopenharmony_ci 539062306a36Sopenharmony_ci BUG_ON(ceph_osd_request_cache); 539162306a36Sopenharmony_ci ceph_osd_request_cache = kmem_cache_create("ceph_osd_request", size, 539262306a36Sopenharmony_ci 0, 0, NULL); 539362306a36Sopenharmony_ci 539462306a36Sopenharmony_ci return ceph_osd_request_cache ? 0 : -ENOMEM; 539562306a36Sopenharmony_ci} 539662306a36Sopenharmony_ci 539762306a36Sopenharmony_civoid ceph_osdc_cleanup(void) 539862306a36Sopenharmony_ci{ 539962306a36Sopenharmony_ci BUG_ON(!ceph_osd_request_cache); 540062306a36Sopenharmony_ci kmem_cache_destroy(ceph_osd_request_cache); 540162306a36Sopenharmony_ci ceph_osd_request_cache = NULL; 540262306a36Sopenharmony_ci} 540362306a36Sopenharmony_ci 540462306a36Sopenharmony_ci/* 540562306a36Sopenharmony_ci * handle incoming message 540662306a36Sopenharmony_ci */ 540762306a36Sopenharmony_cistatic void osd_dispatch(struct ceph_connection *con, struct ceph_msg *msg) 540862306a36Sopenharmony_ci{ 540962306a36Sopenharmony_ci struct ceph_osd *osd = con->private; 541062306a36Sopenharmony_ci struct ceph_osd_client *osdc = osd->o_osdc; 541162306a36Sopenharmony_ci int type = le16_to_cpu(msg->hdr.type); 541262306a36Sopenharmony_ci 541362306a36Sopenharmony_ci switch (type) { 541462306a36Sopenharmony_ci case CEPH_MSG_OSD_MAP: 541562306a36Sopenharmony_ci ceph_osdc_handle_map(osdc, msg); 541662306a36Sopenharmony_ci break; 541762306a36Sopenharmony_ci case CEPH_MSG_OSD_OPREPLY: 541862306a36Sopenharmony_ci handle_reply(osd, msg); 541962306a36Sopenharmony_ci break; 542062306a36Sopenharmony_ci case CEPH_MSG_OSD_BACKOFF: 542162306a36Sopenharmony_ci handle_backoff(osd, msg); 542262306a36Sopenharmony_ci break; 542362306a36Sopenharmony_ci case CEPH_MSG_WATCH_NOTIFY: 542462306a36Sopenharmony_ci handle_watch_notify(osdc, msg); 542562306a36Sopenharmony_ci break; 542662306a36Sopenharmony_ci 542762306a36Sopenharmony_ci 
default: 542862306a36Sopenharmony_ci pr_err("received unknown message type %d %s\n", type, 542962306a36Sopenharmony_ci ceph_msg_type_name(type)); 543062306a36Sopenharmony_ci } 543162306a36Sopenharmony_ci 543262306a36Sopenharmony_ci ceph_msg_put(msg); 543362306a36Sopenharmony_ci} 543462306a36Sopenharmony_ci 543562306a36Sopenharmony_ci/* How much sparse data was requested? */ 543662306a36Sopenharmony_cistatic u64 sparse_data_requested(struct ceph_osd_request *req) 543762306a36Sopenharmony_ci{ 543862306a36Sopenharmony_ci u64 len = 0; 543962306a36Sopenharmony_ci 544062306a36Sopenharmony_ci if (req->r_flags & CEPH_OSD_FLAG_READ) { 544162306a36Sopenharmony_ci int i; 544262306a36Sopenharmony_ci 544362306a36Sopenharmony_ci for (i = 0; i < req->r_num_ops; ++i) { 544462306a36Sopenharmony_ci struct ceph_osd_req_op *op = &req->r_ops[i]; 544562306a36Sopenharmony_ci 544662306a36Sopenharmony_ci if (op->op == CEPH_OSD_OP_SPARSE_READ) 544762306a36Sopenharmony_ci len += op->extent.length; 544862306a36Sopenharmony_ci } 544962306a36Sopenharmony_ci } 545062306a36Sopenharmony_ci return len; 545162306a36Sopenharmony_ci} 545262306a36Sopenharmony_ci 545362306a36Sopenharmony_ci/* 545462306a36Sopenharmony_ci * Lookup and return message for incoming reply. Don't try to do 545562306a36Sopenharmony_ci * anything about a larger than preallocated data portion of the 545662306a36Sopenharmony_ci * message at the moment - for now, just skip the message. 
545762306a36Sopenharmony_ci */ 545862306a36Sopenharmony_cistatic struct ceph_msg *get_reply(struct ceph_connection *con, 545962306a36Sopenharmony_ci struct ceph_msg_header *hdr, 546062306a36Sopenharmony_ci int *skip) 546162306a36Sopenharmony_ci{ 546262306a36Sopenharmony_ci struct ceph_osd *osd = con->private; 546362306a36Sopenharmony_ci struct ceph_osd_client *osdc = osd->o_osdc; 546462306a36Sopenharmony_ci struct ceph_msg *m = NULL; 546562306a36Sopenharmony_ci struct ceph_osd_request *req; 546662306a36Sopenharmony_ci int front_len = le32_to_cpu(hdr->front_len); 546762306a36Sopenharmony_ci int data_len = le32_to_cpu(hdr->data_len); 546862306a36Sopenharmony_ci u64 tid = le64_to_cpu(hdr->tid); 546962306a36Sopenharmony_ci u64 srlen; 547062306a36Sopenharmony_ci 547162306a36Sopenharmony_ci down_read(&osdc->lock); 547262306a36Sopenharmony_ci if (!osd_registered(osd)) { 547362306a36Sopenharmony_ci dout("%s osd%d unknown, skipping\n", __func__, osd->o_osd); 547462306a36Sopenharmony_ci *skip = 1; 547562306a36Sopenharmony_ci goto out_unlock_osdc; 547662306a36Sopenharmony_ci } 547762306a36Sopenharmony_ci WARN_ON(osd->o_osd != le64_to_cpu(hdr->src.num)); 547862306a36Sopenharmony_ci 547962306a36Sopenharmony_ci mutex_lock(&osd->lock); 548062306a36Sopenharmony_ci req = lookup_request(&osd->o_requests, tid); 548162306a36Sopenharmony_ci if (!req) { 548262306a36Sopenharmony_ci dout("%s osd%d tid %llu unknown, skipping\n", __func__, 548362306a36Sopenharmony_ci osd->o_osd, tid); 548462306a36Sopenharmony_ci *skip = 1; 548562306a36Sopenharmony_ci goto out_unlock_session; 548662306a36Sopenharmony_ci } 548762306a36Sopenharmony_ci 548862306a36Sopenharmony_ci ceph_msg_revoke_incoming(req->r_reply); 548962306a36Sopenharmony_ci 549062306a36Sopenharmony_ci if (front_len > req->r_reply->front_alloc_len) { 549162306a36Sopenharmony_ci pr_warn("%s osd%d tid %llu front %d > preallocated %d\n", 549262306a36Sopenharmony_ci __func__, osd->o_osd, req->r_tid, front_len, 549362306a36Sopenharmony_ci 
req->r_reply->front_alloc_len); 549462306a36Sopenharmony_ci m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front_len, GFP_NOFS, 549562306a36Sopenharmony_ci false); 549662306a36Sopenharmony_ci if (!m) 549762306a36Sopenharmony_ci goto out_unlock_session; 549862306a36Sopenharmony_ci ceph_msg_put(req->r_reply); 549962306a36Sopenharmony_ci req->r_reply = m; 550062306a36Sopenharmony_ci } 550162306a36Sopenharmony_ci 550262306a36Sopenharmony_ci srlen = sparse_data_requested(req); 550362306a36Sopenharmony_ci if (!srlen && data_len > req->r_reply->data_length) { 550462306a36Sopenharmony_ci pr_warn("%s osd%d tid %llu data %d > preallocated %zu, skipping\n", 550562306a36Sopenharmony_ci __func__, osd->o_osd, req->r_tid, data_len, 550662306a36Sopenharmony_ci req->r_reply->data_length); 550762306a36Sopenharmony_ci m = NULL; 550862306a36Sopenharmony_ci *skip = 1; 550962306a36Sopenharmony_ci goto out_unlock_session; 551062306a36Sopenharmony_ci } 551162306a36Sopenharmony_ci 551262306a36Sopenharmony_ci m = ceph_msg_get(req->r_reply); 551362306a36Sopenharmony_ci m->sparse_read_total = srlen; 551462306a36Sopenharmony_ci 551562306a36Sopenharmony_ci dout("get_reply tid %lld %p\n", tid, m); 551662306a36Sopenharmony_ci 551762306a36Sopenharmony_ciout_unlock_session: 551862306a36Sopenharmony_ci mutex_unlock(&osd->lock); 551962306a36Sopenharmony_ciout_unlock_osdc: 552062306a36Sopenharmony_ci up_read(&osdc->lock); 552162306a36Sopenharmony_ci return m; 552262306a36Sopenharmony_ci} 552362306a36Sopenharmony_ci 552462306a36Sopenharmony_cistatic struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr) 552562306a36Sopenharmony_ci{ 552662306a36Sopenharmony_ci struct ceph_msg *m; 552762306a36Sopenharmony_ci int type = le16_to_cpu(hdr->type); 552862306a36Sopenharmony_ci u32 front_len = le32_to_cpu(hdr->front_len); 552962306a36Sopenharmony_ci u32 data_len = le32_to_cpu(hdr->data_len); 553062306a36Sopenharmony_ci 553162306a36Sopenharmony_ci m = ceph_msg_new2(type, front_len, 1, GFP_NOIO, false); 
553262306a36Sopenharmony_ci if (!m) 553362306a36Sopenharmony_ci return NULL; 553462306a36Sopenharmony_ci 553562306a36Sopenharmony_ci if (data_len) { 553662306a36Sopenharmony_ci struct page **pages; 553762306a36Sopenharmony_ci 553862306a36Sopenharmony_ci pages = ceph_alloc_page_vector(calc_pages_for(0, data_len), 553962306a36Sopenharmony_ci GFP_NOIO); 554062306a36Sopenharmony_ci if (IS_ERR(pages)) { 554162306a36Sopenharmony_ci ceph_msg_put(m); 554262306a36Sopenharmony_ci return NULL; 554362306a36Sopenharmony_ci } 554462306a36Sopenharmony_ci 554562306a36Sopenharmony_ci ceph_msg_data_add_pages(m, pages, data_len, 0, true); 554662306a36Sopenharmony_ci } 554762306a36Sopenharmony_ci 554862306a36Sopenharmony_ci return m; 554962306a36Sopenharmony_ci} 555062306a36Sopenharmony_ci 555162306a36Sopenharmony_cistatic struct ceph_msg *osd_alloc_msg(struct ceph_connection *con, 555262306a36Sopenharmony_ci struct ceph_msg_header *hdr, 555362306a36Sopenharmony_ci int *skip) 555462306a36Sopenharmony_ci{ 555562306a36Sopenharmony_ci struct ceph_osd *osd = con->private; 555662306a36Sopenharmony_ci int type = le16_to_cpu(hdr->type); 555762306a36Sopenharmony_ci 555862306a36Sopenharmony_ci *skip = 0; 555962306a36Sopenharmony_ci switch (type) { 556062306a36Sopenharmony_ci case CEPH_MSG_OSD_MAP: 556162306a36Sopenharmony_ci case CEPH_MSG_OSD_BACKOFF: 556262306a36Sopenharmony_ci case CEPH_MSG_WATCH_NOTIFY: 556362306a36Sopenharmony_ci return alloc_msg_with_page_vector(hdr); 556462306a36Sopenharmony_ci case CEPH_MSG_OSD_OPREPLY: 556562306a36Sopenharmony_ci return get_reply(con, hdr, skip); 556662306a36Sopenharmony_ci default: 556762306a36Sopenharmony_ci pr_warn("%s osd%d unknown msg type %d, skipping\n", __func__, 556862306a36Sopenharmony_ci osd->o_osd, type); 556962306a36Sopenharmony_ci *skip = 1; 557062306a36Sopenharmony_ci return NULL; 557162306a36Sopenharmony_ci } 557262306a36Sopenharmony_ci} 557362306a36Sopenharmony_ci 557462306a36Sopenharmony_ci/* 557562306a36Sopenharmony_ci * Wrappers to 
refcount containing ceph_osd struct 557662306a36Sopenharmony_ci */ 557762306a36Sopenharmony_cistatic struct ceph_connection *osd_get_con(struct ceph_connection *con) 557862306a36Sopenharmony_ci{ 557962306a36Sopenharmony_ci struct ceph_osd *osd = con->private; 558062306a36Sopenharmony_ci if (get_osd(osd)) 558162306a36Sopenharmony_ci return con; 558262306a36Sopenharmony_ci return NULL; 558362306a36Sopenharmony_ci} 558462306a36Sopenharmony_ci 558562306a36Sopenharmony_cistatic void osd_put_con(struct ceph_connection *con) 558662306a36Sopenharmony_ci{ 558762306a36Sopenharmony_ci struct ceph_osd *osd = con->private; 558862306a36Sopenharmony_ci put_osd(osd); 558962306a36Sopenharmony_ci} 559062306a36Sopenharmony_ci 559162306a36Sopenharmony_ci/* 559262306a36Sopenharmony_ci * authentication 559362306a36Sopenharmony_ci */ 559462306a36Sopenharmony_ci 559562306a36Sopenharmony_ci/* 559662306a36Sopenharmony_ci * Note: returned pointer is the address of a structure that's 559762306a36Sopenharmony_ci * managed separately. Caller must *not* attempt to free it. 
559862306a36Sopenharmony_ci */ 559962306a36Sopenharmony_cistatic struct ceph_auth_handshake * 560062306a36Sopenharmony_ciosd_get_authorizer(struct ceph_connection *con, int *proto, int force_new) 560162306a36Sopenharmony_ci{ 560262306a36Sopenharmony_ci struct ceph_osd *o = con->private; 560362306a36Sopenharmony_ci struct ceph_osd_client *osdc = o->o_osdc; 560462306a36Sopenharmony_ci struct ceph_auth_client *ac = osdc->client->monc.auth; 560562306a36Sopenharmony_ci struct ceph_auth_handshake *auth = &o->o_auth; 560662306a36Sopenharmony_ci int ret; 560762306a36Sopenharmony_ci 560862306a36Sopenharmony_ci ret = __ceph_auth_get_authorizer(ac, auth, CEPH_ENTITY_TYPE_OSD, 560962306a36Sopenharmony_ci force_new, proto, NULL, NULL); 561062306a36Sopenharmony_ci if (ret) 561162306a36Sopenharmony_ci return ERR_PTR(ret); 561262306a36Sopenharmony_ci 561362306a36Sopenharmony_ci return auth; 561462306a36Sopenharmony_ci} 561562306a36Sopenharmony_ci 561662306a36Sopenharmony_cistatic int osd_add_authorizer_challenge(struct ceph_connection *con, 561762306a36Sopenharmony_ci void *challenge_buf, int challenge_buf_len) 561862306a36Sopenharmony_ci{ 561962306a36Sopenharmony_ci struct ceph_osd *o = con->private; 562062306a36Sopenharmony_ci struct ceph_osd_client *osdc = o->o_osdc; 562162306a36Sopenharmony_ci struct ceph_auth_client *ac = osdc->client->monc.auth; 562262306a36Sopenharmony_ci 562362306a36Sopenharmony_ci return ceph_auth_add_authorizer_challenge(ac, o->o_auth.authorizer, 562462306a36Sopenharmony_ci challenge_buf, challenge_buf_len); 562562306a36Sopenharmony_ci} 562662306a36Sopenharmony_ci 562762306a36Sopenharmony_cistatic int osd_verify_authorizer_reply(struct ceph_connection *con) 562862306a36Sopenharmony_ci{ 562962306a36Sopenharmony_ci struct ceph_osd *o = con->private; 563062306a36Sopenharmony_ci struct ceph_osd_client *osdc = o->o_osdc; 563162306a36Sopenharmony_ci struct ceph_auth_client *ac = osdc->client->monc.auth; 563262306a36Sopenharmony_ci struct ceph_auth_handshake 
*auth = &o->o_auth; 563362306a36Sopenharmony_ci 563462306a36Sopenharmony_ci return ceph_auth_verify_authorizer_reply(ac, auth->authorizer, 563562306a36Sopenharmony_ci auth->authorizer_reply_buf, auth->authorizer_reply_buf_len, 563662306a36Sopenharmony_ci NULL, NULL, NULL, NULL); 563762306a36Sopenharmony_ci} 563862306a36Sopenharmony_ci 563962306a36Sopenharmony_cistatic int osd_invalidate_authorizer(struct ceph_connection *con) 564062306a36Sopenharmony_ci{ 564162306a36Sopenharmony_ci struct ceph_osd *o = con->private; 564262306a36Sopenharmony_ci struct ceph_osd_client *osdc = o->o_osdc; 564362306a36Sopenharmony_ci struct ceph_auth_client *ac = osdc->client->monc.auth; 564462306a36Sopenharmony_ci 564562306a36Sopenharmony_ci ceph_auth_invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD); 564662306a36Sopenharmony_ci return ceph_monc_validate_auth(&osdc->client->monc); 564762306a36Sopenharmony_ci} 564862306a36Sopenharmony_ci 564962306a36Sopenharmony_cistatic int osd_get_auth_request(struct ceph_connection *con, 565062306a36Sopenharmony_ci void *buf, int *buf_len, 565162306a36Sopenharmony_ci void **authorizer, int *authorizer_len) 565262306a36Sopenharmony_ci{ 565362306a36Sopenharmony_ci struct ceph_osd *o = con->private; 565462306a36Sopenharmony_ci struct ceph_auth_client *ac = o->o_osdc->client->monc.auth; 565562306a36Sopenharmony_ci struct ceph_auth_handshake *auth = &o->o_auth; 565662306a36Sopenharmony_ci int ret; 565762306a36Sopenharmony_ci 565862306a36Sopenharmony_ci ret = ceph_auth_get_authorizer(ac, auth, CEPH_ENTITY_TYPE_OSD, 565962306a36Sopenharmony_ci buf, buf_len); 566062306a36Sopenharmony_ci if (ret) 566162306a36Sopenharmony_ci return ret; 566262306a36Sopenharmony_ci 566362306a36Sopenharmony_ci *authorizer = auth->authorizer_buf; 566462306a36Sopenharmony_ci *authorizer_len = auth->authorizer_buf_len; 566562306a36Sopenharmony_ci return 0; 566662306a36Sopenharmony_ci} 566762306a36Sopenharmony_ci 566862306a36Sopenharmony_cistatic int osd_handle_auth_reply_more(struct 
ceph_connection *con, 566962306a36Sopenharmony_ci void *reply, int reply_len, 567062306a36Sopenharmony_ci void *buf, int *buf_len, 567162306a36Sopenharmony_ci void **authorizer, int *authorizer_len) 567262306a36Sopenharmony_ci{ 567362306a36Sopenharmony_ci struct ceph_osd *o = con->private; 567462306a36Sopenharmony_ci struct ceph_auth_client *ac = o->o_osdc->client->monc.auth; 567562306a36Sopenharmony_ci struct ceph_auth_handshake *auth = &o->o_auth; 567662306a36Sopenharmony_ci int ret; 567762306a36Sopenharmony_ci 567862306a36Sopenharmony_ci ret = ceph_auth_handle_svc_reply_more(ac, auth, reply, reply_len, 567962306a36Sopenharmony_ci buf, buf_len); 568062306a36Sopenharmony_ci if (ret) 568162306a36Sopenharmony_ci return ret; 568262306a36Sopenharmony_ci 568362306a36Sopenharmony_ci *authorizer = auth->authorizer_buf; 568462306a36Sopenharmony_ci *authorizer_len = auth->authorizer_buf_len; 568562306a36Sopenharmony_ci return 0; 568662306a36Sopenharmony_ci} 568762306a36Sopenharmony_ci 568862306a36Sopenharmony_cistatic int osd_handle_auth_done(struct ceph_connection *con, 568962306a36Sopenharmony_ci u64 global_id, void *reply, int reply_len, 569062306a36Sopenharmony_ci u8 *session_key, int *session_key_len, 569162306a36Sopenharmony_ci u8 *con_secret, int *con_secret_len) 569262306a36Sopenharmony_ci{ 569362306a36Sopenharmony_ci struct ceph_osd *o = con->private; 569462306a36Sopenharmony_ci struct ceph_auth_client *ac = o->o_osdc->client->monc.auth; 569562306a36Sopenharmony_ci struct ceph_auth_handshake *auth = &o->o_auth; 569662306a36Sopenharmony_ci 569762306a36Sopenharmony_ci return ceph_auth_handle_svc_reply_done(ac, auth, reply, reply_len, 569862306a36Sopenharmony_ci session_key, session_key_len, 569962306a36Sopenharmony_ci con_secret, con_secret_len); 570062306a36Sopenharmony_ci} 570162306a36Sopenharmony_ci 570262306a36Sopenharmony_cistatic int osd_handle_auth_bad_method(struct ceph_connection *con, 570362306a36Sopenharmony_ci int used_proto, int result, 
570462306a36Sopenharmony_ci const int *allowed_protos, int proto_cnt, 570562306a36Sopenharmony_ci const int *allowed_modes, int mode_cnt) 570662306a36Sopenharmony_ci{ 570762306a36Sopenharmony_ci struct ceph_osd *o = con->private; 570862306a36Sopenharmony_ci struct ceph_mon_client *monc = &o->o_osdc->client->monc; 570962306a36Sopenharmony_ci int ret; 571062306a36Sopenharmony_ci 571162306a36Sopenharmony_ci if (ceph_auth_handle_bad_authorizer(monc->auth, CEPH_ENTITY_TYPE_OSD, 571262306a36Sopenharmony_ci used_proto, result, 571362306a36Sopenharmony_ci allowed_protos, proto_cnt, 571462306a36Sopenharmony_ci allowed_modes, mode_cnt)) { 571562306a36Sopenharmony_ci ret = ceph_monc_validate_auth(monc); 571662306a36Sopenharmony_ci if (ret) 571762306a36Sopenharmony_ci return ret; 571862306a36Sopenharmony_ci } 571962306a36Sopenharmony_ci 572062306a36Sopenharmony_ci return -EACCES; 572162306a36Sopenharmony_ci} 572262306a36Sopenharmony_ci 572362306a36Sopenharmony_cistatic void osd_reencode_message(struct ceph_msg *msg) 572462306a36Sopenharmony_ci{ 572562306a36Sopenharmony_ci int type = le16_to_cpu(msg->hdr.type); 572662306a36Sopenharmony_ci 572762306a36Sopenharmony_ci if (type == CEPH_MSG_OSD_OP) 572862306a36Sopenharmony_ci encode_request_finish(msg); 572962306a36Sopenharmony_ci} 573062306a36Sopenharmony_ci 573162306a36Sopenharmony_cistatic int osd_sign_message(struct ceph_msg *msg) 573262306a36Sopenharmony_ci{ 573362306a36Sopenharmony_ci struct ceph_osd *o = msg->con->private; 573462306a36Sopenharmony_ci struct ceph_auth_handshake *auth = &o->o_auth; 573562306a36Sopenharmony_ci 573662306a36Sopenharmony_ci return ceph_auth_sign_message(auth, msg); 573762306a36Sopenharmony_ci} 573862306a36Sopenharmony_ci 573962306a36Sopenharmony_cistatic int osd_check_message_signature(struct ceph_msg *msg) 574062306a36Sopenharmony_ci{ 574162306a36Sopenharmony_ci struct ceph_osd *o = msg->con->private; 574262306a36Sopenharmony_ci struct ceph_auth_handshake *auth = &o->o_auth; 
574362306a36Sopenharmony_ci 574462306a36Sopenharmony_ci return ceph_auth_check_message_signature(auth, msg); 574562306a36Sopenharmony_ci} 574662306a36Sopenharmony_ci 574762306a36Sopenharmony_cistatic void advance_cursor(struct ceph_msg_data_cursor *cursor, size_t len, 574862306a36Sopenharmony_ci bool zero) 574962306a36Sopenharmony_ci{ 575062306a36Sopenharmony_ci while (len) { 575162306a36Sopenharmony_ci struct page *page; 575262306a36Sopenharmony_ci size_t poff, plen; 575362306a36Sopenharmony_ci 575462306a36Sopenharmony_ci page = ceph_msg_data_next(cursor, &poff, &plen); 575562306a36Sopenharmony_ci if (plen > len) 575662306a36Sopenharmony_ci plen = len; 575762306a36Sopenharmony_ci if (zero) 575862306a36Sopenharmony_ci zero_user_segment(page, poff, poff + plen); 575962306a36Sopenharmony_ci len -= plen; 576062306a36Sopenharmony_ci ceph_msg_data_advance(cursor, plen); 576162306a36Sopenharmony_ci } 576262306a36Sopenharmony_ci} 576362306a36Sopenharmony_ci 576462306a36Sopenharmony_cistatic int prep_next_sparse_read(struct ceph_connection *con, 576562306a36Sopenharmony_ci struct ceph_msg_data_cursor *cursor) 576662306a36Sopenharmony_ci{ 576762306a36Sopenharmony_ci struct ceph_osd *o = con->private; 576862306a36Sopenharmony_ci struct ceph_sparse_read *sr = &o->o_sparse_read; 576962306a36Sopenharmony_ci struct ceph_osd_request *req; 577062306a36Sopenharmony_ci struct ceph_osd_req_op *op; 577162306a36Sopenharmony_ci 577262306a36Sopenharmony_ci spin_lock(&o->o_requests_lock); 577362306a36Sopenharmony_ci req = lookup_request(&o->o_requests, le64_to_cpu(con->in_msg->hdr.tid)); 577462306a36Sopenharmony_ci if (!req) { 577562306a36Sopenharmony_ci spin_unlock(&o->o_requests_lock); 577662306a36Sopenharmony_ci return -EBADR; 577762306a36Sopenharmony_ci } 577862306a36Sopenharmony_ci 577962306a36Sopenharmony_ci if (o->o_sparse_op_idx < 0) { 578062306a36Sopenharmony_ci dout("%s: [%d] starting new sparse read req\n", 578162306a36Sopenharmony_ci __func__, o->o_osd); 
578262306a36Sopenharmony_ci } else { 578362306a36Sopenharmony_ci u64 end; 578462306a36Sopenharmony_ci 578562306a36Sopenharmony_ci op = &req->r_ops[o->o_sparse_op_idx]; 578662306a36Sopenharmony_ci 578762306a36Sopenharmony_ci WARN_ON_ONCE(op->extent.sparse_ext); 578862306a36Sopenharmony_ci 578962306a36Sopenharmony_ci /* hand back buffer we took earlier */ 579062306a36Sopenharmony_ci op->extent.sparse_ext = sr->sr_extent; 579162306a36Sopenharmony_ci sr->sr_extent = NULL; 579262306a36Sopenharmony_ci op->extent.sparse_ext_cnt = sr->sr_count; 579362306a36Sopenharmony_ci sr->sr_ext_len = 0; 579462306a36Sopenharmony_ci dout("%s: [%d] completed extent array len %d cursor->resid %zd\n", 579562306a36Sopenharmony_ci __func__, o->o_osd, op->extent.sparse_ext_cnt, cursor->resid); 579662306a36Sopenharmony_ci /* Advance to end of data for this operation */ 579762306a36Sopenharmony_ci end = ceph_sparse_ext_map_end(op); 579862306a36Sopenharmony_ci if (end < sr->sr_req_len) 579962306a36Sopenharmony_ci advance_cursor(cursor, sr->sr_req_len - end, false); 580062306a36Sopenharmony_ci } 580162306a36Sopenharmony_ci 580262306a36Sopenharmony_ci ceph_init_sparse_read(sr); 580362306a36Sopenharmony_ci 580462306a36Sopenharmony_ci /* find next op in this request (if any) */ 580562306a36Sopenharmony_ci while (++o->o_sparse_op_idx < req->r_num_ops) { 580662306a36Sopenharmony_ci op = &req->r_ops[o->o_sparse_op_idx]; 580762306a36Sopenharmony_ci if (op->op == CEPH_OSD_OP_SPARSE_READ) 580862306a36Sopenharmony_ci goto found; 580962306a36Sopenharmony_ci } 581062306a36Sopenharmony_ci 581162306a36Sopenharmony_ci /* reset for next sparse read request */ 581262306a36Sopenharmony_ci spin_unlock(&o->o_requests_lock); 581362306a36Sopenharmony_ci o->o_sparse_op_idx = -1; 581462306a36Sopenharmony_ci return 0; 581562306a36Sopenharmony_cifound: 581662306a36Sopenharmony_ci sr->sr_req_off = op->extent.offset; 581762306a36Sopenharmony_ci sr->sr_req_len = op->extent.length; 581862306a36Sopenharmony_ci sr->sr_pos = 
sr->sr_req_off; 581962306a36Sopenharmony_ci dout("%s: [%d] new sparse read op at idx %d 0x%llx~0x%llx\n", __func__, 582062306a36Sopenharmony_ci o->o_osd, o->o_sparse_op_idx, sr->sr_req_off, sr->sr_req_len); 582162306a36Sopenharmony_ci 582262306a36Sopenharmony_ci /* hand off request's sparse extent map buffer */ 582362306a36Sopenharmony_ci sr->sr_ext_len = op->extent.sparse_ext_cnt; 582462306a36Sopenharmony_ci op->extent.sparse_ext_cnt = 0; 582562306a36Sopenharmony_ci sr->sr_extent = op->extent.sparse_ext; 582662306a36Sopenharmony_ci op->extent.sparse_ext = NULL; 582762306a36Sopenharmony_ci 582862306a36Sopenharmony_ci spin_unlock(&o->o_requests_lock); 582962306a36Sopenharmony_ci return 1; 583062306a36Sopenharmony_ci} 583162306a36Sopenharmony_ci 583262306a36Sopenharmony_ci#ifdef __BIG_ENDIAN 583362306a36Sopenharmony_cistatic inline void convert_extent_map(struct ceph_sparse_read *sr) 583462306a36Sopenharmony_ci{ 583562306a36Sopenharmony_ci int i; 583662306a36Sopenharmony_ci 583762306a36Sopenharmony_ci for (i = 0; i < sr->sr_count; i++) { 583862306a36Sopenharmony_ci struct ceph_sparse_extent *ext = &sr->sr_extent[i]; 583962306a36Sopenharmony_ci 584062306a36Sopenharmony_ci ext->off = le64_to_cpu((__force __le64)ext->off); 584162306a36Sopenharmony_ci ext->len = le64_to_cpu((__force __le64)ext->len); 584262306a36Sopenharmony_ci } 584362306a36Sopenharmony_ci} 584462306a36Sopenharmony_ci#else 584562306a36Sopenharmony_cistatic inline void convert_extent_map(struct ceph_sparse_read *sr) 584662306a36Sopenharmony_ci{ 584762306a36Sopenharmony_ci} 584862306a36Sopenharmony_ci#endif 584962306a36Sopenharmony_ci 585062306a36Sopenharmony_ci#define MAX_EXTENTS 4096 585162306a36Sopenharmony_ci 585262306a36Sopenharmony_cistatic int osd_sparse_read(struct ceph_connection *con, 585362306a36Sopenharmony_ci struct ceph_msg_data_cursor *cursor, 585462306a36Sopenharmony_ci char **pbuf) 585562306a36Sopenharmony_ci{ 585662306a36Sopenharmony_ci struct ceph_osd *o = con->private; 
585762306a36Sopenharmony_ci struct ceph_sparse_read *sr = &o->o_sparse_read; 585862306a36Sopenharmony_ci u32 count = sr->sr_count; 585962306a36Sopenharmony_ci u64 eoff, elen, len = 0; 586062306a36Sopenharmony_ci int i, ret; 586162306a36Sopenharmony_ci 586262306a36Sopenharmony_ci switch (sr->sr_state) { 586362306a36Sopenharmony_ci case CEPH_SPARSE_READ_HDR: 586462306a36Sopenharmony_cinext_op: 586562306a36Sopenharmony_ci ret = prep_next_sparse_read(con, cursor); 586662306a36Sopenharmony_ci if (ret <= 0) 586762306a36Sopenharmony_ci return ret; 586862306a36Sopenharmony_ci 586962306a36Sopenharmony_ci /* number of extents */ 587062306a36Sopenharmony_ci ret = sizeof(sr->sr_count); 587162306a36Sopenharmony_ci *pbuf = (char *)&sr->sr_count; 587262306a36Sopenharmony_ci sr->sr_state = CEPH_SPARSE_READ_EXTENTS; 587362306a36Sopenharmony_ci break; 587462306a36Sopenharmony_ci case CEPH_SPARSE_READ_EXTENTS: 587562306a36Sopenharmony_ci /* Convert sr_count to host-endian */ 587662306a36Sopenharmony_ci count = le32_to_cpu((__force __le32)sr->sr_count); 587762306a36Sopenharmony_ci sr->sr_count = count; 587862306a36Sopenharmony_ci dout("[%d] got %u extents\n", o->o_osd, count); 587962306a36Sopenharmony_ci 588062306a36Sopenharmony_ci if (count > 0) { 588162306a36Sopenharmony_ci if (!sr->sr_extent || count > sr->sr_ext_len) { 588262306a36Sopenharmony_ci /* 588362306a36Sopenharmony_ci * Apply a hard cap to the number of extents. 588462306a36Sopenharmony_ci * If we have more, assume something is wrong. 
588562306a36Sopenharmony_ci */ 588662306a36Sopenharmony_ci if (count > MAX_EXTENTS) { 588762306a36Sopenharmony_ci dout("%s: OSD returned 0x%x extents in a single reply!\n", 588862306a36Sopenharmony_ci __func__, count); 588962306a36Sopenharmony_ci return -EREMOTEIO; 589062306a36Sopenharmony_ci } 589162306a36Sopenharmony_ci 589262306a36Sopenharmony_ci /* no extent array provided, or too short */ 589362306a36Sopenharmony_ci kfree(sr->sr_extent); 589462306a36Sopenharmony_ci sr->sr_extent = kmalloc_array(count, 589562306a36Sopenharmony_ci sizeof(*sr->sr_extent), 589662306a36Sopenharmony_ci GFP_NOIO); 589762306a36Sopenharmony_ci if (!sr->sr_extent) 589862306a36Sopenharmony_ci return -ENOMEM; 589962306a36Sopenharmony_ci sr->sr_ext_len = count; 590062306a36Sopenharmony_ci } 590162306a36Sopenharmony_ci ret = count * sizeof(*sr->sr_extent); 590262306a36Sopenharmony_ci *pbuf = (char *)sr->sr_extent; 590362306a36Sopenharmony_ci sr->sr_state = CEPH_SPARSE_READ_DATA_LEN; 590462306a36Sopenharmony_ci break; 590562306a36Sopenharmony_ci } 590662306a36Sopenharmony_ci /* No extents? 
Read data len */ 590762306a36Sopenharmony_ci fallthrough; 590862306a36Sopenharmony_ci case CEPH_SPARSE_READ_DATA_LEN: 590962306a36Sopenharmony_ci convert_extent_map(sr); 591062306a36Sopenharmony_ci ret = sizeof(sr->sr_datalen); 591162306a36Sopenharmony_ci *pbuf = (char *)&sr->sr_datalen; 591262306a36Sopenharmony_ci sr->sr_state = CEPH_SPARSE_READ_DATA_PRE; 591362306a36Sopenharmony_ci break; 591462306a36Sopenharmony_ci case CEPH_SPARSE_READ_DATA_PRE: 591562306a36Sopenharmony_ci /* Convert sr_datalen to host-endian */ 591662306a36Sopenharmony_ci sr->sr_datalen = le32_to_cpu((__force __le32)sr->sr_datalen); 591762306a36Sopenharmony_ci for (i = 0; i < count; i++) 591862306a36Sopenharmony_ci len += sr->sr_extent[i].len; 591962306a36Sopenharmony_ci if (sr->sr_datalen != len) { 592062306a36Sopenharmony_ci pr_warn_ratelimited("data len %u != extent len %llu\n", 592162306a36Sopenharmony_ci sr->sr_datalen, len); 592262306a36Sopenharmony_ci return -EREMOTEIO; 592362306a36Sopenharmony_ci } 592462306a36Sopenharmony_ci sr->sr_state = CEPH_SPARSE_READ_DATA; 592562306a36Sopenharmony_ci fallthrough; 592662306a36Sopenharmony_ci case CEPH_SPARSE_READ_DATA: 592762306a36Sopenharmony_ci if (sr->sr_index >= count) { 592862306a36Sopenharmony_ci sr->sr_state = CEPH_SPARSE_READ_HDR; 592962306a36Sopenharmony_ci goto next_op; 593062306a36Sopenharmony_ci } 593162306a36Sopenharmony_ci 593262306a36Sopenharmony_ci eoff = sr->sr_extent[sr->sr_index].off; 593362306a36Sopenharmony_ci elen = sr->sr_extent[sr->sr_index].len; 593462306a36Sopenharmony_ci 593562306a36Sopenharmony_ci dout("[%d] ext %d off 0x%llx len 0x%llx\n", 593662306a36Sopenharmony_ci o->o_osd, sr->sr_index, eoff, elen); 593762306a36Sopenharmony_ci 593862306a36Sopenharmony_ci if (elen > INT_MAX) { 593962306a36Sopenharmony_ci dout("Sparse read extent length too long (0x%llx)\n", 594062306a36Sopenharmony_ci elen); 594162306a36Sopenharmony_ci return -EREMOTEIO; 594262306a36Sopenharmony_ci } 594362306a36Sopenharmony_ci 
594462306a36Sopenharmony_ci /* zero out anything from sr_pos to start of extent */ 594562306a36Sopenharmony_ci if (sr->sr_pos < eoff) 594662306a36Sopenharmony_ci advance_cursor(cursor, eoff - sr->sr_pos, true); 594762306a36Sopenharmony_ci 594862306a36Sopenharmony_ci /* Set position to end of extent */ 594962306a36Sopenharmony_ci sr->sr_pos = eoff + elen; 595062306a36Sopenharmony_ci 595162306a36Sopenharmony_ci /* send back the new length and nullify the ptr */ 595262306a36Sopenharmony_ci cursor->sr_resid = elen; 595362306a36Sopenharmony_ci ret = elen; 595462306a36Sopenharmony_ci *pbuf = NULL; 595562306a36Sopenharmony_ci 595662306a36Sopenharmony_ci /* Bump the array index */ 595762306a36Sopenharmony_ci ++sr->sr_index; 595862306a36Sopenharmony_ci break; 595962306a36Sopenharmony_ci } 596062306a36Sopenharmony_ci return ret; 596162306a36Sopenharmony_ci} 596262306a36Sopenharmony_ci 596362306a36Sopenharmony_cistatic const struct ceph_connection_operations osd_con_ops = { 596462306a36Sopenharmony_ci .get = osd_get_con, 596562306a36Sopenharmony_ci .put = osd_put_con, 596662306a36Sopenharmony_ci .sparse_read = osd_sparse_read, 596762306a36Sopenharmony_ci .alloc_msg = osd_alloc_msg, 596862306a36Sopenharmony_ci .dispatch = osd_dispatch, 596962306a36Sopenharmony_ci .fault = osd_fault, 597062306a36Sopenharmony_ci .reencode_message = osd_reencode_message, 597162306a36Sopenharmony_ci .get_authorizer = osd_get_authorizer, 597262306a36Sopenharmony_ci .add_authorizer_challenge = osd_add_authorizer_challenge, 597362306a36Sopenharmony_ci .verify_authorizer_reply = osd_verify_authorizer_reply, 597462306a36Sopenharmony_ci .invalidate_authorizer = osd_invalidate_authorizer, 597562306a36Sopenharmony_ci .sign_message = osd_sign_message, 597662306a36Sopenharmony_ci .check_message_signature = osd_check_message_signature, 597762306a36Sopenharmony_ci .get_auth_request = osd_get_auth_request, 597862306a36Sopenharmony_ci .handle_auth_reply_more = osd_handle_auth_reply_more, 
597962306a36Sopenharmony_ci .handle_auth_done = osd_handle_auth_done, 598062306a36Sopenharmony_ci .handle_auth_bad_method = osd_handle_auth_bad_method, 598162306a36Sopenharmony_ci}; 5982