162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci#include <linux/ceph/ceph_debug.h> 362306a36Sopenharmony_ci 462306a36Sopenharmony_ci#include <linux/backing-dev.h> 562306a36Sopenharmony_ci#include <linux/fs.h> 662306a36Sopenharmony_ci#include <linux/mm.h> 762306a36Sopenharmony_ci#include <linux/swap.h> 862306a36Sopenharmony_ci#include <linux/pagemap.h> 962306a36Sopenharmony_ci#include <linux/slab.h> 1062306a36Sopenharmony_ci#include <linux/pagevec.h> 1162306a36Sopenharmony_ci#include <linux/task_io_accounting_ops.h> 1262306a36Sopenharmony_ci#include <linux/signal.h> 1362306a36Sopenharmony_ci#include <linux/iversion.h> 1462306a36Sopenharmony_ci#include <linux/ktime.h> 1562306a36Sopenharmony_ci#include <linux/netfs.h> 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_ci#include "super.h" 1862306a36Sopenharmony_ci#include "mds_client.h" 1962306a36Sopenharmony_ci#include "cache.h" 2062306a36Sopenharmony_ci#include "metric.h" 2162306a36Sopenharmony_ci#include "crypto.h" 2262306a36Sopenharmony_ci#include <linux/ceph/osd_client.h> 2362306a36Sopenharmony_ci#include <linux/ceph/striper.h> 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci/* 2662306a36Sopenharmony_ci * Ceph address space ops. 2762306a36Sopenharmony_ci * 2862306a36Sopenharmony_ci * There are a few funny things going on here. 2962306a36Sopenharmony_ci * 3062306a36Sopenharmony_ci * The page->private field is used to reference a struct 3162306a36Sopenharmony_ci * ceph_snap_context for _every_ dirty page. This indicates which 3262306a36Sopenharmony_ci * snapshot the page was logically dirtied in, and thus which snap 3362306a36Sopenharmony_ci * context needs to be associated with the osd write during writeback. 3462306a36Sopenharmony_ci * 3562306a36Sopenharmony_ci * Similarly, struct ceph_inode_info maintains a set of counters to 3662306a36Sopenharmony_ci * count dirty pages on the inode. 
 * In the absence of snapshots,
 * i_wrbuffer_ref == i_wrbuffer_ref_head == the dirty page count.
 *
 * When a snapshot is taken (that is, when the client receives
 * notification that a snapshot was taken), each inode with caps and
 * with dirty pages (dirty pages implies there is a cap) gets a new
 * ceph_cap_snap in the i_cap_snaps list (which is sorted in ascending
 * order, new snaps go to the tail).  The i_wrbuffer_ref_head count is
 * moved to capsnap->dirty_pages.  (Unless a sync write is currently in
 * progress.  In that case, the capsnap is said to be "pending", new
 * writes cannot start, and the capsnap isn't "finalized" until the
 * write completes (or fails) and a final size/mtime for the inode for
 * that snap can be settled upon.)  i_wrbuffer_ref_head is reset to 0.
 *
 * On writeback, we must submit writes to the osd IN SNAP ORDER.  So,
 * we look for the first capsnap in i_cap_snaps and write out pages in
 * that snap context _only_.  Then we move on to the next capsnap,
 * eventually reaching the "live" or "head" context (i.e., pages that
 * are not yet snapped) and are writing the most recently dirtied
 * pages.
 *
 * Invalidate and so forth must take care to ensure the dirty page
 * accounting is preserved.
5962306a36Sopenharmony_ci */ 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_ci#define CONGESTION_ON_THRESH(congestion_kb) (congestion_kb >> (PAGE_SHIFT-10)) 6262306a36Sopenharmony_ci#define CONGESTION_OFF_THRESH(congestion_kb) \ 6362306a36Sopenharmony_ci (CONGESTION_ON_THRESH(congestion_kb) - \ 6462306a36Sopenharmony_ci (CONGESTION_ON_THRESH(congestion_kb) >> 2)) 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_cistatic int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len, 6762306a36Sopenharmony_ci struct folio **foliop, void **_fsdata); 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_cistatic inline struct ceph_snap_context *page_snap_context(struct page *page) 7062306a36Sopenharmony_ci{ 7162306a36Sopenharmony_ci if (PagePrivate(page)) 7262306a36Sopenharmony_ci return (void *)page->private; 7362306a36Sopenharmony_ci return NULL; 7462306a36Sopenharmony_ci} 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci/* 7762306a36Sopenharmony_ci * Dirty a page. Optimistically adjust accounting, on the assumption 7862306a36Sopenharmony_ci * that we won't race with invalidate. If we do, readjust. 
7962306a36Sopenharmony_ci */ 8062306a36Sopenharmony_cistatic bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio) 8162306a36Sopenharmony_ci{ 8262306a36Sopenharmony_ci struct inode *inode; 8362306a36Sopenharmony_ci struct ceph_inode_info *ci; 8462306a36Sopenharmony_ci struct ceph_snap_context *snapc; 8562306a36Sopenharmony_ci 8662306a36Sopenharmony_ci if (folio_test_dirty(folio)) { 8762306a36Sopenharmony_ci dout("%p dirty_folio %p idx %lu -- already dirty\n", 8862306a36Sopenharmony_ci mapping->host, folio, folio->index); 8962306a36Sopenharmony_ci VM_BUG_ON_FOLIO(!folio_test_private(folio), folio); 9062306a36Sopenharmony_ci return false; 9162306a36Sopenharmony_ci } 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_ci inode = mapping->host; 9462306a36Sopenharmony_ci ci = ceph_inode(inode); 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_ci /* dirty the head */ 9762306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 9862306a36Sopenharmony_ci BUG_ON(ci->i_wr_ref == 0); // caller should hold Fw reference 9962306a36Sopenharmony_ci if (__ceph_have_pending_cap_snap(ci)) { 10062306a36Sopenharmony_ci struct ceph_cap_snap *capsnap = 10162306a36Sopenharmony_ci list_last_entry(&ci->i_cap_snaps, 10262306a36Sopenharmony_ci struct ceph_cap_snap, 10362306a36Sopenharmony_ci ci_item); 10462306a36Sopenharmony_ci snapc = ceph_get_snap_context(capsnap->context); 10562306a36Sopenharmony_ci capsnap->dirty_pages++; 10662306a36Sopenharmony_ci } else { 10762306a36Sopenharmony_ci BUG_ON(!ci->i_head_snapc); 10862306a36Sopenharmony_ci snapc = ceph_get_snap_context(ci->i_head_snapc); 10962306a36Sopenharmony_ci ++ci->i_wrbuffer_ref_head; 11062306a36Sopenharmony_ci } 11162306a36Sopenharmony_ci if (ci->i_wrbuffer_ref == 0) 11262306a36Sopenharmony_ci ihold(inode); 11362306a36Sopenharmony_ci ++ci->i_wrbuffer_ref; 11462306a36Sopenharmony_ci dout("%p dirty_folio %p idx %lu head %d/%d -> %d/%d " 11562306a36Sopenharmony_ci "snapc %p seq %lld (%d snaps)\n", 11662306a36Sopenharmony_ci 
mapping->host, folio, folio->index, 11762306a36Sopenharmony_ci ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1, 11862306a36Sopenharmony_ci ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head, 11962306a36Sopenharmony_ci snapc, snapc->seq, snapc->num_snaps); 12062306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_ci /* 12362306a36Sopenharmony_ci * Reference snap context in folio->private. Also set 12462306a36Sopenharmony_ci * PagePrivate so that we get invalidate_folio callback. 12562306a36Sopenharmony_ci */ 12662306a36Sopenharmony_ci VM_WARN_ON_FOLIO(folio->private, folio); 12762306a36Sopenharmony_ci folio_attach_private(folio, snapc); 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci return ceph_fscache_dirty_folio(mapping, folio); 13062306a36Sopenharmony_ci} 13162306a36Sopenharmony_ci 13262306a36Sopenharmony_ci/* 13362306a36Sopenharmony_ci * If we are truncating the full folio (i.e. offset == 0), adjust the 13462306a36Sopenharmony_ci * dirty folio counters appropriately. Only called if there is private 13562306a36Sopenharmony_ci * data on the folio. 
13662306a36Sopenharmony_ci */ 13762306a36Sopenharmony_cistatic void ceph_invalidate_folio(struct folio *folio, size_t offset, 13862306a36Sopenharmony_ci size_t length) 13962306a36Sopenharmony_ci{ 14062306a36Sopenharmony_ci struct inode *inode; 14162306a36Sopenharmony_ci struct ceph_inode_info *ci; 14262306a36Sopenharmony_ci struct ceph_snap_context *snapc; 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci inode = folio->mapping->host; 14562306a36Sopenharmony_ci ci = ceph_inode(inode); 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci if (offset != 0 || length != folio_size(folio)) { 14862306a36Sopenharmony_ci dout("%p invalidate_folio idx %lu partial dirty page %zu~%zu\n", 14962306a36Sopenharmony_ci inode, folio->index, offset, length); 15062306a36Sopenharmony_ci return; 15162306a36Sopenharmony_ci } 15262306a36Sopenharmony_ci 15362306a36Sopenharmony_ci WARN_ON(!folio_test_locked(folio)); 15462306a36Sopenharmony_ci if (folio_test_private(folio)) { 15562306a36Sopenharmony_ci dout("%p invalidate_folio idx %lu full dirty page\n", 15662306a36Sopenharmony_ci inode, folio->index); 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_ci snapc = folio_detach_private(folio); 15962306a36Sopenharmony_ci ceph_put_wrbuffer_cap_refs(ci, 1, snapc); 16062306a36Sopenharmony_ci ceph_put_snap_context(snapc); 16162306a36Sopenharmony_ci } 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_ci folio_wait_fscache(folio); 16462306a36Sopenharmony_ci} 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_cistatic bool ceph_release_folio(struct folio *folio, gfp_t gfp) 16762306a36Sopenharmony_ci{ 16862306a36Sopenharmony_ci struct inode *inode = folio->mapping->host; 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_ci dout("%llx:%llx release_folio idx %lu (%sdirty)\n", 17162306a36Sopenharmony_ci ceph_vinop(inode), 17262306a36Sopenharmony_ci folio->index, folio_test_dirty(folio) ? 
"" : "not "); 17362306a36Sopenharmony_ci 17462306a36Sopenharmony_ci if (folio_test_private(folio)) 17562306a36Sopenharmony_ci return false; 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci if (folio_test_fscache(folio)) { 17862306a36Sopenharmony_ci if (current_is_kswapd() || !(gfp & __GFP_FS)) 17962306a36Sopenharmony_ci return false; 18062306a36Sopenharmony_ci folio_wait_fscache(folio); 18162306a36Sopenharmony_ci } 18262306a36Sopenharmony_ci ceph_fscache_note_page_release(inode); 18362306a36Sopenharmony_ci return true; 18462306a36Sopenharmony_ci} 18562306a36Sopenharmony_ci 18662306a36Sopenharmony_cistatic void ceph_netfs_expand_readahead(struct netfs_io_request *rreq) 18762306a36Sopenharmony_ci{ 18862306a36Sopenharmony_ci struct inode *inode = rreq->inode; 18962306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 19062306a36Sopenharmony_ci struct ceph_file_layout *lo = &ci->i_layout; 19162306a36Sopenharmony_ci unsigned long max_pages = inode->i_sb->s_bdi->ra_pages; 19262306a36Sopenharmony_ci loff_t end = rreq->start + rreq->len, new_end; 19362306a36Sopenharmony_ci struct ceph_netfs_request_data *priv = rreq->netfs_priv; 19462306a36Sopenharmony_ci unsigned long max_len; 19562306a36Sopenharmony_ci u32 blockoff; 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_ci if (priv) { 19862306a36Sopenharmony_ci /* Readahead is disabled by posix_fadvise POSIX_FADV_RANDOM */ 19962306a36Sopenharmony_ci if (priv->file_ra_disabled) 20062306a36Sopenharmony_ci max_pages = 0; 20162306a36Sopenharmony_ci else 20262306a36Sopenharmony_ci max_pages = priv->file_ra_pages; 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci } 20562306a36Sopenharmony_ci 20662306a36Sopenharmony_ci /* Readahead is disabled */ 20762306a36Sopenharmony_ci if (!max_pages) 20862306a36Sopenharmony_ci return; 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci max_len = max_pages << PAGE_SHIFT; 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_ci /* 21362306a36Sopenharmony_ci * Try to 
expand the length forward by rounding up it to the next 21462306a36Sopenharmony_ci * block, but do not exceed the file size, unless the original 21562306a36Sopenharmony_ci * request already exceeds it. 21662306a36Sopenharmony_ci */ 21762306a36Sopenharmony_ci new_end = min(round_up(end, lo->stripe_unit), rreq->i_size); 21862306a36Sopenharmony_ci if (new_end > end && new_end <= rreq->start + max_len) 21962306a36Sopenharmony_ci rreq->len = new_end - rreq->start; 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ci /* Try to expand the start downward */ 22262306a36Sopenharmony_ci div_u64_rem(rreq->start, lo->stripe_unit, &blockoff); 22362306a36Sopenharmony_ci if (rreq->len + blockoff <= max_len) { 22462306a36Sopenharmony_ci rreq->start -= blockoff; 22562306a36Sopenharmony_ci rreq->len += blockoff; 22662306a36Sopenharmony_ci } 22762306a36Sopenharmony_ci} 22862306a36Sopenharmony_ci 22962306a36Sopenharmony_cistatic bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq) 23062306a36Sopenharmony_ci{ 23162306a36Sopenharmony_ci struct inode *inode = subreq->rreq->inode; 23262306a36Sopenharmony_ci struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 23362306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 23462306a36Sopenharmony_ci u64 objno, objoff; 23562306a36Sopenharmony_ci u32 xlen; 23662306a36Sopenharmony_ci 23762306a36Sopenharmony_ci /* Truncate the extent at the end of the current block */ 23862306a36Sopenharmony_ci ceph_calc_file_object_mapping(&ci->i_layout, subreq->start, subreq->len, 23962306a36Sopenharmony_ci &objno, &objoff, &xlen); 24062306a36Sopenharmony_ci subreq->len = min(xlen, fsc->mount_options->rsize); 24162306a36Sopenharmony_ci return true; 24262306a36Sopenharmony_ci} 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_cistatic void finish_netfs_read(struct ceph_osd_request *req) 24562306a36Sopenharmony_ci{ 24662306a36Sopenharmony_ci struct inode *inode = req->r_inode; 24762306a36Sopenharmony_ci struct ceph_fs_client *fsc = 
ceph_inode_to_client(inode); 24862306a36Sopenharmony_ci struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0); 24962306a36Sopenharmony_ci struct netfs_io_subrequest *subreq = req->r_priv; 25062306a36Sopenharmony_ci struct ceph_osd_req_op *op = &req->r_ops[0]; 25162306a36Sopenharmony_ci int err = req->r_result; 25262306a36Sopenharmony_ci bool sparse = (op->op == CEPH_OSD_OP_SPARSE_READ); 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_ci ceph_update_read_metrics(&fsc->mdsc->metric, req->r_start_latency, 25562306a36Sopenharmony_ci req->r_end_latency, osd_data->length, err); 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_ci dout("%s: result %d subreq->len=%zu i_size=%lld\n", __func__, req->r_result, 25862306a36Sopenharmony_ci subreq->len, i_size_read(req->r_inode)); 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_ci /* no object means success but no data */ 26162306a36Sopenharmony_ci if (err == -ENOENT) 26262306a36Sopenharmony_ci err = 0; 26362306a36Sopenharmony_ci else if (err == -EBLOCKLISTED) 26462306a36Sopenharmony_ci fsc->blocklisted = true; 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_ci if (err >= 0) { 26762306a36Sopenharmony_ci if (sparse && err > 0) 26862306a36Sopenharmony_ci err = ceph_sparse_ext_map_end(op); 26962306a36Sopenharmony_ci if (err < subreq->len) 27062306a36Sopenharmony_ci __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); 27162306a36Sopenharmony_ci if (IS_ENCRYPTED(inode) && err > 0) { 27262306a36Sopenharmony_ci err = ceph_fscrypt_decrypt_extents(inode, 27362306a36Sopenharmony_ci osd_data->pages, subreq->start, 27462306a36Sopenharmony_ci op->extent.sparse_ext, 27562306a36Sopenharmony_ci op->extent.sparse_ext_cnt); 27662306a36Sopenharmony_ci if (err > subreq->len) 27762306a36Sopenharmony_ci err = subreq->len; 27862306a36Sopenharmony_ci } 27962306a36Sopenharmony_ci } 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_ci if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) { 28262306a36Sopenharmony_ci 
ceph_put_page_vector(osd_data->pages, 28362306a36Sopenharmony_ci calc_pages_for(osd_data->alignment, 28462306a36Sopenharmony_ci osd_data->length), false); 28562306a36Sopenharmony_ci } 28662306a36Sopenharmony_ci netfs_subreq_terminated(subreq, err, false); 28762306a36Sopenharmony_ci iput(req->r_inode); 28862306a36Sopenharmony_ci ceph_dec_osd_stopping_blocker(fsc->mdsc); 28962306a36Sopenharmony_ci} 29062306a36Sopenharmony_ci 29162306a36Sopenharmony_cistatic bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq) 29262306a36Sopenharmony_ci{ 29362306a36Sopenharmony_ci struct netfs_io_request *rreq = subreq->rreq; 29462306a36Sopenharmony_ci struct inode *inode = rreq->inode; 29562306a36Sopenharmony_ci struct ceph_mds_reply_info_parsed *rinfo; 29662306a36Sopenharmony_ci struct ceph_mds_reply_info_in *iinfo; 29762306a36Sopenharmony_ci struct ceph_mds_request *req; 29862306a36Sopenharmony_ci struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); 29962306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 30062306a36Sopenharmony_ci struct iov_iter iter; 30162306a36Sopenharmony_ci ssize_t err = 0; 30262306a36Sopenharmony_ci size_t len; 30362306a36Sopenharmony_ci int mode; 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); 30662306a36Sopenharmony_ci __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_ci if (subreq->start >= inode->i_size) 30962306a36Sopenharmony_ci goto out; 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_ci /* We need to fetch the inline data. 
*/ 31262306a36Sopenharmony_ci mode = ceph_try_to_choose_auth_mds(inode, CEPH_STAT_CAP_INLINE_DATA); 31362306a36Sopenharmony_ci req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, mode); 31462306a36Sopenharmony_ci if (IS_ERR(req)) { 31562306a36Sopenharmony_ci err = PTR_ERR(req); 31662306a36Sopenharmony_ci goto out; 31762306a36Sopenharmony_ci } 31862306a36Sopenharmony_ci req->r_ino1 = ci->i_vino; 31962306a36Sopenharmony_ci req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INLINE_DATA); 32062306a36Sopenharmony_ci req->r_num_caps = 2; 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci err = ceph_mdsc_do_request(mdsc, NULL, req); 32362306a36Sopenharmony_ci if (err < 0) 32462306a36Sopenharmony_ci goto out; 32562306a36Sopenharmony_ci 32662306a36Sopenharmony_ci rinfo = &req->r_reply_info; 32762306a36Sopenharmony_ci iinfo = &rinfo->targeti; 32862306a36Sopenharmony_ci if (iinfo->inline_version == CEPH_INLINE_NONE) { 32962306a36Sopenharmony_ci /* The data got uninlined */ 33062306a36Sopenharmony_ci ceph_mdsc_put_request(req); 33162306a36Sopenharmony_ci return false; 33262306a36Sopenharmony_ci } 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ci len = min_t(size_t, iinfo->inline_len - subreq->start, subreq->len); 33562306a36Sopenharmony_ci iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len); 33662306a36Sopenharmony_ci err = copy_to_iter(iinfo->inline_data + subreq->start, len, &iter); 33762306a36Sopenharmony_ci if (err == 0) 33862306a36Sopenharmony_ci err = -EFAULT; 33962306a36Sopenharmony_ci 34062306a36Sopenharmony_ci ceph_mdsc_put_request(req); 34162306a36Sopenharmony_ciout: 34262306a36Sopenharmony_ci netfs_subreq_terminated(subreq, err, false); 34362306a36Sopenharmony_ci return true; 34462306a36Sopenharmony_ci} 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_cistatic void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) 34762306a36Sopenharmony_ci{ 34862306a36Sopenharmony_ci struct netfs_io_request *rreq = subreq->rreq; 
34962306a36Sopenharmony_ci struct inode *inode = rreq->inode; 35062306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 35162306a36Sopenharmony_ci struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 35262306a36Sopenharmony_ci struct ceph_osd_request *req = NULL; 35362306a36Sopenharmony_ci struct ceph_vino vino = ceph_vino(inode); 35462306a36Sopenharmony_ci struct iov_iter iter; 35562306a36Sopenharmony_ci int err = 0; 35662306a36Sopenharmony_ci u64 len = subreq->len; 35762306a36Sopenharmony_ci bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD); 35862306a36Sopenharmony_ci u64 off = subreq->start; 35962306a36Sopenharmony_ci 36062306a36Sopenharmony_ci if (ceph_inode_is_shutdown(inode)) { 36162306a36Sopenharmony_ci err = -EIO; 36262306a36Sopenharmony_ci goto out; 36362306a36Sopenharmony_ci } 36462306a36Sopenharmony_ci 36562306a36Sopenharmony_ci if (ceph_has_inline_data(ci) && ceph_netfs_issue_op_inline(subreq)) 36662306a36Sopenharmony_ci return; 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ci ceph_fscrypt_adjust_off_and_len(inode, &off, &len); 36962306a36Sopenharmony_ci 37062306a36Sopenharmony_ci req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, 37162306a36Sopenharmony_ci off, &len, 0, 1, sparse ? 
CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ, 37262306a36Sopenharmony_ci CEPH_OSD_FLAG_READ | fsc->client->osdc.client->options->read_from_replica, 37362306a36Sopenharmony_ci NULL, ci->i_truncate_seq, ci->i_truncate_size, false); 37462306a36Sopenharmony_ci if (IS_ERR(req)) { 37562306a36Sopenharmony_ci err = PTR_ERR(req); 37662306a36Sopenharmony_ci req = NULL; 37762306a36Sopenharmony_ci goto out; 37862306a36Sopenharmony_ci } 37962306a36Sopenharmony_ci 38062306a36Sopenharmony_ci if (sparse) { 38162306a36Sopenharmony_ci err = ceph_alloc_sparse_ext_map(&req->r_ops[0]); 38262306a36Sopenharmony_ci if (err) 38362306a36Sopenharmony_ci goto out; 38462306a36Sopenharmony_ci } 38562306a36Sopenharmony_ci 38662306a36Sopenharmony_ci dout("%s: pos=%llu orig_len=%zu len=%llu\n", __func__, subreq->start, subreq->len, len); 38762306a36Sopenharmony_ci 38862306a36Sopenharmony_ci iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len); 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_ci /* 39162306a36Sopenharmony_ci * FIXME: For now, use CEPH_OSD_DATA_TYPE_PAGES instead of _ITER for 39262306a36Sopenharmony_ci * encrypted inodes. We'd need infrastructure that handles an iov_iter 39362306a36Sopenharmony_ci * instead of page arrays, and we don't have that as of yet. Once the 39462306a36Sopenharmony_ci * dust settles on the write helpers and encrypt/decrypt routines for 39562306a36Sopenharmony_ci * netfs, we should be able to rework this. 
39662306a36Sopenharmony_ci */ 39762306a36Sopenharmony_ci if (IS_ENCRYPTED(inode)) { 39862306a36Sopenharmony_ci struct page **pages; 39962306a36Sopenharmony_ci size_t page_off; 40062306a36Sopenharmony_ci 40162306a36Sopenharmony_ci err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off); 40262306a36Sopenharmony_ci if (err < 0) { 40362306a36Sopenharmony_ci dout("%s: iov_ter_get_pages_alloc returned %d\n", 40462306a36Sopenharmony_ci __func__, err); 40562306a36Sopenharmony_ci goto out; 40662306a36Sopenharmony_ci } 40762306a36Sopenharmony_ci 40862306a36Sopenharmony_ci /* should always give us a page-aligned read */ 40962306a36Sopenharmony_ci WARN_ON_ONCE(page_off); 41062306a36Sopenharmony_ci len = err; 41162306a36Sopenharmony_ci err = 0; 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_ci osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, 41462306a36Sopenharmony_ci false); 41562306a36Sopenharmony_ci } else { 41662306a36Sopenharmony_ci osd_req_op_extent_osd_iter(req, 0, &iter); 41762306a36Sopenharmony_ci } 41862306a36Sopenharmony_ci if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) { 41962306a36Sopenharmony_ci err = -EIO; 42062306a36Sopenharmony_ci goto out; 42162306a36Sopenharmony_ci } 42262306a36Sopenharmony_ci req->r_callback = finish_netfs_read; 42362306a36Sopenharmony_ci req->r_priv = subreq; 42462306a36Sopenharmony_ci req->r_inode = inode; 42562306a36Sopenharmony_ci ihold(inode); 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_ci ceph_osdc_start_request(req->r_osdc, req); 42862306a36Sopenharmony_ciout: 42962306a36Sopenharmony_ci ceph_osdc_put_request(req); 43062306a36Sopenharmony_ci if (err) 43162306a36Sopenharmony_ci netfs_subreq_terminated(subreq, err, false); 43262306a36Sopenharmony_ci dout("%s: result %d\n", __func__, err); 43362306a36Sopenharmony_ci} 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_cistatic int ceph_init_request(struct netfs_io_request *rreq, struct file *file) 43662306a36Sopenharmony_ci{ 43762306a36Sopenharmony_ci struct 
inode *inode = rreq->inode; 43862306a36Sopenharmony_ci int got = 0, want = CEPH_CAP_FILE_CACHE; 43962306a36Sopenharmony_ci struct ceph_netfs_request_data *priv; 44062306a36Sopenharmony_ci int ret = 0; 44162306a36Sopenharmony_ci 44262306a36Sopenharmony_ci if (rreq->origin != NETFS_READAHEAD) 44362306a36Sopenharmony_ci return 0; 44462306a36Sopenharmony_ci 44562306a36Sopenharmony_ci priv = kzalloc(sizeof(*priv), GFP_NOFS); 44662306a36Sopenharmony_ci if (!priv) 44762306a36Sopenharmony_ci return -ENOMEM; 44862306a36Sopenharmony_ci 44962306a36Sopenharmony_ci if (file) { 45062306a36Sopenharmony_ci struct ceph_rw_context *rw_ctx; 45162306a36Sopenharmony_ci struct ceph_file_info *fi = file->private_data; 45262306a36Sopenharmony_ci 45362306a36Sopenharmony_ci priv->file_ra_pages = file->f_ra.ra_pages; 45462306a36Sopenharmony_ci priv->file_ra_disabled = file->f_mode & FMODE_RANDOM; 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_ci rw_ctx = ceph_find_rw_context(fi); 45762306a36Sopenharmony_ci if (rw_ctx) { 45862306a36Sopenharmony_ci rreq->netfs_priv = priv; 45962306a36Sopenharmony_ci return 0; 46062306a36Sopenharmony_ci } 46162306a36Sopenharmony_ci } 46262306a36Sopenharmony_ci 46362306a36Sopenharmony_ci /* 46462306a36Sopenharmony_ci * readahead callers do not necessarily hold Fcb caps 46562306a36Sopenharmony_ci * (e.g. fadvise, madvise). 
46662306a36Sopenharmony_ci */ 46762306a36Sopenharmony_ci ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want, true, &got); 46862306a36Sopenharmony_ci if (ret < 0) { 46962306a36Sopenharmony_ci dout("start_read %p, error getting cap\n", inode); 47062306a36Sopenharmony_ci goto out; 47162306a36Sopenharmony_ci } 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_ci if (!(got & want)) { 47462306a36Sopenharmony_ci dout("start_read %p, no cache cap\n", inode); 47562306a36Sopenharmony_ci ret = -EACCES; 47662306a36Sopenharmony_ci goto out; 47762306a36Sopenharmony_ci } 47862306a36Sopenharmony_ci if (ret == 0) { 47962306a36Sopenharmony_ci ret = -EACCES; 48062306a36Sopenharmony_ci goto out; 48162306a36Sopenharmony_ci } 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_ci priv->caps = got; 48462306a36Sopenharmony_ci rreq->netfs_priv = priv; 48562306a36Sopenharmony_ci 48662306a36Sopenharmony_ciout: 48762306a36Sopenharmony_ci if (ret < 0) 48862306a36Sopenharmony_ci kfree(priv); 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_ci return ret; 49162306a36Sopenharmony_ci} 49262306a36Sopenharmony_ci 49362306a36Sopenharmony_cistatic void ceph_netfs_free_request(struct netfs_io_request *rreq) 49462306a36Sopenharmony_ci{ 49562306a36Sopenharmony_ci struct ceph_netfs_request_data *priv = rreq->netfs_priv; 49662306a36Sopenharmony_ci 49762306a36Sopenharmony_ci if (!priv) 49862306a36Sopenharmony_ci return; 49962306a36Sopenharmony_ci 50062306a36Sopenharmony_ci if (priv->caps) 50162306a36Sopenharmony_ci ceph_put_cap_refs(ceph_inode(rreq->inode), priv->caps); 50262306a36Sopenharmony_ci kfree(priv); 50362306a36Sopenharmony_ci rreq->netfs_priv = NULL; 50462306a36Sopenharmony_ci} 50562306a36Sopenharmony_ci 50662306a36Sopenharmony_ciconst struct netfs_request_ops ceph_netfs_ops = { 50762306a36Sopenharmony_ci .init_request = ceph_init_request, 50862306a36Sopenharmony_ci .free_request = ceph_netfs_free_request, 50962306a36Sopenharmony_ci .begin_cache_operation = ceph_begin_cache_operation, 
51062306a36Sopenharmony_ci .issue_read = ceph_netfs_issue_read, 51162306a36Sopenharmony_ci .expand_readahead = ceph_netfs_expand_readahead, 51262306a36Sopenharmony_ci .clamp_length = ceph_netfs_clamp_length, 51362306a36Sopenharmony_ci .check_write_begin = ceph_netfs_check_write_begin, 51462306a36Sopenharmony_ci}; 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci#ifdef CONFIG_CEPH_FSCACHE 51762306a36Sopenharmony_cistatic void ceph_set_page_fscache(struct page *page) 51862306a36Sopenharmony_ci{ 51962306a36Sopenharmony_ci set_page_fscache(page); 52062306a36Sopenharmony_ci} 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_cistatic void ceph_fscache_write_terminated(void *priv, ssize_t error, bool was_async) 52362306a36Sopenharmony_ci{ 52462306a36Sopenharmony_ci struct inode *inode = priv; 52562306a36Sopenharmony_ci 52662306a36Sopenharmony_ci if (IS_ERR_VALUE(error) && error != -ENOBUFS) 52762306a36Sopenharmony_ci ceph_fscache_invalidate(inode, false); 52862306a36Sopenharmony_ci} 52962306a36Sopenharmony_ci 53062306a36Sopenharmony_cistatic void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching) 53162306a36Sopenharmony_ci{ 53262306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 53362306a36Sopenharmony_ci struct fscache_cookie *cookie = ceph_fscache_cookie(ci); 53462306a36Sopenharmony_ci 53562306a36Sopenharmony_ci fscache_write_to_cache(cookie, inode->i_mapping, off, len, i_size_read(inode), 53662306a36Sopenharmony_ci ceph_fscache_write_terminated, inode, caching); 53762306a36Sopenharmony_ci} 53862306a36Sopenharmony_ci#else 53962306a36Sopenharmony_cistatic inline void ceph_set_page_fscache(struct page *page) 54062306a36Sopenharmony_ci{ 54162306a36Sopenharmony_ci} 54262306a36Sopenharmony_ci 54362306a36Sopenharmony_cistatic inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching) 54462306a36Sopenharmony_ci{ 54562306a36Sopenharmony_ci} 54662306a36Sopenharmony_ci#endif /* 
CONFIG_CEPH_FSCACHE */ 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_cistruct ceph_writeback_ctl 54962306a36Sopenharmony_ci{ 55062306a36Sopenharmony_ci loff_t i_size; 55162306a36Sopenharmony_ci u64 truncate_size; 55262306a36Sopenharmony_ci u32 truncate_seq; 55362306a36Sopenharmony_ci bool size_stable; 55462306a36Sopenharmony_ci bool head_snapc; 55562306a36Sopenharmony_ci}; 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_ci/* 55862306a36Sopenharmony_ci * Get ref for the oldest snapc for an inode with dirty data... that is, the 55962306a36Sopenharmony_ci * only snap context we are allowed to write back. 56062306a36Sopenharmony_ci */ 56162306a36Sopenharmony_cistatic struct ceph_snap_context * 56262306a36Sopenharmony_ciget_oldest_context(struct inode *inode, struct ceph_writeback_ctl *ctl, 56362306a36Sopenharmony_ci struct ceph_snap_context *page_snapc) 56462306a36Sopenharmony_ci{ 56562306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 56662306a36Sopenharmony_ci struct ceph_snap_context *snapc = NULL; 56762306a36Sopenharmony_ci struct ceph_cap_snap *capsnap = NULL; 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 57062306a36Sopenharmony_ci list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { 57162306a36Sopenharmony_ci dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap, 57262306a36Sopenharmony_ci capsnap->context, capsnap->dirty_pages); 57362306a36Sopenharmony_ci if (!capsnap->dirty_pages) 57462306a36Sopenharmony_ci continue; 57562306a36Sopenharmony_ci 57662306a36Sopenharmony_ci /* get i_size, truncate_{seq,size} for page_snapc? 
*/ 57762306a36Sopenharmony_ci if (snapc && capsnap->context != page_snapc) 57862306a36Sopenharmony_ci continue; 57962306a36Sopenharmony_ci 58062306a36Sopenharmony_ci if (ctl) { 58162306a36Sopenharmony_ci if (capsnap->writing) { 58262306a36Sopenharmony_ci ctl->i_size = i_size_read(inode); 58362306a36Sopenharmony_ci ctl->size_stable = false; 58462306a36Sopenharmony_ci } else { 58562306a36Sopenharmony_ci ctl->i_size = capsnap->size; 58662306a36Sopenharmony_ci ctl->size_stable = true; 58762306a36Sopenharmony_ci } 58862306a36Sopenharmony_ci ctl->truncate_size = capsnap->truncate_size; 58962306a36Sopenharmony_ci ctl->truncate_seq = capsnap->truncate_seq; 59062306a36Sopenharmony_ci ctl->head_snapc = false; 59162306a36Sopenharmony_ci } 59262306a36Sopenharmony_ci 59362306a36Sopenharmony_ci if (snapc) 59462306a36Sopenharmony_ci break; 59562306a36Sopenharmony_ci 59662306a36Sopenharmony_ci snapc = ceph_get_snap_context(capsnap->context); 59762306a36Sopenharmony_ci if (!page_snapc || 59862306a36Sopenharmony_ci page_snapc == snapc || 59962306a36Sopenharmony_ci page_snapc->seq > snapc->seq) 60062306a36Sopenharmony_ci break; 60162306a36Sopenharmony_ci } 60262306a36Sopenharmony_ci if (!snapc && ci->i_wrbuffer_ref_head) { 60362306a36Sopenharmony_ci snapc = ceph_get_snap_context(ci->i_head_snapc); 60462306a36Sopenharmony_ci dout(" head snapc %p has %d dirty pages\n", 60562306a36Sopenharmony_ci snapc, ci->i_wrbuffer_ref_head); 60662306a36Sopenharmony_ci if (ctl) { 60762306a36Sopenharmony_ci ctl->i_size = i_size_read(inode); 60862306a36Sopenharmony_ci ctl->truncate_size = ci->i_truncate_size; 60962306a36Sopenharmony_ci ctl->truncate_seq = ci->i_truncate_seq; 61062306a36Sopenharmony_ci ctl->size_stable = false; 61162306a36Sopenharmony_ci ctl->head_snapc = true; 61262306a36Sopenharmony_ci } 61362306a36Sopenharmony_ci } 61462306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 61562306a36Sopenharmony_ci return snapc; 61662306a36Sopenharmony_ci} 61762306a36Sopenharmony_ci 
61862306a36Sopenharmony_cistatic u64 get_writepages_data_length(struct inode *inode, 61962306a36Sopenharmony_ci struct page *page, u64 start) 62062306a36Sopenharmony_ci{ 62162306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 62262306a36Sopenharmony_ci struct ceph_snap_context *snapc; 62362306a36Sopenharmony_ci struct ceph_cap_snap *capsnap = NULL; 62462306a36Sopenharmony_ci u64 end = i_size_read(inode); 62562306a36Sopenharmony_ci u64 ret; 62662306a36Sopenharmony_ci 62762306a36Sopenharmony_ci snapc = page_snap_context(ceph_fscrypt_pagecache_page(page)); 62862306a36Sopenharmony_ci if (snapc != ci->i_head_snapc) { 62962306a36Sopenharmony_ci bool found = false; 63062306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 63162306a36Sopenharmony_ci list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { 63262306a36Sopenharmony_ci if (capsnap->context == snapc) { 63362306a36Sopenharmony_ci if (!capsnap->writing) 63462306a36Sopenharmony_ci end = capsnap->size; 63562306a36Sopenharmony_ci found = true; 63662306a36Sopenharmony_ci break; 63762306a36Sopenharmony_ci } 63862306a36Sopenharmony_ci } 63962306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 64062306a36Sopenharmony_ci WARN_ON(!found); 64162306a36Sopenharmony_ci } 64262306a36Sopenharmony_ci if (end > ceph_fscrypt_page_offset(page) + thp_size(page)) 64362306a36Sopenharmony_ci end = ceph_fscrypt_page_offset(page) + thp_size(page); 64462306a36Sopenharmony_ci ret = end > start ? end - start : 0; 64562306a36Sopenharmony_ci if (ret && fscrypt_is_bounce_page(page)) 64662306a36Sopenharmony_ci ret = round_up(ret, CEPH_FSCRYPT_BLOCK_SIZE); 64762306a36Sopenharmony_ci return ret; 64862306a36Sopenharmony_ci} 64962306a36Sopenharmony_ci 65062306a36Sopenharmony_ci/* 65162306a36Sopenharmony_ci * Write a single page, but leave the page locked. 
65262306a36Sopenharmony_ci * 65362306a36Sopenharmony_ci * If we get a write error, mark the mapping for error, but still adjust the 65462306a36Sopenharmony_ci * dirty page accounting (i.e., page is no longer dirty). 65562306a36Sopenharmony_ci */ 65662306a36Sopenharmony_cistatic int writepage_nounlock(struct page *page, struct writeback_control *wbc) 65762306a36Sopenharmony_ci{ 65862306a36Sopenharmony_ci struct folio *folio = page_folio(page); 65962306a36Sopenharmony_ci struct inode *inode = page->mapping->host; 66062306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 66162306a36Sopenharmony_ci struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 66262306a36Sopenharmony_ci struct ceph_snap_context *snapc, *oldest; 66362306a36Sopenharmony_ci loff_t page_off = page_offset(page); 66462306a36Sopenharmony_ci int err; 66562306a36Sopenharmony_ci loff_t len = thp_size(page); 66662306a36Sopenharmony_ci loff_t wlen; 66762306a36Sopenharmony_ci struct ceph_writeback_ctl ceph_wbc; 66862306a36Sopenharmony_ci struct ceph_osd_client *osdc = &fsc->client->osdc; 66962306a36Sopenharmony_ci struct ceph_osd_request *req; 67062306a36Sopenharmony_ci bool caching = ceph_is_cache_enabled(inode); 67162306a36Sopenharmony_ci struct page *bounce_page = NULL; 67262306a36Sopenharmony_ci 67362306a36Sopenharmony_ci dout("writepage %p idx %lu\n", page, page->index); 67462306a36Sopenharmony_ci 67562306a36Sopenharmony_ci if (ceph_inode_is_shutdown(inode)) 67662306a36Sopenharmony_ci return -EIO; 67762306a36Sopenharmony_ci 67862306a36Sopenharmony_ci /* verify this is a writeable snap context */ 67962306a36Sopenharmony_ci snapc = page_snap_context(page); 68062306a36Sopenharmony_ci if (!snapc) { 68162306a36Sopenharmony_ci dout("writepage %p page %p not dirty?\n", inode, page); 68262306a36Sopenharmony_ci return 0; 68362306a36Sopenharmony_ci } 68462306a36Sopenharmony_ci oldest = get_oldest_context(inode, &ceph_wbc, snapc); 68562306a36Sopenharmony_ci if (snapc->seq > oldest->seq) { 
68662306a36Sopenharmony_ci dout("writepage %p page %p snapc %p not writeable - noop\n", 68762306a36Sopenharmony_ci inode, page, snapc); 68862306a36Sopenharmony_ci /* we should only noop if called by kswapd */ 68962306a36Sopenharmony_ci WARN_ON(!(current->flags & PF_MEMALLOC)); 69062306a36Sopenharmony_ci ceph_put_snap_context(oldest); 69162306a36Sopenharmony_ci redirty_page_for_writepage(wbc, page); 69262306a36Sopenharmony_ci return 0; 69362306a36Sopenharmony_ci } 69462306a36Sopenharmony_ci ceph_put_snap_context(oldest); 69562306a36Sopenharmony_ci 69662306a36Sopenharmony_ci /* is this a partial page at end of file? */ 69762306a36Sopenharmony_ci if (page_off >= ceph_wbc.i_size) { 69862306a36Sopenharmony_ci dout("folio at %lu beyond eof %llu\n", folio->index, 69962306a36Sopenharmony_ci ceph_wbc.i_size); 70062306a36Sopenharmony_ci folio_invalidate(folio, 0, folio_size(folio)); 70162306a36Sopenharmony_ci return 0; 70262306a36Sopenharmony_ci } 70362306a36Sopenharmony_ci 70462306a36Sopenharmony_ci if (ceph_wbc.i_size < page_off + len) 70562306a36Sopenharmony_ci len = ceph_wbc.i_size - page_off; 70662306a36Sopenharmony_ci 70762306a36Sopenharmony_ci wlen = IS_ENCRYPTED(inode) ? 
round_up(len, CEPH_FSCRYPT_BLOCK_SIZE) : len; 70862306a36Sopenharmony_ci dout("writepage %p page %p index %lu on %llu~%llu snapc %p seq %lld\n", 70962306a36Sopenharmony_ci inode, page, page->index, page_off, wlen, snapc, snapc->seq); 71062306a36Sopenharmony_ci 71162306a36Sopenharmony_ci if (atomic_long_inc_return(&fsc->writeback_count) > 71262306a36Sopenharmony_ci CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb)) 71362306a36Sopenharmony_ci fsc->write_congested = true; 71462306a36Sopenharmony_ci 71562306a36Sopenharmony_ci req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode), 71662306a36Sopenharmony_ci page_off, &wlen, 0, 1, CEPH_OSD_OP_WRITE, 71762306a36Sopenharmony_ci CEPH_OSD_FLAG_WRITE, snapc, 71862306a36Sopenharmony_ci ceph_wbc.truncate_seq, 71962306a36Sopenharmony_ci ceph_wbc.truncate_size, true); 72062306a36Sopenharmony_ci if (IS_ERR(req)) { 72162306a36Sopenharmony_ci redirty_page_for_writepage(wbc, page); 72262306a36Sopenharmony_ci return PTR_ERR(req); 72362306a36Sopenharmony_ci } 72462306a36Sopenharmony_ci 72562306a36Sopenharmony_ci if (wlen < len) 72662306a36Sopenharmony_ci len = wlen; 72762306a36Sopenharmony_ci 72862306a36Sopenharmony_ci set_page_writeback(page); 72962306a36Sopenharmony_ci if (caching) 73062306a36Sopenharmony_ci ceph_set_page_fscache(page); 73162306a36Sopenharmony_ci ceph_fscache_write_to_cache(inode, page_off, len, caching); 73262306a36Sopenharmony_ci 73362306a36Sopenharmony_ci if (IS_ENCRYPTED(inode)) { 73462306a36Sopenharmony_ci bounce_page = fscrypt_encrypt_pagecache_blocks(page, 73562306a36Sopenharmony_ci CEPH_FSCRYPT_BLOCK_SIZE, 0, 73662306a36Sopenharmony_ci GFP_NOFS); 73762306a36Sopenharmony_ci if (IS_ERR(bounce_page)) { 73862306a36Sopenharmony_ci redirty_page_for_writepage(wbc, page); 73962306a36Sopenharmony_ci end_page_writeback(page); 74062306a36Sopenharmony_ci ceph_osdc_put_request(req); 74162306a36Sopenharmony_ci return PTR_ERR(bounce_page); 74262306a36Sopenharmony_ci } 74362306a36Sopenharmony_ci } 
74462306a36Sopenharmony_ci 74562306a36Sopenharmony_ci /* it may be a short write due to an object boundary */ 74662306a36Sopenharmony_ci WARN_ON_ONCE(len > thp_size(page)); 74762306a36Sopenharmony_ci osd_req_op_extent_osd_data_pages(req, 0, 74862306a36Sopenharmony_ci bounce_page ? &bounce_page : &page, wlen, 0, 74962306a36Sopenharmony_ci false, false); 75062306a36Sopenharmony_ci dout("writepage %llu~%llu (%llu bytes, %sencrypted)\n", 75162306a36Sopenharmony_ci page_off, len, wlen, IS_ENCRYPTED(inode) ? "" : "not "); 75262306a36Sopenharmony_ci 75362306a36Sopenharmony_ci req->r_mtime = inode->i_mtime; 75462306a36Sopenharmony_ci ceph_osdc_start_request(osdc, req); 75562306a36Sopenharmony_ci err = ceph_osdc_wait_request(osdc, req); 75662306a36Sopenharmony_ci 75762306a36Sopenharmony_ci ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency, 75862306a36Sopenharmony_ci req->r_end_latency, len, err); 75962306a36Sopenharmony_ci fscrypt_free_bounce_page(bounce_page); 76062306a36Sopenharmony_ci ceph_osdc_put_request(req); 76162306a36Sopenharmony_ci if (err == 0) 76262306a36Sopenharmony_ci err = len; 76362306a36Sopenharmony_ci 76462306a36Sopenharmony_ci if (err < 0) { 76562306a36Sopenharmony_ci struct writeback_control tmp_wbc; 76662306a36Sopenharmony_ci if (!wbc) 76762306a36Sopenharmony_ci wbc = &tmp_wbc; 76862306a36Sopenharmony_ci if (err == -ERESTARTSYS) { 76962306a36Sopenharmony_ci /* killed by SIGKILL */ 77062306a36Sopenharmony_ci dout("writepage interrupted page %p\n", page); 77162306a36Sopenharmony_ci redirty_page_for_writepage(wbc, page); 77262306a36Sopenharmony_ci end_page_writeback(page); 77362306a36Sopenharmony_ci return err; 77462306a36Sopenharmony_ci } 77562306a36Sopenharmony_ci if (err == -EBLOCKLISTED) 77662306a36Sopenharmony_ci fsc->blocklisted = true; 77762306a36Sopenharmony_ci dout("writepage setting page/mapping error %d %p\n", 77862306a36Sopenharmony_ci err, page); 77962306a36Sopenharmony_ci mapping_set_error(&inode->i_data, err); 
78062306a36Sopenharmony_ci wbc->pages_skipped++; 78162306a36Sopenharmony_ci } else { 78262306a36Sopenharmony_ci dout("writepage cleaned page %p\n", page); 78362306a36Sopenharmony_ci err = 0; /* vfs expects us to return 0 */ 78462306a36Sopenharmony_ci } 78562306a36Sopenharmony_ci oldest = detach_page_private(page); 78662306a36Sopenharmony_ci WARN_ON_ONCE(oldest != snapc); 78762306a36Sopenharmony_ci end_page_writeback(page); 78862306a36Sopenharmony_ci ceph_put_wrbuffer_cap_refs(ci, 1, snapc); 78962306a36Sopenharmony_ci ceph_put_snap_context(snapc); /* page's reference */ 79062306a36Sopenharmony_ci 79162306a36Sopenharmony_ci if (atomic_long_dec_return(&fsc->writeback_count) < 79262306a36Sopenharmony_ci CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb)) 79362306a36Sopenharmony_ci fsc->write_congested = false; 79462306a36Sopenharmony_ci 79562306a36Sopenharmony_ci return err; 79662306a36Sopenharmony_ci} 79762306a36Sopenharmony_ci 79862306a36Sopenharmony_cistatic int ceph_writepage(struct page *page, struct writeback_control *wbc) 79962306a36Sopenharmony_ci{ 80062306a36Sopenharmony_ci int err; 80162306a36Sopenharmony_ci struct inode *inode = page->mapping->host; 80262306a36Sopenharmony_ci BUG_ON(!inode); 80362306a36Sopenharmony_ci ihold(inode); 80462306a36Sopenharmony_ci 80562306a36Sopenharmony_ci if (wbc->sync_mode == WB_SYNC_NONE && 80662306a36Sopenharmony_ci ceph_inode_to_client(inode)->write_congested) 80762306a36Sopenharmony_ci return AOP_WRITEPAGE_ACTIVATE; 80862306a36Sopenharmony_ci 80962306a36Sopenharmony_ci wait_on_page_fscache(page); 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_ci err = writepage_nounlock(page, wbc); 81262306a36Sopenharmony_ci if (err == -ERESTARTSYS) { 81362306a36Sopenharmony_ci /* direct memory reclaimer was killed by SIGKILL. 
return 0 81462306a36Sopenharmony_ci * to prevent caller from setting mapping/page error */ 81562306a36Sopenharmony_ci err = 0; 81662306a36Sopenharmony_ci } 81762306a36Sopenharmony_ci unlock_page(page); 81862306a36Sopenharmony_ci iput(inode); 81962306a36Sopenharmony_ci return err; 82062306a36Sopenharmony_ci} 82162306a36Sopenharmony_ci 82262306a36Sopenharmony_ci/* 82362306a36Sopenharmony_ci * async writeback completion handler. 82462306a36Sopenharmony_ci * 82562306a36Sopenharmony_ci * If we get an error, set the mapping error bit, but not the individual 82662306a36Sopenharmony_ci * page error bits. 82762306a36Sopenharmony_ci */ 82862306a36Sopenharmony_cistatic void writepages_finish(struct ceph_osd_request *req) 82962306a36Sopenharmony_ci{ 83062306a36Sopenharmony_ci struct inode *inode = req->r_inode; 83162306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 83262306a36Sopenharmony_ci struct ceph_osd_data *osd_data; 83362306a36Sopenharmony_ci struct page *page; 83462306a36Sopenharmony_ci int num_pages, total_pages = 0; 83562306a36Sopenharmony_ci int i, j; 83662306a36Sopenharmony_ci int rc = req->r_result; 83762306a36Sopenharmony_ci struct ceph_snap_context *snapc = req->r_snapc; 83862306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 83962306a36Sopenharmony_ci struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 84062306a36Sopenharmony_ci unsigned int len = 0; 84162306a36Sopenharmony_ci bool remove_page; 84262306a36Sopenharmony_ci 84362306a36Sopenharmony_ci dout("writepages_finish %p rc %d\n", inode, rc); 84462306a36Sopenharmony_ci if (rc < 0) { 84562306a36Sopenharmony_ci mapping_set_error(mapping, rc); 84662306a36Sopenharmony_ci ceph_set_error_write(ci); 84762306a36Sopenharmony_ci if (rc == -EBLOCKLISTED) 84862306a36Sopenharmony_ci fsc->blocklisted = true; 84962306a36Sopenharmony_ci } else { 85062306a36Sopenharmony_ci ceph_clear_error_write(ci); 85162306a36Sopenharmony_ci } 85262306a36Sopenharmony_ci 
85362306a36Sopenharmony_ci /* 85462306a36Sopenharmony_ci * We lost the cache cap, need to truncate the page before 85562306a36Sopenharmony_ci * it is unlocked, otherwise we'd truncate it later in the 85662306a36Sopenharmony_ci * page truncation thread, possibly losing some data that 85762306a36Sopenharmony_ci * raced its way in 85862306a36Sopenharmony_ci */ 85962306a36Sopenharmony_ci remove_page = !(ceph_caps_issued(ci) & 86062306a36Sopenharmony_ci (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)); 86162306a36Sopenharmony_ci 86262306a36Sopenharmony_ci /* clean all pages */ 86362306a36Sopenharmony_ci for (i = 0; i < req->r_num_ops; i++) { 86462306a36Sopenharmony_ci if (req->r_ops[i].op != CEPH_OSD_OP_WRITE) { 86562306a36Sopenharmony_ci pr_warn("%s incorrect op %d req %p index %d tid %llu\n", 86662306a36Sopenharmony_ci __func__, req->r_ops[i].op, req, i, req->r_tid); 86762306a36Sopenharmony_ci break; 86862306a36Sopenharmony_ci } 86962306a36Sopenharmony_ci 87062306a36Sopenharmony_ci osd_data = osd_req_op_extent_osd_data(req, i); 87162306a36Sopenharmony_ci BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES); 87262306a36Sopenharmony_ci len += osd_data->length; 87362306a36Sopenharmony_ci num_pages = calc_pages_for((u64)osd_data->alignment, 87462306a36Sopenharmony_ci (u64)osd_data->length); 87562306a36Sopenharmony_ci total_pages += num_pages; 87662306a36Sopenharmony_ci for (j = 0; j < num_pages; j++) { 87762306a36Sopenharmony_ci page = osd_data->pages[j]; 87862306a36Sopenharmony_ci if (fscrypt_is_bounce_page(page)) { 87962306a36Sopenharmony_ci page = fscrypt_pagecache_page(page); 88062306a36Sopenharmony_ci fscrypt_free_bounce_page(osd_data->pages[j]); 88162306a36Sopenharmony_ci osd_data->pages[j] = page; 88262306a36Sopenharmony_ci } 88362306a36Sopenharmony_ci BUG_ON(!page); 88462306a36Sopenharmony_ci WARN_ON(!PageUptodate(page)); 88562306a36Sopenharmony_ci 88662306a36Sopenharmony_ci if (atomic_long_dec_return(&fsc->writeback_count) < 88762306a36Sopenharmony_ci 
CONGESTION_OFF_THRESH( 88862306a36Sopenharmony_ci fsc->mount_options->congestion_kb)) 88962306a36Sopenharmony_ci fsc->write_congested = false; 89062306a36Sopenharmony_ci 89162306a36Sopenharmony_ci ceph_put_snap_context(detach_page_private(page)); 89262306a36Sopenharmony_ci end_page_writeback(page); 89362306a36Sopenharmony_ci dout("unlocking %p\n", page); 89462306a36Sopenharmony_ci 89562306a36Sopenharmony_ci if (remove_page) 89662306a36Sopenharmony_ci generic_error_remove_page(inode->i_mapping, 89762306a36Sopenharmony_ci page); 89862306a36Sopenharmony_ci 89962306a36Sopenharmony_ci unlock_page(page); 90062306a36Sopenharmony_ci } 90162306a36Sopenharmony_ci dout("writepages_finish %p wrote %llu bytes cleaned %d pages\n", 90262306a36Sopenharmony_ci inode, osd_data->length, rc >= 0 ? num_pages : 0); 90362306a36Sopenharmony_ci 90462306a36Sopenharmony_ci release_pages(osd_data->pages, num_pages); 90562306a36Sopenharmony_ci } 90662306a36Sopenharmony_ci 90762306a36Sopenharmony_ci ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency, 90862306a36Sopenharmony_ci req->r_end_latency, len, rc); 90962306a36Sopenharmony_ci 91062306a36Sopenharmony_ci ceph_put_wrbuffer_cap_refs(ci, total_pages, snapc); 91162306a36Sopenharmony_ci 91262306a36Sopenharmony_ci osd_data = osd_req_op_extent_osd_data(req, 0); 91362306a36Sopenharmony_ci if (osd_data->pages_from_pool) 91462306a36Sopenharmony_ci mempool_free(osd_data->pages, ceph_wb_pagevec_pool); 91562306a36Sopenharmony_ci else 91662306a36Sopenharmony_ci kfree(osd_data->pages); 91762306a36Sopenharmony_ci ceph_osdc_put_request(req); 91862306a36Sopenharmony_ci ceph_dec_osd_stopping_blocker(fsc->mdsc); 91962306a36Sopenharmony_ci} 92062306a36Sopenharmony_ci 92162306a36Sopenharmony_ci/* 92262306a36Sopenharmony_ci * initiate async writeback 92362306a36Sopenharmony_ci */ 92462306a36Sopenharmony_cistatic int ceph_writepages_start(struct address_space *mapping, 92562306a36Sopenharmony_ci struct writeback_control *wbc) 
92662306a36Sopenharmony_ci{ 92762306a36Sopenharmony_ci struct inode *inode = mapping->host; 92862306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 92962306a36Sopenharmony_ci struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 93062306a36Sopenharmony_ci struct ceph_vino vino = ceph_vino(inode); 93162306a36Sopenharmony_ci pgoff_t index, start_index, end = -1; 93262306a36Sopenharmony_ci struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc; 93362306a36Sopenharmony_ci struct folio_batch fbatch; 93462306a36Sopenharmony_ci int rc = 0; 93562306a36Sopenharmony_ci unsigned int wsize = i_blocksize(inode); 93662306a36Sopenharmony_ci struct ceph_osd_request *req = NULL; 93762306a36Sopenharmony_ci struct ceph_writeback_ctl ceph_wbc; 93862306a36Sopenharmony_ci bool should_loop, range_whole = false; 93962306a36Sopenharmony_ci bool done = false; 94062306a36Sopenharmony_ci bool caching = ceph_is_cache_enabled(inode); 94162306a36Sopenharmony_ci xa_mark_t tag; 94262306a36Sopenharmony_ci 94362306a36Sopenharmony_ci if (wbc->sync_mode == WB_SYNC_NONE && 94462306a36Sopenharmony_ci fsc->write_congested) 94562306a36Sopenharmony_ci return 0; 94662306a36Sopenharmony_ci 94762306a36Sopenharmony_ci dout("writepages_start %p (mode=%s)\n", inode, 94862306a36Sopenharmony_ci wbc->sync_mode == WB_SYNC_NONE ? "NONE" : 94962306a36Sopenharmony_ci (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); 95062306a36Sopenharmony_ci 95162306a36Sopenharmony_ci if (ceph_inode_is_shutdown(inode)) { 95262306a36Sopenharmony_ci if (ci->i_wrbuffer_ref > 0) { 95362306a36Sopenharmony_ci pr_warn_ratelimited( 95462306a36Sopenharmony_ci "writepage_start %p %lld forced umount\n", 95562306a36Sopenharmony_ci inode, ceph_ino(inode)); 95662306a36Sopenharmony_ci } 95762306a36Sopenharmony_ci mapping_set_error(mapping, -EIO); 95862306a36Sopenharmony_ci return -EIO; /* we're in a forced umount, don't write! 
*/ 95962306a36Sopenharmony_ci } 96062306a36Sopenharmony_ci if (fsc->mount_options->wsize < wsize) 96162306a36Sopenharmony_ci wsize = fsc->mount_options->wsize; 96262306a36Sopenharmony_ci 96362306a36Sopenharmony_ci folio_batch_init(&fbatch); 96462306a36Sopenharmony_ci 96562306a36Sopenharmony_ci start_index = wbc->range_cyclic ? mapping->writeback_index : 0; 96662306a36Sopenharmony_ci index = start_index; 96762306a36Sopenharmony_ci 96862306a36Sopenharmony_ci if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) { 96962306a36Sopenharmony_ci tag = PAGECACHE_TAG_TOWRITE; 97062306a36Sopenharmony_ci } else { 97162306a36Sopenharmony_ci tag = PAGECACHE_TAG_DIRTY; 97262306a36Sopenharmony_ci } 97362306a36Sopenharmony_ciretry: 97462306a36Sopenharmony_ci /* find oldest snap context with dirty data */ 97562306a36Sopenharmony_ci snapc = get_oldest_context(inode, &ceph_wbc, NULL); 97662306a36Sopenharmony_ci if (!snapc) { 97762306a36Sopenharmony_ci /* hmm, why does writepages get called when there 97862306a36Sopenharmony_ci is no dirty data? */ 97962306a36Sopenharmony_ci dout(" no snap context with dirty data?\n"); 98062306a36Sopenharmony_ci goto out; 98162306a36Sopenharmony_ci } 98262306a36Sopenharmony_ci dout(" oldest snapc is %p seq %lld (%d snaps)\n", 98362306a36Sopenharmony_ci snapc, snapc->seq, snapc->num_snaps); 98462306a36Sopenharmony_ci 98562306a36Sopenharmony_ci should_loop = false; 98662306a36Sopenharmony_ci if (ceph_wbc.head_snapc && snapc != last_snapc) { 98762306a36Sopenharmony_ci /* where to start/end? 
*/ 98862306a36Sopenharmony_ci if (wbc->range_cyclic) { 98962306a36Sopenharmony_ci index = start_index; 99062306a36Sopenharmony_ci end = -1; 99162306a36Sopenharmony_ci if (index > 0) 99262306a36Sopenharmony_ci should_loop = true; 99362306a36Sopenharmony_ci dout(" cyclic, start at %lu\n", index); 99462306a36Sopenharmony_ci } else { 99562306a36Sopenharmony_ci index = wbc->range_start >> PAGE_SHIFT; 99662306a36Sopenharmony_ci end = wbc->range_end >> PAGE_SHIFT; 99762306a36Sopenharmony_ci if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) 99862306a36Sopenharmony_ci range_whole = true; 99962306a36Sopenharmony_ci dout(" not cyclic, %lu to %lu\n", index, end); 100062306a36Sopenharmony_ci } 100162306a36Sopenharmony_ci } else if (!ceph_wbc.head_snapc) { 100262306a36Sopenharmony_ci /* Do not respect wbc->range_{start,end}. Dirty pages 100362306a36Sopenharmony_ci * in that range can be associated with newer snapc. 100462306a36Sopenharmony_ci * They are not writeable until we write all dirty pages 100562306a36Sopenharmony_ci * associated with 'snapc' get written */ 100662306a36Sopenharmony_ci if (index > 0) 100762306a36Sopenharmony_ci should_loop = true; 100862306a36Sopenharmony_ci dout(" non-head snapc, range whole\n"); 100962306a36Sopenharmony_ci } 101062306a36Sopenharmony_ci 101162306a36Sopenharmony_ci if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) 101262306a36Sopenharmony_ci tag_pages_for_writeback(mapping, index, end); 101362306a36Sopenharmony_ci 101462306a36Sopenharmony_ci ceph_put_snap_context(last_snapc); 101562306a36Sopenharmony_ci last_snapc = snapc; 101662306a36Sopenharmony_ci 101762306a36Sopenharmony_ci while (!done && index <= end) { 101862306a36Sopenharmony_ci int num_ops = 0, op_idx; 101962306a36Sopenharmony_ci unsigned i, nr_folios, max_pages, locked_pages = 0; 102062306a36Sopenharmony_ci struct page **pages = NULL, **data_pages; 102162306a36Sopenharmony_ci struct page *page; 102262306a36Sopenharmony_ci pgoff_t strip_unit_end = 0; 
102362306a36Sopenharmony_ci u64 offset = 0, len = 0; 102462306a36Sopenharmony_ci bool from_pool = false; 102562306a36Sopenharmony_ci 102662306a36Sopenharmony_ci max_pages = wsize >> PAGE_SHIFT; 102762306a36Sopenharmony_ci 102862306a36Sopenharmony_ciget_more_pages: 102962306a36Sopenharmony_ci nr_folios = filemap_get_folios_tag(mapping, &index, 103062306a36Sopenharmony_ci end, tag, &fbatch); 103162306a36Sopenharmony_ci dout("pagevec_lookup_range_tag got %d\n", nr_folios); 103262306a36Sopenharmony_ci if (!nr_folios && !locked_pages) 103362306a36Sopenharmony_ci break; 103462306a36Sopenharmony_ci for (i = 0; i < nr_folios && locked_pages < max_pages; i++) { 103562306a36Sopenharmony_ci page = &fbatch.folios[i]->page; 103662306a36Sopenharmony_ci dout("? %p idx %lu\n", page, page->index); 103762306a36Sopenharmony_ci if (locked_pages == 0) 103862306a36Sopenharmony_ci lock_page(page); /* first page */ 103962306a36Sopenharmony_ci else if (!trylock_page(page)) 104062306a36Sopenharmony_ci break; 104162306a36Sopenharmony_ci 104262306a36Sopenharmony_ci /* only dirty pages, or our accounting breaks */ 104362306a36Sopenharmony_ci if (unlikely(!PageDirty(page)) || 104462306a36Sopenharmony_ci unlikely(page->mapping != mapping)) { 104562306a36Sopenharmony_ci dout("!dirty or !mapping %p\n", page); 104662306a36Sopenharmony_ci unlock_page(page); 104762306a36Sopenharmony_ci continue; 104862306a36Sopenharmony_ci } 104962306a36Sopenharmony_ci /* only if matching snap context */ 105062306a36Sopenharmony_ci pgsnapc = page_snap_context(page); 105162306a36Sopenharmony_ci if (pgsnapc != snapc) { 105262306a36Sopenharmony_ci dout("page snapc %p %lld != oldest %p %lld\n", 105362306a36Sopenharmony_ci pgsnapc, pgsnapc->seq, snapc, snapc->seq); 105462306a36Sopenharmony_ci if (!should_loop && 105562306a36Sopenharmony_ci !ceph_wbc.head_snapc && 105662306a36Sopenharmony_ci wbc->sync_mode != WB_SYNC_NONE) 105762306a36Sopenharmony_ci should_loop = true; 105862306a36Sopenharmony_ci unlock_page(page); 
105962306a36Sopenharmony_ci continue; 106062306a36Sopenharmony_ci } 106162306a36Sopenharmony_ci if (page_offset(page) >= ceph_wbc.i_size) { 106262306a36Sopenharmony_ci struct folio *folio = page_folio(page); 106362306a36Sopenharmony_ci 106462306a36Sopenharmony_ci dout("folio at %lu beyond eof %llu\n", 106562306a36Sopenharmony_ci folio->index, ceph_wbc.i_size); 106662306a36Sopenharmony_ci if ((ceph_wbc.size_stable || 106762306a36Sopenharmony_ci folio_pos(folio) >= i_size_read(inode)) && 106862306a36Sopenharmony_ci folio_clear_dirty_for_io(folio)) 106962306a36Sopenharmony_ci folio_invalidate(folio, 0, 107062306a36Sopenharmony_ci folio_size(folio)); 107162306a36Sopenharmony_ci folio_unlock(folio); 107262306a36Sopenharmony_ci continue; 107362306a36Sopenharmony_ci } 107462306a36Sopenharmony_ci if (strip_unit_end && (page->index > strip_unit_end)) { 107562306a36Sopenharmony_ci dout("end of strip unit %p\n", page); 107662306a36Sopenharmony_ci unlock_page(page); 107762306a36Sopenharmony_ci break; 107862306a36Sopenharmony_ci } 107962306a36Sopenharmony_ci if (PageWriteback(page) || PageFsCache(page)) { 108062306a36Sopenharmony_ci if (wbc->sync_mode == WB_SYNC_NONE) { 108162306a36Sopenharmony_ci dout("%p under writeback\n", page); 108262306a36Sopenharmony_ci unlock_page(page); 108362306a36Sopenharmony_ci continue; 108462306a36Sopenharmony_ci } 108562306a36Sopenharmony_ci dout("waiting on writeback %p\n", page); 108662306a36Sopenharmony_ci wait_on_page_writeback(page); 108762306a36Sopenharmony_ci wait_on_page_fscache(page); 108862306a36Sopenharmony_ci } 108962306a36Sopenharmony_ci 109062306a36Sopenharmony_ci if (!clear_page_dirty_for_io(page)) { 109162306a36Sopenharmony_ci dout("%p !clear_page_dirty_for_io\n", page); 109262306a36Sopenharmony_ci unlock_page(page); 109362306a36Sopenharmony_ci continue; 109462306a36Sopenharmony_ci } 109562306a36Sopenharmony_ci 109662306a36Sopenharmony_ci /* 109762306a36Sopenharmony_ci * We have something to write. 
If this is 109862306a36Sopenharmony_ci * the first locked page this time through, 109962306a36Sopenharmony_ci * calculate max possinle write size and 110062306a36Sopenharmony_ci * allocate a page array 110162306a36Sopenharmony_ci */ 110262306a36Sopenharmony_ci if (locked_pages == 0) { 110362306a36Sopenharmony_ci u64 objnum; 110462306a36Sopenharmony_ci u64 objoff; 110562306a36Sopenharmony_ci u32 xlen; 110662306a36Sopenharmony_ci 110762306a36Sopenharmony_ci /* prepare async write request */ 110862306a36Sopenharmony_ci offset = (u64)page_offset(page); 110962306a36Sopenharmony_ci ceph_calc_file_object_mapping(&ci->i_layout, 111062306a36Sopenharmony_ci offset, wsize, 111162306a36Sopenharmony_ci &objnum, &objoff, 111262306a36Sopenharmony_ci &xlen); 111362306a36Sopenharmony_ci len = xlen; 111462306a36Sopenharmony_ci 111562306a36Sopenharmony_ci num_ops = 1; 111662306a36Sopenharmony_ci strip_unit_end = page->index + 111762306a36Sopenharmony_ci ((len - 1) >> PAGE_SHIFT); 111862306a36Sopenharmony_ci 111962306a36Sopenharmony_ci BUG_ON(pages); 112062306a36Sopenharmony_ci max_pages = calc_pages_for(0, (u64)len); 112162306a36Sopenharmony_ci pages = kmalloc_array(max_pages, 112262306a36Sopenharmony_ci sizeof(*pages), 112362306a36Sopenharmony_ci GFP_NOFS); 112462306a36Sopenharmony_ci if (!pages) { 112562306a36Sopenharmony_ci from_pool = true; 112662306a36Sopenharmony_ci pages = mempool_alloc(ceph_wb_pagevec_pool, GFP_NOFS); 112762306a36Sopenharmony_ci BUG_ON(!pages); 112862306a36Sopenharmony_ci } 112962306a36Sopenharmony_ci 113062306a36Sopenharmony_ci len = 0; 113162306a36Sopenharmony_ci } else if (page->index != 113262306a36Sopenharmony_ci (offset + len) >> PAGE_SHIFT) { 113362306a36Sopenharmony_ci if (num_ops >= (from_pool ? 
CEPH_OSD_SLAB_OPS : 113462306a36Sopenharmony_ci CEPH_OSD_MAX_OPS)) { 113562306a36Sopenharmony_ci redirty_page_for_writepage(wbc, page); 113662306a36Sopenharmony_ci unlock_page(page); 113762306a36Sopenharmony_ci break; 113862306a36Sopenharmony_ci } 113962306a36Sopenharmony_ci 114062306a36Sopenharmony_ci num_ops++; 114162306a36Sopenharmony_ci offset = (u64)page_offset(page); 114262306a36Sopenharmony_ci len = 0; 114362306a36Sopenharmony_ci } 114462306a36Sopenharmony_ci 114562306a36Sopenharmony_ci /* note position of first page in fbatch */ 114662306a36Sopenharmony_ci dout("%p will write page %p idx %lu\n", 114762306a36Sopenharmony_ci inode, page, page->index); 114862306a36Sopenharmony_ci 114962306a36Sopenharmony_ci if (atomic_long_inc_return(&fsc->writeback_count) > 115062306a36Sopenharmony_ci CONGESTION_ON_THRESH( 115162306a36Sopenharmony_ci fsc->mount_options->congestion_kb)) 115262306a36Sopenharmony_ci fsc->write_congested = true; 115362306a36Sopenharmony_ci 115462306a36Sopenharmony_ci if (IS_ENCRYPTED(inode)) { 115562306a36Sopenharmony_ci pages[locked_pages] = 115662306a36Sopenharmony_ci fscrypt_encrypt_pagecache_blocks(page, 115762306a36Sopenharmony_ci PAGE_SIZE, 0, 115862306a36Sopenharmony_ci locked_pages ? GFP_NOWAIT : GFP_NOFS); 115962306a36Sopenharmony_ci if (IS_ERR(pages[locked_pages])) { 116062306a36Sopenharmony_ci if (PTR_ERR(pages[locked_pages]) == -EINVAL) 116162306a36Sopenharmony_ci pr_err("%s: inode->i_blkbits=%hhu\n", 116262306a36Sopenharmony_ci __func__, inode->i_blkbits); 116362306a36Sopenharmony_ci /* better not fail on first page! 
*/ 116462306a36Sopenharmony_ci BUG_ON(locked_pages == 0); 116562306a36Sopenharmony_ci pages[locked_pages] = NULL; 116662306a36Sopenharmony_ci redirty_page_for_writepage(wbc, page); 116762306a36Sopenharmony_ci unlock_page(page); 116862306a36Sopenharmony_ci break; 116962306a36Sopenharmony_ci } 117062306a36Sopenharmony_ci ++locked_pages; 117162306a36Sopenharmony_ci } else { 117262306a36Sopenharmony_ci pages[locked_pages++] = page; 117362306a36Sopenharmony_ci } 117462306a36Sopenharmony_ci 117562306a36Sopenharmony_ci fbatch.folios[i] = NULL; 117662306a36Sopenharmony_ci len += thp_size(page); 117762306a36Sopenharmony_ci } 117862306a36Sopenharmony_ci 117962306a36Sopenharmony_ci /* did we get anything? */ 118062306a36Sopenharmony_ci if (!locked_pages) 118162306a36Sopenharmony_ci goto release_folios; 118262306a36Sopenharmony_ci if (i) { 118362306a36Sopenharmony_ci unsigned j, n = 0; 118462306a36Sopenharmony_ci /* shift unused page to beginning of fbatch */ 118562306a36Sopenharmony_ci for (j = 0; j < nr_folios; j++) { 118662306a36Sopenharmony_ci if (!fbatch.folios[j]) 118762306a36Sopenharmony_ci continue; 118862306a36Sopenharmony_ci if (n < j) 118962306a36Sopenharmony_ci fbatch.folios[n] = fbatch.folios[j]; 119062306a36Sopenharmony_ci n++; 119162306a36Sopenharmony_ci } 119262306a36Sopenharmony_ci fbatch.nr = n; 119362306a36Sopenharmony_ci 119462306a36Sopenharmony_ci if (nr_folios && i == nr_folios && 119562306a36Sopenharmony_ci locked_pages < max_pages) { 119662306a36Sopenharmony_ci dout("reached end fbatch, trying for more\n"); 119762306a36Sopenharmony_ci folio_batch_release(&fbatch); 119862306a36Sopenharmony_ci goto get_more_pages; 119962306a36Sopenharmony_ci } 120062306a36Sopenharmony_ci } 120162306a36Sopenharmony_ci 120262306a36Sopenharmony_cinew_request: 120362306a36Sopenharmony_ci offset = ceph_fscrypt_page_offset(pages[0]); 120462306a36Sopenharmony_ci len = wsize; 120562306a36Sopenharmony_ci 120662306a36Sopenharmony_ci req = ceph_osdc_new_request(&fsc->client->osdc, 
120762306a36Sopenharmony_ci &ci->i_layout, vino, 120862306a36Sopenharmony_ci offset, &len, 0, num_ops, 120962306a36Sopenharmony_ci CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, 121062306a36Sopenharmony_ci snapc, ceph_wbc.truncate_seq, 121162306a36Sopenharmony_ci ceph_wbc.truncate_size, false); 121262306a36Sopenharmony_ci if (IS_ERR(req)) { 121362306a36Sopenharmony_ci req = ceph_osdc_new_request(&fsc->client->osdc, 121462306a36Sopenharmony_ci &ci->i_layout, vino, 121562306a36Sopenharmony_ci offset, &len, 0, 121662306a36Sopenharmony_ci min(num_ops, 121762306a36Sopenharmony_ci CEPH_OSD_SLAB_OPS), 121862306a36Sopenharmony_ci CEPH_OSD_OP_WRITE, 121962306a36Sopenharmony_ci CEPH_OSD_FLAG_WRITE, 122062306a36Sopenharmony_ci snapc, ceph_wbc.truncate_seq, 122162306a36Sopenharmony_ci ceph_wbc.truncate_size, true); 122262306a36Sopenharmony_ci BUG_ON(IS_ERR(req)); 122362306a36Sopenharmony_ci } 122462306a36Sopenharmony_ci BUG_ON(len < ceph_fscrypt_page_offset(pages[locked_pages - 1]) + 122562306a36Sopenharmony_ci thp_size(pages[locked_pages - 1]) - offset); 122662306a36Sopenharmony_ci 122762306a36Sopenharmony_ci if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) { 122862306a36Sopenharmony_ci rc = -EIO; 122962306a36Sopenharmony_ci goto release_folios; 123062306a36Sopenharmony_ci } 123162306a36Sopenharmony_ci req->r_callback = writepages_finish; 123262306a36Sopenharmony_ci req->r_inode = inode; 123362306a36Sopenharmony_ci 123462306a36Sopenharmony_ci /* Format the osd request message and submit the write */ 123562306a36Sopenharmony_ci len = 0; 123662306a36Sopenharmony_ci data_pages = pages; 123762306a36Sopenharmony_ci op_idx = 0; 123862306a36Sopenharmony_ci for (i = 0; i < locked_pages; i++) { 123962306a36Sopenharmony_ci struct page *page = ceph_fscrypt_pagecache_page(pages[i]); 124062306a36Sopenharmony_ci 124162306a36Sopenharmony_ci u64 cur_offset = page_offset(page); 124262306a36Sopenharmony_ci /* 124362306a36Sopenharmony_ci * Discontinuity in page range? 
Ceph can handle that by just passing 124462306a36Sopenharmony_ci * multiple extents in the write op. 124562306a36Sopenharmony_ci */ 124662306a36Sopenharmony_ci if (offset + len != cur_offset) { 124762306a36Sopenharmony_ci /* If it's full, stop here */ 124862306a36Sopenharmony_ci if (op_idx + 1 == req->r_num_ops) 124962306a36Sopenharmony_ci break; 125062306a36Sopenharmony_ci 125162306a36Sopenharmony_ci /* Kick off an fscache write with what we have so far. */ 125262306a36Sopenharmony_ci ceph_fscache_write_to_cache(inode, offset, len, caching); 125362306a36Sopenharmony_ci 125462306a36Sopenharmony_ci /* Start a new extent */ 125562306a36Sopenharmony_ci osd_req_op_extent_dup_last(req, op_idx, 125662306a36Sopenharmony_ci cur_offset - offset); 125762306a36Sopenharmony_ci dout("writepages got pages at %llu~%llu\n", 125862306a36Sopenharmony_ci offset, len); 125962306a36Sopenharmony_ci osd_req_op_extent_osd_data_pages(req, op_idx, 126062306a36Sopenharmony_ci data_pages, len, 0, 126162306a36Sopenharmony_ci from_pool, false); 126262306a36Sopenharmony_ci osd_req_op_extent_update(req, op_idx, len); 126362306a36Sopenharmony_ci 126462306a36Sopenharmony_ci len = 0; 126562306a36Sopenharmony_ci offset = cur_offset; 126662306a36Sopenharmony_ci data_pages = pages + i; 126762306a36Sopenharmony_ci op_idx++; 126862306a36Sopenharmony_ci } 126962306a36Sopenharmony_ci 127062306a36Sopenharmony_ci set_page_writeback(page); 127162306a36Sopenharmony_ci if (caching) 127262306a36Sopenharmony_ci ceph_set_page_fscache(page); 127362306a36Sopenharmony_ci len += thp_size(page); 127462306a36Sopenharmony_ci } 127562306a36Sopenharmony_ci ceph_fscache_write_to_cache(inode, offset, len, caching); 127662306a36Sopenharmony_ci 127762306a36Sopenharmony_ci if (ceph_wbc.size_stable) { 127862306a36Sopenharmony_ci len = min(len, ceph_wbc.i_size - offset); 127962306a36Sopenharmony_ci } else if (i == locked_pages) { 128062306a36Sopenharmony_ci /* writepages_finish() clears writeback pages 128162306a36Sopenharmony_ci 
* according to the data length, so make sure 128262306a36Sopenharmony_ci * data length covers all locked pages */ 128362306a36Sopenharmony_ci u64 min_len = len + 1 - thp_size(page); 128462306a36Sopenharmony_ci len = get_writepages_data_length(inode, pages[i - 1], 128562306a36Sopenharmony_ci offset); 128662306a36Sopenharmony_ci len = max(len, min_len); 128762306a36Sopenharmony_ci } 128862306a36Sopenharmony_ci if (IS_ENCRYPTED(inode)) 128962306a36Sopenharmony_ci len = round_up(len, CEPH_FSCRYPT_BLOCK_SIZE); 129062306a36Sopenharmony_ci 129162306a36Sopenharmony_ci dout("writepages got pages at %llu~%llu\n", offset, len); 129262306a36Sopenharmony_ci 129362306a36Sopenharmony_ci if (IS_ENCRYPTED(inode) && 129462306a36Sopenharmony_ci ((offset | len) & ~CEPH_FSCRYPT_BLOCK_MASK)) 129562306a36Sopenharmony_ci pr_warn("%s: bad encrypted write offset=%lld len=%llu\n", 129662306a36Sopenharmony_ci __func__, offset, len); 129762306a36Sopenharmony_ci 129862306a36Sopenharmony_ci osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len, 129962306a36Sopenharmony_ci 0, from_pool, false); 130062306a36Sopenharmony_ci osd_req_op_extent_update(req, op_idx, len); 130162306a36Sopenharmony_ci 130262306a36Sopenharmony_ci BUG_ON(op_idx + 1 != req->r_num_ops); 130362306a36Sopenharmony_ci 130462306a36Sopenharmony_ci from_pool = false; 130562306a36Sopenharmony_ci if (i < locked_pages) { 130662306a36Sopenharmony_ci BUG_ON(num_ops <= req->r_num_ops); 130762306a36Sopenharmony_ci num_ops -= req->r_num_ops; 130862306a36Sopenharmony_ci locked_pages -= i; 130962306a36Sopenharmony_ci 131062306a36Sopenharmony_ci /* allocate new pages array for next request */ 131162306a36Sopenharmony_ci data_pages = pages; 131262306a36Sopenharmony_ci pages = kmalloc_array(locked_pages, sizeof(*pages), 131362306a36Sopenharmony_ci GFP_NOFS); 131462306a36Sopenharmony_ci if (!pages) { 131562306a36Sopenharmony_ci from_pool = true; 131662306a36Sopenharmony_ci pages = mempool_alloc(ceph_wb_pagevec_pool, GFP_NOFS); 
131762306a36Sopenharmony_ci BUG_ON(!pages); 131862306a36Sopenharmony_ci } 131962306a36Sopenharmony_ci memcpy(pages, data_pages + i, 132062306a36Sopenharmony_ci locked_pages * sizeof(*pages)); 132162306a36Sopenharmony_ci memset(data_pages + i, 0, 132262306a36Sopenharmony_ci locked_pages * sizeof(*pages)); 132362306a36Sopenharmony_ci } else { 132462306a36Sopenharmony_ci BUG_ON(num_ops != req->r_num_ops); 132562306a36Sopenharmony_ci index = pages[i - 1]->index + 1; 132662306a36Sopenharmony_ci /* request message now owns the pages array */ 132762306a36Sopenharmony_ci pages = NULL; 132862306a36Sopenharmony_ci } 132962306a36Sopenharmony_ci 133062306a36Sopenharmony_ci req->r_mtime = inode->i_mtime; 133162306a36Sopenharmony_ci ceph_osdc_start_request(&fsc->client->osdc, req); 133262306a36Sopenharmony_ci req = NULL; 133362306a36Sopenharmony_ci 133462306a36Sopenharmony_ci wbc->nr_to_write -= i; 133562306a36Sopenharmony_ci if (pages) 133662306a36Sopenharmony_ci goto new_request; 133762306a36Sopenharmony_ci 133862306a36Sopenharmony_ci /* 133962306a36Sopenharmony_ci * We stop writing back only if we are not doing 134062306a36Sopenharmony_ci * integrity sync. In case of integrity sync we have to 134162306a36Sopenharmony_ci * keep going until we have written all the pages 134262306a36Sopenharmony_ci * we tagged for writeback prior to entering this loop. 134362306a36Sopenharmony_ci */ 134462306a36Sopenharmony_ci if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) 134562306a36Sopenharmony_ci done = true; 134662306a36Sopenharmony_ci 134762306a36Sopenharmony_cirelease_folios: 134862306a36Sopenharmony_ci dout("folio_batch release on %d folios (%p)\n", (int)fbatch.nr, 134962306a36Sopenharmony_ci fbatch.nr ? 
fbatch.folios[0] : NULL); 135062306a36Sopenharmony_ci folio_batch_release(&fbatch); 135162306a36Sopenharmony_ci } 135262306a36Sopenharmony_ci 135362306a36Sopenharmony_ci if (should_loop && !done) { 135462306a36Sopenharmony_ci /* more to do; loop back to beginning of file */ 135562306a36Sopenharmony_ci dout("writepages looping back to beginning of file\n"); 135662306a36Sopenharmony_ci end = start_index - 1; /* OK even when start_index == 0 */ 135762306a36Sopenharmony_ci 135862306a36Sopenharmony_ci /* to write dirty pages associated with next snapc, 135962306a36Sopenharmony_ci * we need to wait until current writes complete */ 136062306a36Sopenharmony_ci if (wbc->sync_mode != WB_SYNC_NONE && 136162306a36Sopenharmony_ci start_index == 0 && /* all dirty pages were checked */ 136262306a36Sopenharmony_ci !ceph_wbc.head_snapc) { 136362306a36Sopenharmony_ci struct page *page; 136462306a36Sopenharmony_ci unsigned i, nr; 136562306a36Sopenharmony_ci index = 0; 136662306a36Sopenharmony_ci while ((index <= end) && 136762306a36Sopenharmony_ci (nr = filemap_get_folios_tag(mapping, &index, 136862306a36Sopenharmony_ci (pgoff_t)-1, 136962306a36Sopenharmony_ci PAGECACHE_TAG_WRITEBACK, 137062306a36Sopenharmony_ci &fbatch))) { 137162306a36Sopenharmony_ci for (i = 0; i < nr; i++) { 137262306a36Sopenharmony_ci page = &fbatch.folios[i]->page; 137362306a36Sopenharmony_ci if (page_snap_context(page) != snapc) 137462306a36Sopenharmony_ci continue; 137562306a36Sopenharmony_ci wait_on_page_writeback(page); 137662306a36Sopenharmony_ci } 137762306a36Sopenharmony_ci folio_batch_release(&fbatch); 137862306a36Sopenharmony_ci cond_resched(); 137962306a36Sopenharmony_ci } 138062306a36Sopenharmony_ci } 138162306a36Sopenharmony_ci 138262306a36Sopenharmony_ci start_index = 0; 138362306a36Sopenharmony_ci index = 0; 138462306a36Sopenharmony_ci goto retry; 138562306a36Sopenharmony_ci } 138662306a36Sopenharmony_ci 138762306a36Sopenharmony_ci if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) 
138862306a36Sopenharmony_ci mapping->writeback_index = index; 138962306a36Sopenharmony_ci 139062306a36Sopenharmony_ciout: 139162306a36Sopenharmony_ci ceph_osdc_put_request(req); 139262306a36Sopenharmony_ci ceph_put_snap_context(last_snapc); 139362306a36Sopenharmony_ci dout("writepages dend - startone, rc = %d\n", rc); 139462306a36Sopenharmony_ci return rc; 139562306a36Sopenharmony_ci} 139662306a36Sopenharmony_ci 139762306a36Sopenharmony_ci 139862306a36Sopenharmony_ci 139962306a36Sopenharmony_ci/* 140062306a36Sopenharmony_ci * See if a given @snapc is either writeable, or already written. 140162306a36Sopenharmony_ci */ 140262306a36Sopenharmony_cistatic int context_is_writeable_or_written(struct inode *inode, 140362306a36Sopenharmony_ci struct ceph_snap_context *snapc) 140462306a36Sopenharmony_ci{ 140562306a36Sopenharmony_ci struct ceph_snap_context *oldest = get_oldest_context(inode, NULL, NULL); 140662306a36Sopenharmony_ci int ret = !oldest || snapc->seq <= oldest->seq; 140762306a36Sopenharmony_ci 140862306a36Sopenharmony_ci ceph_put_snap_context(oldest); 140962306a36Sopenharmony_ci return ret; 141062306a36Sopenharmony_ci} 141162306a36Sopenharmony_ci 141262306a36Sopenharmony_ci/** 141362306a36Sopenharmony_ci * ceph_find_incompatible - find an incompatible context and return it 141462306a36Sopenharmony_ci * @page: page being dirtied 141562306a36Sopenharmony_ci * 141662306a36Sopenharmony_ci * We are only allowed to write into/dirty a page if the page is 141762306a36Sopenharmony_ci * clean, or already dirty within the same snap context. Returns a 141862306a36Sopenharmony_ci * conflicting context if there is one, NULL if there isn't, or a 141962306a36Sopenharmony_ci * negative error code on other errors. 142062306a36Sopenharmony_ci * 142162306a36Sopenharmony_ci * Must be called with page lock held. 
142262306a36Sopenharmony_ci */ 142362306a36Sopenharmony_cistatic struct ceph_snap_context * 142462306a36Sopenharmony_ciceph_find_incompatible(struct page *page) 142562306a36Sopenharmony_ci{ 142662306a36Sopenharmony_ci struct inode *inode = page->mapping->host; 142762306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 142862306a36Sopenharmony_ci 142962306a36Sopenharmony_ci if (ceph_inode_is_shutdown(inode)) { 143062306a36Sopenharmony_ci dout(" page %p %llx:%llx is shutdown\n", page, 143162306a36Sopenharmony_ci ceph_vinop(inode)); 143262306a36Sopenharmony_ci return ERR_PTR(-ESTALE); 143362306a36Sopenharmony_ci } 143462306a36Sopenharmony_ci 143562306a36Sopenharmony_ci for (;;) { 143662306a36Sopenharmony_ci struct ceph_snap_context *snapc, *oldest; 143762306a36Sopenharmony_ci 143862306a36Sopenharmony_ci wait_on_page_writeback(page); 143962306a36Sopenharmony_ci 144062306a36Sopenharmony_ci snapc = page_snap_context(page); 144162306a36Sopenharmony_ci if (!snapc || snapc == ci->i_head_snapc) 144262306a36Sopenharmony_ci break; 144362306a36Sopenharmony_ci 144462306a36Sopenharmony_ci /* 144562306a36Sopenharmony_ci * this page is already dirty in another (older) snap 144662306a36Sopenharmony_ci * context! is it writeable now? 
144762306a36Sopenharmony_ci */ 144862306a36Sopenharmony_ci oldest = get_oldest_context(inode, NULL, NULL); 144962306a36Sopenharmony_ci if (snapc->seq > oldest->seq) { 145062306a36Sopenharmony_ci /* not writeable -- return it for the caller to deal with */ 145162306a36Sopenharmony_ci ceph_put_snap_context(oldest); 145262306a36Sopenharmony_ci dout(" page %p snapc %p not current or oldest\n", page, snapc); 145362306a36Sopenharmony_ci return ceph_get_snap_context(snapc); 145462306a36Sopenharmony_ci } 145562306a36Sopenharmony_ci ceph_put_snap_context(oldest); 145662306a36Sopenharmony_ci 145762306a36Sopenharmony_ci /* yay, writeable, do it now (without dropping page lock) */ 145862306a36Sopenharmony_ci dout(" page %p snapc %p not current, but oldest\n", page, snapc); 145962306a36Sopenharmony_ci if (clear_page_dirty_for_io(page)) { 146062306a36Sopenharmony_ci int r = writepage_nounlock(page, NULL); 146162306a36Sopenharmony_ci if (r < 0) 146262306a36Sopenharmony_ci return ERR_PTR(r); 146362306a36Sopenharmony_ci } 146462306a36Sopenharmony_ci } 146562306a36Sopenharmony_ci return NULL; 146662306a36Sopenharmony_ci} 146762306a36Sopenharmony_ci 146862306a36Sopenharmony_cistatic int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len, 146962306a36Sopenharmony_ci struct folio **foliop, void **_fsdata) 147062306a36Sopenharmony_ci{ 147162306a36Sopenharmony_ci struct inode *inode = file_inode(file); 147262306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 147362306a36Sopenharmony_ci struct ceph_snap_context *snapc; 147462306a36Sopenharmony_ci 147562306a36Sopenharmony_ci snapc = ceph_find_incompatible(folio_page(*foliop, 0)); 147662306a36Sopenharmony_ci if (snapc) { 147762306a36Sopenharmony_ci int r; 147862306a36Sopenharmony_ci 147962306a36Sopenharmony_ci folio_unlock(*foliop); 148062306a36Sopenharmony_ci folio_put(*foliop); 148162306a36Sopenharmony_ci *foliop = NULL; 148262306a36Sopenharmony_ci if (IS_ERR(snapc)) 148362306a36Sopenharmony_ci 
return PTR_ERR(snapc); 148462306a36Sopenharmony_ci 148562306a36Sopenharmony_ci ceph_queue_writeback(inode); 148662306a36Sopenharmony_ci r = wait_event_killable(ci->i_cap_wq, 148762306a36Sopenharmony_ci context_is_writeable_or_written(inode, snapc)); 148862306a36Sopenharmony_ci ceph_put_snap_context(snapc); 148962306a36Sopenharmony_ci return r == 0 ? -EAGAIN : r; 149062306a36Sopenharmony_ci } 149162306a36Sopenharmony_ci return 0; 149262306a36Sopenharmony_ci} 149362306a36Sopenharmony_ci 149462306a36Sopenharmony_ci/* 149562306a36Sopenharmony_ci * We are only allowed to write into/dirty the page if the page is 149662306a36Sopenharmony_ci * clean, or already dirty within the same snap context. 149762306a36Sopenharmony_ci */ 149862306a36Sopenharmony_cistatic int ceph_write_begin(struct file *file, struct address_space *mapping, 149962306a36Sopenharmony_ci loff_t pos, unsigned len, 150062306a36Sopenharmony_ci struct page **pagep, void **fsdata) 150162306a36Sopenharmony_ci{ 150262306a36Sopenharmony_ci struct inode *inode = file_inode(file); 150362306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 150462306a36Sopenharmony_ci struct folio *folio = NULL; 150562306a36Sopenharmony_ci int r; 150662306a36Sopenharmony_ci 150762306a36Sopenharmony_ci r = netfs_write_begin(&ci->netfs, file, inode->i_mapping, pos, len, &folio, NULL); 150862306a36Sopenharmony_ci if (r < 0) 150962306a36Sopenharmony_ci return r; 151062306a36Sopenharmony_ci 151162306a36Sopenharmony_ci folio_wait_fscache(folio); 151262306a36Sopenharmony_ci WARN_ON_ONCE(!folio_test_locked(folio)); 151362306a36Sopenharmony_ci *pagep = &folio->page; 151462306a36Sopenharmony_ci return 0; 151562306a36Sopenharmony_ci} 151662306a36Sopenharmony_ci 151762306a36Sopenharmony_ci/* 151862306a36Sopenharmony_ci * we don't do anything in here that simple_write_end doesn't do 151962306a36Sopenharmony_ci * except adjust dirty page accounting 152062306a36Sopenharmony_ci */ 152162306a36Sopenharmony_cistatic int 
ceph_write_end(struct file *file, struct address_space *mapping, 152262306a36Sopenharmony_ci loff_t pos, unsigned len, unsigned copied, 152362306a36Sopenharmony_ci struct page *subpage, void *fsdata) 152462306a36Sopenharmony_ci{ 152562306a36Sopenharmony_ci struct folio *folio = page_folio(subpage); 152662306a36Sopenharmony_ci struct inode *inode = file_inode(file); 152762306a36Sopenharmony_ci bool check_cap = false; 152862306a36Sopenharmony_ci 152962306a36Sopenharmony_ci dout("write_end file %p inode %p folio %p %d~%d (%d)\n", file, 153062306a36Sopenharmony_ci inode, folio, (int)pos, (int)copied, (int)len); 153162306a36Sopenharmony_ci 153262306a36Sopenharmony_ci if (!folio_test_uptodate(folio)) { 153362306a36Sopenharmony_ci /* just return that nothing was copied on a short copy */ 153462306a36Sopenharmony_ci if (copied < len) { 153562306a36Sopenharmony_ci copied = 0; 153662306a36Sopenharmony_ci goto out; 153762306a36Sopenharmony_ci } 153862306a36Sopenharmony_ci folio_mark_uptodate(folio); 153962306a36Sopenharmony_ci } 154062306a36Sopenharmony_ci 154162306a36Sopenharmony_ci /* did file size increase? 
*/ 154262306a36Sopenharmony_ci if (pos+copied > i_size_read(inode)) 154362306a36Sopenharmony_ci check_cap = ceph_inode_set_size(inode, pos+copied); 154462306a36Sopenharmony_ci 154562306a36Sopenharmony_ci folio_mark_dirty(folio); 154662306a36Sopenharmony_ci 154762306a36Sopenharmony_ciout: 154862306a36Sopenharmony_ci folio_unlock(folio); 154962306a36Sopenharmony_ci folio_put(folio); 155062306a36Sopenharmony_ci 155162306a36Sopenharmony_ci if (check_cap) 155262306a36Sopenharmony_ci ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY); 155362306a36Sopenharmony_ci 155462306a36Sopenharmony_ci return copied; 155562306a36Sopenharmony_ci} 155662306a36Sopenharmony_ci 155762306a36Sopenharmony_ciconst struct address_space_operations ceph_aops = { 155862306a36Sopenharmony_ci .read_folio = netfs_read_folio, 155962306a36Sopenharmony_ci .readahead = netfs_readahead, 156062306a36Sopenharmony_ci .writepage = ceph_writepage, 156162306a36Sopenharmony_ci .writepages = ceph_writepages_start, 156262306a36Sopenharmony_ci .write_begin = ceph_write_begin, 156362306a36Sopenharmony_ci .write_end = ceph_write_end, 156462306a36Sopenharmony_ci .dirty_folio = ceph_dirty_folio, 156562306a36Sopenharmony_ci .invalidate_folio = ceph_invalidate_folio, 156662306a36Sopenharmony_ci .release_folio = ceph_release_folio, 156762306a36Sopenharmony_ci .direct_IO = noop_direct_IO, 156862306a36Sopenharmony_ci}; 156962306a36Sopenharmony_ci 157062306a36Sopenharmony_cistatic void ceph_block_sigs(sigset_t *oldset) 157162306a36Sopenharmony_ci{ 157262306a36Sopenharmony_ci sigset_t mask; 157362306a36Sopenharmony_ci siginitsetinv(&mask, sigmask(SIGKILL)); 157462306a36Sopenharmony_ci sigprocmask(SIG_BLOCK, &mask, oldset); 157562306a36Sopenharmony_ci} 157662306a36Sopenharmony_ci 157762306a36Sopenharmony_cistatic void ceph_restore_sigs(sigset_t *oldset) 157862306a36Sopenharmony_ci{ 157962306a36Sopenharmony_ci sigprocmask(SIG_SETMASK, oldset, NULL); 158062306a36Sopenharmony_ci} 158162306a36Sopenharmony_ci 
158262306a36Sopenharmony_ci/* 158362306a36Sopenharmony_ci * vm ops 158462306a36Sopenharmony_ci */ 158562306a36Sopenharmony_cistatic vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) 158662306a36Sopenharmony_ci{ 158762306a36Sopenharmony_ci struct vm_area_struct *vma = vmf->vma; 158862306a36Sopenharmony_ci struct inode *inode = file_inode(vma->vm_file); 158962306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 159062306a36Sopenharmony_ci struct ceph_file_info *fi = vma->vm_file->private_data; 159162306a36Sopenharmony_ci loff_t off = (loff_t)vmf->pgoff << PAGE_SHIFT; 159262306a36Sopenharmony_ci int want, got, err; 159362306a36Sopenharmony_ci sigset_t oldset; 159462306a36Sopenharmony_ci vm_fault_t ret = VM_FAULT_SIGBUS; 159562306a36Sopenharmony_ci 159662306a36Sopenharmony_ci if (ceph_inode_is_shutdown(inode)) 159762306a36Sopenharmony_ci return ret; 159862306a36Sopenharmony_ci 159962306a36Sopenharmony_ci ceph_block_sigs(&oldset); 160062306a36Sopenharmony_ci 160162306a36Sopenharmony_ci dout("filemap_fault %p %llx.%llx %llu trying to get caps\n", 160262306a36Sopenharmony_ci inode, ceph_vinop(inode), off); 160362306a36Sopenharmony_ci if (fi->fmode & CEPH_FILE_MODE_LAZY) 160462306a36Sopenharmony_ci want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; 160562306a36Sopenharmony_ci else 160662306a36Sopenharmony_ci want = CEPH_CAP_FILE_CACHE; 160762306a36Sopenharmony_ci 160862306a36Sopenharmony_ci got = 0; 160962306a36Sopenharmony_ci err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_RD, want, -1, &got); 161062306a36Sopenharmony_ci if (err < 0) 161162306a36Sopenharmony_ci goto out_restore; 161262306a36Sopenharmony_ci 161362306a36Sopenharmony_ci dout("filemap_fault %p %llu got cap refs on %s\n", 161462306a36Sopenharmony_ci inode, off, ceph_cap_string(got)); 161562306a36Sopenharmony_ci 161662306a36Sopenharmony_ci if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) || 161762306a36Sopenharmony_ci !ceph_has_inline_data(ci)) { 161862306a36Sopenharmony_ci 
CEPH_DEFINE_RW_CONTEXT(rw_ctx, got); 161962306a36Sopenharmony_ci ceph_add_rw_context(fi, &rw_ctx); 162062306a36Sopenharmony_ci ret = filemap_fault(vmf); 162162306a36Sopenharmony_ci ceph_del_rw_context(fi, &rw_ctx); 162262306a36Sopenharmony_ci dout("filemap_fault %p %llu drop cap refs %s ret %x\n", 162362306a36Sopenharmony_ci inode, off, ceph_cap_string(got), ret); 162462306a36Sopenharmony_ci } else 162562306a36Sopenharmony_ci err = -EAGAIN; 162662306a36Sopenharmony_ci 162762306a36Sopenharmony_ci ceph_put_cap_refs(ci, got); 162862306a36Sopenharmony_ci 162962306a36Sopenharmony_ci if (err != -EAGAIN) 163062306a36Sopenharmony_ci goto out_restore; 163162306a36Sopenharmony_ci 163262306a36Sopenharmony_ci /* read inline data */ 163362306a36Sopenharmony_ci if (off >= PAGE_SIZE) { 163462306a36Sopenharmony_ci /* does not support inline data > PAGE_SIZE */ 163562306a36Sopenharmony_ci ret = VM_FAULT_SIGBUS; 163662306a36Sopenharmony_ci } else { 163762306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 163862306a36Sopenharmony_ci struct page *page; 163962306a36Sopenharmony_ci 164062306a36Sopenharmony_ci filemap_invalidate_lock_shared(mapping); 164162306a36Sopenharmony_ci page = find_or_create_page(mapping, 0, 164262306a36Sopenharmony_ci mapping_gfp_constraint(mapping, ~__GFP_FS)); 164362306a36Sopenharmony_ci if (!page) { 164462306a36Sopenharmony_ci ret = VM_FAULT_OOM; 164562306a36Sopenharmony_ci goto out_inline; 164662306a36Sopenharmony_ci } 164762306a36Sopenharmony_ci err = __ceph_do_getattr(inode, page, 164862306a36Sopenharmony_ci CEPH_STAT_CAP_INLINE_DATA, true); 164962306a36Sopenharmony_ci if (err < 0 || off >= i_size_read(inode)) { 165062306a36Sopenharmony_ci unlock_page(page); 165162306a36Sopenharmony_ci put_page(page); 165262306a36Sopenharmony_ci ret = vmf_error(err); 165362306a36Sopenharmony_ci goto out_inline; 165462306a36Sopenharmony_ci } 165562306a36Sopenharmony_ci if (err < PAGE_SIZE) 165662306a36Sopenharmony_ci zero_user_segment(page, err, 
PAGE_SIZE); 165762306a36Sopenharmony_ci else 165862306a36Sopenharmony_ci flush_dcache_page(page); 165962306a36Sopenharmony_ci SetPageUptodate(page); 166062306a36Sopenharmony_ci vmf->page = page; 166162306a36Sopenharmony_ci ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED; 166262306a36Sopenharmony_ciout_inline: 166362306a36Sopenharmony_ci filemap_invalidate_unlock_shared(mapping); 166462306a36Sopenharmony_ci dout("filemap_fault %p %llu read inline data ret %x\n", 166562306a36Sopenharmony_ci inode, off, ret); 166662306a36Sopenharmony_ci } 166762306a36Sopenharmony_ciout_restore: 166862306a36Sopenharmony_ci ceph_restore_sigs(&oldset); 166962306a36Sopenharmony_ci if (err < 0) 167062306a36Sopenharmony_ci ret = vmf_error(err); 167162306a36Sopenharmony_ci 167262306a36Sopenharmony_ci return ret; 167362306a36Sopenharmony_ci} 167462306a36Sopenharmony_ci 167562306a36Sopenharmony_cistatic vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) 167662306a36Sopenharmony_ci{ 167762306a36Sopenharmony_ci struct vm_area_struct *vma = vmf->vma; 167862306a36Sopenharmony_ci struct inode *inode = file_inode(vma->vm_file); 167962306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 168062306a36Sopenharmony_ci struct ceph_file_info *fi = vma->vm_file->private_data; 168162306a36Sopenharmony_ci struct ceph_cap_flush *prealloc_cf; 168262306a36Sopenharmony_ci struct page *page = vmf->page; 168362306a36Sopenharmony_ci loff_t off = page_offset(page); 168462306a36Sopenharmony_ci loff_t size = i_size_read(inode); 168562306a36Sopenharmony_ci size_t len; 168662306a36Sopenharmony_ci int want, got, err; 168762306a36Sopenharmony_ci sigset_t oldset; 168862306a36Sopenharmony_ci vm_fault_t ret = VM_FAULT_SIGBUS; 168962306a36Sopenharmony_ci 169062306a36Sopenharmony_ci if (ceph_inode_is_shutdown(inode)) 169162306a36Sopenharmony_ci return ret; 169262306a36Sopenharmony_ci 169362306a36Sopenharmony_ci prealloc_cf = ceph_alloc_cap_flush(); 169462306a36Sopenharmony_ci if (!prealloc_cf) 
169562306a36Sopenharmony_ci return VM_FAULT_OOM; 169662306a36Sopenharmony_ci 169762306a36Sopenharmony_ci sb_start_pagefault(inode->i_sb); 169862306a36Sopenharmony_ci ceph_block_sigs(&oldset); 169962306a36Sopenharmony_ci 170062306a36Sopenharmony_ci if (off + thp_size(page) <= size) 170162306a36Sopenharmony_ci len = thp_size(page); 170262306a36Sopenharmony_ci else 170362306a36Sopenharmony_ci len = offset_in_thp(page, size); 170462306a36Sopenharmony_ci 170562306a36Sopenharmony_ci dout("page_mkwrite %p %llx.%llx %llu~%zd getting caps i_size %llu\n", 170662306a36Sopenharmony_ci inode, ceph_vinop(inode), off, len, size); 170762306a36Sopenharmony_ci if (fi->fmode & CEPH_FILE_MODE_LAZY) 170862306a36Sopenharmony_ci want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; 170962306a36Sopenharmony_ci else 171062306a36Sopenharmony_ci want = CEPH_CAP_FILE_BUFFER; 171162306a36Sopenharmony_ci 171262306a36Sopenharmony_ci got = 0; 171362306a36Sopenharmony_ci err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_WR, want, off + len, &got); 171462306a36Sopenharmony_ci if (err < 0) 171562306a36Sopenharmony_ci goto out_free; 171662306a36Sopenharmony_ci 171762306a36Sopenharmony_ci dout("page_mkwrite %p %llu~%zd got cap refs on %s\n", 171862306a36Sopenharmony_ci inode, off, len, ceph_cap_string(got)); 171962306a36Sopenharmony_ci 172062306a36Sopenharmony_ci /* Update time before taking page lock */ 172162306a36Sopenharmony_ci file_update_time(vma->vm_file); 172262306a36Sopenharmony_ci inode_inc_iversion_raw(inode); 172362306a36Sopenharmony_ci 172462306a36Sopenharmony_ci do { 172562306a36Sopenharmony_ci struct ceph_snap_context *snapc; 172662306a36Sopenharmony_ci 172762306a36Sopenharmony_ci lock_page(page); 172862306a36Sopenharmony_ci 172962306a36Sopenharmony_ci if (page_mkwrite_check_truncate(page, inode) < 0) { 173062306a36Sopenharmony_ci unlock_page(page); 173162306a36Sopenharmony_ci ret = VM_FAULT_NOPAGE; 173262306a36Sopenharmony_ci break; 173362306a36Sopenharmony_ci } 173462306a36Sopenharmony_ci 
173562306a36Sopenharmony_ci snapc = ceph_find_incompatible(page); 173662306a36Sopenharmony_ci if (!snapc) { 173762306a36Sopenharmony_ci /* success. we'll keep the page locked. */ 173862306a36Sopenharmony_ci set_page_dirty(page); 173962306a36Sopenharmony_ci ret = VM_FAULT_LOCKED; 174062306a36Sopenharmony_ci break; 174162306a36Sopenharmony_ci } 174262306a36Sopenharmony_ci 174362306a36Sopenharmony_ci unlock_page(page); 174462306a36Sopenharmony_ci 174562306a36Sopenharmony_ci if (IS_ERR(snapc)) { 174662306a36Sopenharmony_ci ret = VM_FAULT_SIGBUS; 174762306a36Sopenharmony_ci break; 174862306a36Sopenharmony_ci } 174962306a36Sopenharmony_ci 175062306a36Sopenharmony_ci ceph_queue_writeback(inode); 175162306a36Sopenharmony_ci err = wait_event_killable(ci->i_cap_wq, 175262306a36Sopenharmony_ci context_is_writeable_or_written(inode, snapc)); 175362306a36Sopenharmony_ci ceph_put_snap_context(snapc); 175462306a36Sopenharmony_ci } while (err == 0); 175562306a36Sopenharmony_ci 175662306a36Sopenharmony_ci if (ret == VM_FAULT_LOCKED) { 175762306a36Sopenharmony_ci int dirty; 175862306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 175962306a36Sopenharmony_ci dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, 176062306a36Sopenharmony_ci &prealloc_cf); 176162306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 176262306a36Sopenharmony_ci if (dirty) 176362306a36Sopenharmony_ci __mark_inode_dirty(inode, dirty); 176462306a36Sopenharmony_ci } 176562306a36Sopenharmony_ci 176662306a36Sopenharmony_ci dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %x\n", 176762306a36Sopenharmony_ci inode, off, len, ceph_cap_string(got), ret); 176862306a36Sopenharmony_ci ceph_put_cap_refs_async(ci, got); 176962306a36Sopenharmony_ciout_free: 177062306a36Sopenharmony_ci ceph_restore_sigs(&oldset); 177162306a36Sopenharmony_ci sb_end_pagefault(inode->i_sb); 177262306a36Sopenharmony_ci ceph_free_cap_flush(prealloc_cf); 177362306a36Sopenharmony_ci if (err < 0) 177462306a36Sopenharmony_ci ret = 
vmf_error(err); 177562306a36Sopenharmony_ci return ret; 177662306a36Sopenharmony_ci} 177762306a36Sopenharmony_ci 177862306a36Sopenharmony_civoid ceph_fill_inline_data(struct inode *inode, struct page *locked_page, 177962306a36Sopenharmony_ci char *data, size_t len) 178062306a36Sopenharmony_ci{ 178162306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 178262306a36Sopenharmony_ci struct page *page; 178362306a36Sopenharmony_ci 178462306a36Sopenharmony_ci if (locked_page) { 178562306a36Sopenharmony_ci page = locked_page; 178662306a36Sopenharmony_ci } else { 178762306a36Sopenharmony_ci if (i_size_read(inode) == 0) 178862306a36Sopenharmony_ci return; 178962306a36Sopenharmony_ci page = find_or_create_page(mapping, 0, 179062306a36Sopenharmony_ci mapping_gfp_constraint(mapping, 179162306a36Sopenharmony_ci ~__GFP_FS)); 179262306a36Sopenharmony_ci if (!page) 179362306a36Sopenharmony_ci return; 179462306a36Sopenharmony_ci if (PageUptodate(page)) { 179562306a36Sopenharmony_ci unlock_page(page); 179662306a36Sopenharmony_ci put_page(page); 179762306a36Sopenharmony_ci return; 179862306a36Sopenharmony_ci } 179962306a36Sopenharmony_ci } 180062306a36Sopenharmony_ci 180162306a36Sopenharmony_ci dout("fill_inline_data %p %llx.%llx len %zu locked_page %p\n", 180262306a36Sopenharmony_ci inode, ceph_vinop(inode), len, locked_page); 180362306a36Sopenharmony_ci 180462306a36Sopenharmony_ci if (len > 0) { 180562306a36Sopenharmony_ci void *kaddr = kmap_atomic(page); 180662306a36Sopenharmony_ci memcpy(kaddr, data, len); 180762306a36Sopenharmony_ci kunmap_atomic(kaddr); 180862306a36Sopenharmony_ci } 180962306a36Sopenharmony_ci 181062306a36Sopenharmony_ci if (page != locked_page) { 181162306a36Sopenharmony_ci if (len < PAGE_SIZE) 181262306a36Sopenharmony_ci zero_user_segment(page, len, PAGE_SIZE); 181362306a36Sopenharmony_ci else 181462306a36Sopenharmony_ci flush_dcache_page(page); 181562306a36Sopenharmony_ci 181662306a36Sopenharmony_ci SetPageUptodate(page); 
181762306a36Sopenharmony_ci unlock_page(page); 181862306a36Sopenharmony_ci put_page(page); 181962306a36Sopenharmony_ci } 182062306a36Sopenharmony_ci} 182162306a36Sopenharmony_ci 182262306a36Sopenharmony_ciint ceph_uninline_data(struct file *file) 182362306a36Sopenharmony_ci{ 182462306a36Sopenharmony_ci struct inode *inode = file_inode(file); 182562306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 182662306a36Sopenharmony_ci struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 182762306a36Sopenharmony_ci struct ceph_osd_request *req = NULL; 182862306a36Sopenharmony_ci struct ceph_cap_flush *prealloc_cf = NULL; 182962306a36Sopenharmony_ci struct folio *folio = NULL; 183062306a36Sopenharmony_ci u64 inline_version = CEPH_INLINE_NONE; 183162306a36Sopenharmony_ci struct page *pages[1]; 183262306a36Sopenharmony_ci int err = 0; 183362306a36Sopenharmony_ci u64 len; 183462306a36Sopenharmony_ci 183562306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 183662306a36Sopenharmony_ci inline_version = ci->i_inline_version; 183762306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 183862306a36Sopenharmony_ci 183962306a36Sopenharmony_ci dout("uninline_data %p %llx.%llx inline_version %llu\n", 184062306a36Sopenharmony_ci inode, ceph_vinop(inode), inline_version); 184162306a36Sopenharmony_ci 184262306a36Sopenharmony_ci if (ceph_inode_is_shutdown(inode)) { 184362306a36Sopenharmony_ci err = -EIO; 184462306a36Sopenharmony_ci goto out; 184562306a36Sopenharmony_ci } 184662306a36Sopenharmony_ci 184762306a36Sopenharmony_ci if (inline_version == CEPH_INLINE_NONE) 184862306a36Sopenharmony_ci return 0; 184962306a36Sopenharmony_ci 185062306a36Sopenharmony_ci prealloc_cf = ceph_alloc_cap_flush(); 185162306a36Sopenharmony_ci if (!prealloc_cf) 185262306a36Sopenharmony_ci return -ENOMEM; 185362306a36Sopenharmony_ci 185462306a36Sopenharmony_ci if (inline_version == 1) /* initial version, no data */ 185562306a36Sopenharmony_ci goto out_uninline; 185662306a36Sopenharmony_ci 
185762306a36Sopenharmony_ci folio = read_mapping_folio(inode->i_mapping, 0, file); 185862306a36Sopenharmony_ci if (IS_ERR(folio)) { 185962306a36Sopenharmony_ci err = PTR_ERR(folio); 186062306a36Sopenharmony_ci goto out; 186162306a36Sopenharmony_ci } 186262306a36Sopenharmony_ci 186362306a36Sopenharmony_ci folio_lock(folio); 186462306a36Sopenharmony_ci 186562306a36Sopenharmony_ci len = i_size_read(inode); 186662306a36Sopenharmony_ci if (len > folio_size(folio)) 186762306a36Sopenharmony_ci len = folio_size(folio); 186862306a36Sopenharmony_ci 186962306a36Sopenharmony_ci req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, 187062306a36Sopenharmony_ci ceph_vino(inode), 0, &len, 0, 1, 187162306a36Sopenharmony_ci CEPH_OSD_OP_CREATE, CEPH_OSD_FLAG_WRITE, 187262306a36Sopenharmony_ci NULL, 0, 0, false); 187362306a36Sopenharmony_ci if (IS_ERR(req)) { 187462306a36Sopenharmony_ci err = PTR_ERR(req); 187562306a36Sopenharmony_ci goto out_unlock; 187662306a36Sopenharmony_ci } 187762306a36Sopenharmony_ci 187862306a36Sopenharmony_ci req->r_mtime = inode->i_mtime; 187962306a36Sopenharmony_ci ceph_osdc_start_request(&fsc->client->osdc, req); 188062306a36Sopenharmony_ci err = ceph_osdc_wait_request(&fsc->client->osdc, req); 188162306a36Sopenharmony_ci ceph_osdc_put_request(req); 188262306a36Sopenharmony_ci if (err < 0) 188362306a36Sopenharmony_ci goto out_unlock; 188462306a36Sopenharmony_ci 188562306a36Sopenharmony_ci req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, 188662306a36Sopenharmony_ci ceph_vino(inode), 0, &len, 1, 3, 188762306a36Sopenharmony_ci CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, 188862306a36Sopenharmony_ci NULL, ci->i_truncate_seq, 188962306a36Sopenharmony_ci ci->i_truncate_size, false); 189062306a36Sopenharmony_ci if (IS_ERR(req)) { 189162306a36Sopenharmony_ci err = PTR_ERR(req); 189262306a36Sopenharmony_ci goto out_unlock; 189362306a36Sopenharmony_ci } 189462306a36Sopenharmony_ci 189562306a36Sopenharmony_ci pages[0] = folio_page(folio, 0); 
189662306a36Sopenharmony_ci osd_req_op_extent_osd_data_pages(req, 1, pages, len, 0, false, false); 189762306a36Sopenharmony_ci 189862306a36Sopenharmony_ci { 189962306a36Sopenharmony_ci __le64 xattr_buf = cpu_to_le64(inline_version); 190062306a36Sopenharmony_ci err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR, 190162306a36Sopenharmony_ci "inline_version", &xattr_buf, 190262306a36Sopenharmony_ci sizeof(xattr_buf), 190362306a36Sopenharmony_ci CEPH_OSD_CMPXATTR_OP_GT, 190462306a36Sopenharmony_ci CEPH_OSD_CMPXATTR_MODE_U64); 190562306a36Sopenharmony_ci if (err) 190662306a36Sopenharmony_ci goto out_put_req; 190762306a36Sopenharmony_ci } 190862306a36Sopenharmony_ci 190962306a36Sopenharmony_ci { 191062306a36Sopenharmony_ci char xattr_buf[32]; 191162306a36Sopenharmony_ci int xattr_len = snprintf(xattr_buf, sizeof(xattr_buf), 191262306a36Sopenharmony_ci "%llu", inline_version); 191362306a36Sopenharmony_ci err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR, 191462306a36Sopenharmony_ci "inline_version", 191562306a36Sopenharmony_ci xattr_buf, xattr_len, 0, 0); 191662306a36Sopenharmony_ci if (err) 191762306a36Sopenharmony_ci goto out_put_req; 191862306a36Sopenharmony_ci } 191962306a36Sopenharmony_ci 192062306a36Sopenharmony_ci req->r_mtime = inode->i_mtime; 192162306a36Sopenharmony_ci ceph_osdc_start_request(&fsc->client->osdc, req); 192262306a36Sopenharmony_ci err = ceph_osdc_wait_request(&fsc->client->osdc, req); 192362306a36Sopenharmony_ci 192462306a36Sopenharmony_ci ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency, 192562306a36Sopenharmony_ci req->r_end_latency, len, err); 192662306a36Sopenharmony_ci 192762306a36Sopenharmony_ciout_uninline: 192862306a36Sopenharmony_ci if (!err) { 192962306a36Sopenharmony_ci int dirty; 193062306a36Sopenharmony_ci 193162306a36Sopenharmony_ci /* Set to CAP_INLINE_NONE and dirty the caps */ 193262306a36Sopenharmony_ci down_read(&fsc->mdsc->snap_rwsem); 193362306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 
193462306a36Sopenharmony_ci ci->i_inline_version = CEPH_INLINE_NONE; 193562306a36Sopenharmony_ci dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, &prealloc_cf); 193662306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 193762306a36Sopenharmony_ci up_read(&fsc->mdsc->snap_rwsem); 193862306a36Sopenharmony_ci if (dirty) 193962306a36Sopenharmony_ci __mark_inode_dirty(inode, dirty); 194062306a36Sopenharmony_ci } 194162306a36Sopenharmony_ciout_put_req: 194262306a36Sopenharmony_ci ceph_osdc_put_request(req); 194362306a36Sopenharmony_ci if (err == -ECANCELED) 194462306a36Sopenharmony_ci err = 0; 194562306a36Sopenharmony_ciout_unlock: 194662306a36Sopenharmony_ci if (folio) { 194762306a36Sopenharmony_ci folio_unlock(folio); 194862306a36Sopenharmony_ci folio_put(folio); 194962306a36Sopenharmony_ci } 195062306a36Sopenharmony_ciout: 195162306a36Sopenharmony_ci ceph_free_cap_flush(prealloc_cf); 195262306a36Sopenharmony_ci dout("uninline_data %p %llx.%llx inline_version %llu = %d\n", 195362306a36Sopenharmony_ci inode, ceph_vinop(inode), inline_version, err); 195462306a36Sopenharmony_ci return err; 195562306a36Sopenharmony_ci} 195662306a36Sopenharmony_ci 195762306a36Sopenharmony_cistatic const struct vm_operations_struct ceph_vmops = { 195862306a36Sopenharmony_ci .fault = ceph_filemap_fault, 195962306a36Sopenharmony_ci .page_mkwrite = ceph_page_mkwrite, 196062306a36Sopenharmony_ci}; 196162306a36Sopenharmony_ci 196262306a36Sopenharmony_ciint ceph_mmap(struct file *file, struct vm_area_struct *vma) 196362306a36Sopenharmony_ci{ 196462306a36Sopenharmony_ci struct address_space *mapping = file->f_mapping; 196562306a36Sopenharmony_ci 196662306a36Sopenharmony_ci if (!mapping->a_ops->read_folio) 196762306a36Sopenharmony_ci return -ENOEXEC; 196862306a36Sopenharmony_ci vma->vm_ops = &ceph_vmops; 196962306a36Sopenharmony_ci return 0; 197062306a36Sopenharmony_ci} 197162306a36Sopenharmony_ci 197262306a36Sopenharmony_cienum { 197362306a36Sopenharmony_ci POOL_READ = 1, 
197462306a36Sopenharmony_ci POOL_WRITE = 2, 197562306a36Sopenharmony_ci}; 197662306a36Sopenharmony_ci 197762306a36Sopenharmony_cistatic int __ceph_pool_perm_get(struct ceph_inode_info *ci, 197862306a36Sopenharmony_ci s64 pool, struct ceph_string *pool_ns) 197962306a36Sopenharmony_ci{ 198062306a36Sopenharmony_ci struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->netfs.inode); 198162306a36Sopenharmony_ci struct ceph_mds_client *mdsc = fsc->mdsc; 198262306a36Sopenharmony_ci struct ceph_osd_request *rd_req = NULL, *wr_req = NULL; 198362306a36Sopenharmony_ci struct rb_node **p, *parent; 198462306a36Sopenharmony_ci struct ceph_pool_perm *perm; 198562306a36Sopenharmony_ci struct page **pages; 198662306a36Sopenharmony_ci size_t pool_ns_len; 198762306a36Sopenharmony_ci int err = 0, err2 = 0, have = 0; 198862306a36Sopenharmony_ci 198962306a36Sopenharmony_ci down_read(&mdsc->pool_perm_rwsem); 199062306a36Sopenharmony_ci p = &mdsc->pool_perm_tree.rb_node; 199162306a36Sopenharmony_ci while (*p) { 199262306a36Sopenharmony_ci perm = rb_entry(*p, struct ceph_pool_perm, node); 199362306a36Sopenharmony_ci if (pool < perm->pool) 199462306a36Sopenharmony_ci p = &(*p)->rb_left; 199562306a36Sopenharmony_ci else if (pool > perm->pool) 199662306a36Sopenharmony_ci p = &(*p)->rb_right; 199762306a36Sopenharmony_ci else { 199862306a36Sopenharmony_ci int ret = ceph_compare_string(pool_ns, 199962306a36Sopenharmony_ci perm->pool_ns, 200062306a36Sopenharmony_ci perm->pool_ns_len); 200162306a36Sopenharmony_ci if (ret < 0) 200262306a36Sopenharmony_ci p = &(*p)->rb_left; 200362306a36Sopenharmony_ci else if (ret > 0) 200462306a36Sopenharmony_ci p = &(*p)->rb_right; 200562306a36Sopenharmony_ci else { 200662306a36Sopenharmony_ci have = perm->perm; 200762306a36Sopenharmony_ci break; 200862306a36Sopenharmony_ci } 200962306a36Sopenharmony_ci } 201062306a36Sopenharmony_ci } 201162306a36Sopenharmony_ci up_read(&mdsc->pool_perm_rwsem); 201262306a36Sopenharmony_ci if (*p) 201362306a36Sopenharmony_ci goto 
out; 201462306a36Sopenharmony_ci 201562306a36Sopenharmony_ci if (pool_ns) 201662306a36Sopenharmony_ci dout("__ceph_pool_perm_get pool %lld ns %.*s no perm cached\n", 201762306a36Sopenharmony_ci pool, (int)pool_ns->len, pool_ns->str); 201862306a36Sopenharmony_ci else 201962306a36Sopenharmony_ci dout("__ceph_pool_perm_get pool %lld no perm cached\n", pool); 202062306a36Sopenharmony_ci 202162306a36Sopenharmony_ci down_write(&mdsc->pool_perm_rwsem); 202262306a36Sopenharmony_ci p = &mdsc->pool_perm_tree.rb_node; 202362306a36Sopenharmony_ci parent = NULL; 202462306a36Sopenharmony_ci while (*p) { 202562306a36Sopenharmony_ci parent = *p; 202662306a36Sopenharmony_ci perm = rb_entry(parent, struct ceph_pool_perm, node); 202762306a36Sopenharmony_ci if (pool < perm->pool) 202862306a36Sopenharmony_ci p = &(*p)->rb_left; 202962306a36Sopenharmony_ci else if (pool > perm->pool) 203062306a36Sopenharmony_ci p = &(*p)->rb_right; 203162306a36Sopenharmony_ci else { 203262306a36Sopenharmony_ci int ret = ceph_compare_string(pool_ns, 203362306a36Sopenharmony_ci perm->pool_ns, 203462306a36Sopenharmony_ci perm->pool_ns_len); 203562306a36Sopenharmony_ci if (ret < 0) 203662306a36Sopenharmony_ci p = &(*p)->rb_left; 203762306a36Sopenharmony_ci else if (ret > 0) 203862306a36Sopenharmony_ci p = &(*p)->rb_right; 203962306a36Sopenharmony_ci else { 204062306a36Sopenharmony_ci have = perm->perm; 204162306a36Sopenharmony_ci break; 204262306a36Sopenharmony_ci } 204362306a36Sopenharmony_ci } 204462306a36Sopenharmony_ci } 204562306a36Sopenharmony_ci if (*p) { 204662306a36Sopenharmony_ci up_write(&mdsc->pool_perm_rwsem); 204762306a36Sopenharmony_ci goto out; 204862306a36Sopenharmony_ci } 204962306a36Sopenharmony_ci 205062306a36Sopenharmony_ci rd_req = ceph_osdc_alloc_request(&fsc->client->osdc, NULL, 205162306a36Sopenharmony_ci 1, false, GFP_NOFS); 205262306a36Sopenharmony_ci if (!rd_req) { 205362306a36Sopenharmony_ci err = -ENOMEM; 205462306a36Sopenharmony_ci goto out_unlock; 205562306a36Sopenharmony_ci 
} 205662306a36Sopenharmony_ci 205762306a36Sopenharmony_ci rd_req->r_flags = CEPH_OSD_FLAG_READ; 205862306a36Sopenharmony_ci osd_req_op_init(rd_req, 0, CEPH_OSD_OP_STAT, 0); 205962306a36Sopenharmony_ci rd_req->r_base_oloc.pool = pool; 206062306a36Sopenharmony_ci if (pool_ns) 206162306a36Sopenharmony_ci rd_req->r_base_oloc.pool_ns = ceph_get_string(pool_ns); 206262306a36Sopenharmony_ci ceph_oid_printf(&rd_req->r_base_oid, "%llx.00000000", ci->i_vino.ino); 206362306a36Sopenharmony_ci 206462306a36Sopenharmony_ci err = ceph_osdc_alloc_messages(rd_req, GFP_NOFS); 206562306a36Sopenharmony_ci if (err) 206662306a36Sopenharmony_ci goto out_unlock; 206762306a36Sopenharmony_ci 206862306a36Sopenharmony_ci wr_req = ceph_osdc_alloc_request(&fsc->client->osdc, NULL, 206962306a36Sopenharmony_ci 1, false, GFP_NOFS); 207062306a36Sopenharmony_ci if (!wr_req) { 207162306a36Sopenharmony_ci err = -ENOMEM; 207262306a36Sopenharmony_ci goto out_unlock; 207362306a36Sopenharmony_ci } 207462306a36Sopenharmony_ci 207562306a36Sopenharmony_ci wr_req->r_flags = CEPH_OSD_FLAG_WRITE; 207662306a36Sopenharmony_ci osd_req_op_init(wr_req, 0, CEPH_OSD_OP_CREATE, CEPH_OSD_OP_FLAG_EXCL); 207762306a36Sopenharmony_ci ceph_oloc_copy(&wr_req->r_base_oloc, &rd_req->r_base_oloc); 207862306a36Sopenharmony_ci ceph_oid_copy(&wr_req->r_base_oid, &rd_req->r_base_oid); 207962306a36Sopenharmony_ci 208062306a36Sopenharmony_ci err = ceph_osdc_alloc_messages(wr_req, GFP_NOFS); 208162306a36Sopenharmony_ci if (err) 208262306a36Sopenharmony_ci goto out_unlock; 208362306a36Sopenharmony_ci 208462306a36Sopenharmony_ci /* one page should be large enough for STAT data */ 208562306a36Sopenharmony_ci pages = ceph_alloc_page_vector(1, GFP_KERNEL); 208662306a36Sopenharmony_ci if (IS_ERR(pages)) { 208762306a36Sopenharmony_ci err = PTR_ERR(pages); 208862306a36Sopenharmony_ci goto out_unlock; 208962306a36Sopenharmony_ci } 209062306a36Sopenharmony_ci 209162306a36Sopenharmony_ci osd_req_op_raw_data_in_pages(rd_req, 0, pages, PAGE_SIZE, 
209262306a36Sopenharmony_ci 0, false, true); 209362306a36Sopenharmony_ci ceph_osdc_start_request(&fsc->client->osdc, rd_req); 209462306a36Sopenharmony_ci 209562306a36Sopenharmony_ci wr_req->r_mtime = ci->netfs.inode.i_mtime; 209662306a36Sopenharmony_ci ceph_osdc_start_request(&fsc->client->osdc, wr_req); 209762306a36Sopenharmony_ci 209862306a36Sopenharmony_ci err = ceph_osdc_wait_request(&fsc->client->osdc, rd_req); 209962306a36Sopenharmony_ci err2 = ceph_osdc_wait_request(&fsc->client->osdc, wr_req); 210062306a36Sopenharmony_ci 210162306a36Sopenharmony_ci if (err >= 0 || err == -ENOENT) 210262306a36Sopenharmony_ci have |= POOL_READ; 210362306a36Sopenharmony_ci else if (err != -EPERM) { 210462306a36Sopenharmony_ci if (err == -EBLOCKLISTED) 210562306a36Sopenharmony_ci fsc->blocklisted = true; 210662306a36Sopenharmony_ci goto out_unlock; 210762306a36Sopenharmony_ci } 210862306a36Sopenharmony_ci 210962306a36Sopenharmony_ci if (err2 == 0 || err2 == -EEXIST) 211062306a36Sopenharmony_ci have |= POOL_WRITE; 211162306a36Sopenharmony_ci else if (err2 != -EPERM) { 211262306a36Sopenharmony_ci if (err2 == -EBLOCKLISTED) 211362306a36Sopenharmony_ci fsc->blocklisted = true; 211462306a36Sopenharmony_ci err = err2; 211562306a36Sopenharmony_ci goto out_unlock; 211662306a36Sopenharmony_ci } 211762306a36Sopenharmony_ci 211862306a36Sopenharmony_ci pool_ns_len = pool_ns ? 
pool_ns->len : 0; 211962306a36Sopenharmony_ci perm = kmalloc(sizeof(*perm) + pool_ns_len + 1, GFP_NOFS); 212062306a36Sopenharmony_ci if (!perm) { 212162306a36Sopenharmony_ci err = -ENOMEM; 212262306a36Sopenharmony_ci goto out_unlock; 212362306a36Sopenharmony_ci } 212462306a36Sopenharmony_ci 212562306a36Sopenharmony_ci perm->pool = pool; 212662306a36Sopenharmony_ci perm->perm = have; 212762306a36Sopenharmony_ci perm->pool_ns_len = pool_ns_len; 212862306a36Sopenharmony_ci if (pool_ns_len > 0) 212962306a36Sopenharmony_ci memcpy(perm->pool_ns, pool_ns->str, pool_ns_len); 213062306a36Sopenharmony_ci perm->pool_ns[pool_ns_len] = 0; 213162306a36Sopenharmony_ci 213262306a36Sopenharmony_ci rb_link_node(&perm->node, parent, p); 213362306a36Sopenharmony_ci rb_insert_color(&perm->node, &mdsc->pool_perm_tree); 213462306a36Sopenharmony_ci err = 0; 213562306a36Sopenharmony_ciout_unlock: 213662306a36Sopenharmony_ci up_write(&mdsc->pool_perm_rwsem); 213762306a36Sopenharmony_ci 213862306a36Sopenharmony_ci ceph_osdc_put_request(rd_req); 213962306a36Sopenharmony_ci ceph_osdc_put_request(wr_req); 214062306a36Sopenharmony_ciout: 214162306a36Sopenharmony_ci if (!err) 214262306a36Sopenharmony_ci err = have; 214362306a36Sopenharmony_ci if (pool_ns) 214462306a36Sopenharmony_ci dout("__ceph_pool_perm_get pool %lld ns %.*s result = %d\n", 214562306a36Sopenharmony_ci pool, (int)pool_ns->len, pool_ns->str, err); 214662306a36Sopenharmony_ci else 214762306a36Sopenharmony_ci dout("__ceph_pool_perm_get pool %lld result = %d\n", pool, err); 214862306a36Sopenharmony_ci return err; 214962306a36Sopenharmony_ci} 215062306a36Sopenharmony_ci 215162306a36Sopenharmony_ciint ceph_pool_perm_check(struct inode *inode, int need) 215262306a36Sopenharmony_ci{ 215362306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 215462306a36Sopenharmony_ci struct ceph_string *pool_ns; 215562306a36Sopenharmony_ci s64 pool; 215662306a36Sopenharmony_ci int ret, flags; 215762306a36Sopenharmony_ci 
215862306a36Sopenharmony_ci /* Only need to do this for regular files */ 215962306a36Sopenharmony_ci if (!S_ISREG(inode->i_mode)) 216062306a36Sopenharmony_ci return 0; 216162306a36Sopenharmony_ci 216262306a36Sopenharmony_ci if (ci->i_vino.snap != CEPH_NOSNAP) { 216362306a36Sopenharmony_ci /* 216462306a36Sopenharmony_ci * Pool permission check needs to write to the first object. 216562306a36Sopenharmony_ci * But for snapshot, head of the first object may have alread 216662306a36Sopenharmony_ci * been deleted. Skip check to avoid creating orphan object. 216762306a36Sopenharmony_ci */ 216862306a36Sopenharmony_ci return 0; 216962306a36Sopenharmony_ci } 217062306a36Sopenharmony_ci 217162306a36Sopenharmony_ci if (ceph_test_mount_opt(ceph_inode_to_client(inode), 217262306a36Sopenharmony_ci NOPOOLPERM)) 217362306a36Sopenharmony_ci return 0; 217462306a36Sopenharmony_ci 217562306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 217662306a36Sopenharmony_ci flags = ci->i_ceph_flags; 217762306a36Sopenharmony_ci pool = ci->i_layout.pool_id; 217862306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 217962306a36Sopenharmony_cicheck: 218062306a36Sopenharmony_ci if (flags & CEPH_I_POOL_PERM) { 218162306a36Sopenharmony_ci if ((need & CEPH_CAP_FILE_RD) && !(flags & CEPH_I_POOL_RD)) { 218262306a36Sopenharmony_ci dout("ceph_pool_perm_check pool %lld no read perm\n", 218362306a36Sopenharmony_ci pool); 218462306a36Sopenharmony_ci return -EPERM; 218562306a36Sopenharmony_ci } 218662306a36Sopenharmony_ci if ((need & CEPH_CAP_FILE_WR) && !(flags & CEPH_I_POOL_WR)) { 218762306a36Sopenharmony_ci dout("ceph_pool_perm_check pool %lld no write perm\n", 218862306a36Sopenharmony_ci pool); 218962306a36Sopenharmony_ci return -EPERM; 219062306a36Sopenharmony_ci } 219162306a36Sopenharmony_ci return 0; 219262306a36Sopenharmony_ci } 219362306a36Sopenharmony_ci 219462306a36Sopenharmony_ci pool_ns = ceph_try_get_string(ci->i_layout.pool_ns); 219562306a36Sopenharmony_ci ret = __ceph_pool_perm_get(ci, pool, 
pool_ns); 219662306a36Sopenharmony_ci ceph_put_string(pool_ns); 219762306a36Sopenharmony_ci if (ret < 0) 219862306a36Sopenharmony_ci return ret; 219962306a36Sopenharmony_ci 220062306a36Sopenharmony_ci flags = CEPH_I_POOL_PERM; 220162306a36Sopenharmony_ci if (ret & POOL_READ) 220262306a36Sopenharmony_ci flags |= CEPH_I_POOL_RD; 220362306a36Sopenharmony_ci if (ret & POOL_WRITE) 220462306a36Sopenharmony_ci flags |= CEPH_I_POOL_WR; 220562306a36Sopenharmony_ci 220662306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 220762306a36Sopenharmony_ci if (pool == ci->i_layout.pool_id && 220862306a36Sopenharmony_ci pool_ns == rcu_dereference_raw(ci->i_layout.pool_ns)) { 220962306a36Sopenharmony_ci ci->i_ceph_flags |= flags; 221062306a36Sopenharmony_ci } else { 221162306a36Sopenharmony_ci pool = ci->i_layout.pool_id; 221262306a36Sopenharmony_ci flags = ci->i_ceph_flags; 221362306a36Sopenharmony_ci } 221462306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 221562306a36Sopenharmony_ci goto check; 221662306a36Sopenharmony_ci} 221762306a36Sopenharmony_ci 221862306a36Sopenharmony_civoid ceph_pool_perm_destroy(struct ceph_mds_client *mdsc) 221962306a36Sopenharmony_ci{ 222062306a36Sopenharmony_ci struct ceph_pool_perm *perm; 222162306a36Sopenharmony_ci struct rb_node *n; 222262306a36Sopenharmony_ci 222362306a36Sopenharmony_ci while (!RB_EMPTY_ROOT(&mdsc->pool_perm_tree)) { 222462306a36Sopenharmony_ci n = rb_first(&mdsc->pool_perm_tree); 222562306a36Sopenharmony_ci perm = rb_entry(n, struct ceph_pool_perm, node); 222662306a36Sopenharmony_ci rb_erase(n, &mdsc->pool_perm_tree); 222762306a36Sopenharmony_ci kfree(perm); 222862306a36Sopenharmony_ci } 222962306a36Sopenharmony_ci} 2230