18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */ 28c2ecf20Sopenharmony_ci#include <linux/ceph/ceph_debug.h> 38c2ecf20Sopenharmony_ci 48c2ecf20Sopenharmony_ci#include <linux/types.h> 58c2ecf20Sopenharmony_ci#include <linux/percpu_counter.h> 68c2ecf20Sopenharmony_ci#include <linux/math64.h> 78c2ecf20Sopenharmony_ci 88c2ecf20Sopenharmony_ci#include "metric.h" 98c2ecf20Sopenharmony_ci#include "mds_client.h" 108c2ecf20Sopenharmony_ci 118c2ecf20Sopenharmony_cistatic bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, 128c2ecf20Sopenharmony_ci struct ceph_mds_session *s) 138c2ecf20Sopenharmony_ci{ 148c2ecf20Sopenharmony_ci struct ceph_metric_head *head; 158c2ecf20Sopenharmony_ci struct ceph_metric_cap *cap; 168c2ecf20Sopenharmony_ci struct ceph_metric_read_latency *read; 178c2ecf20Sopenharmony_ci struct ceph_metric_write_latency *write; 188c2ecf20Sopenharmony_ci struct ceph_metric_metadata_latency *meta; 198c2ecf20Sopenharmony_ci struct ceph_client_metric *m = &mdsc->metric; 208c2ecf20Sopenharmony_ci u64 nr_caps = atomic64_read(&m->total_caps); 218c2ecf20Sopenharmony_ci struct ceph_msg *msg; 228c2ecf20Sopenharmony_ci struct timespec64 ts; 238c2ecf20Sopenharmony_ci s64 sum; 248c2ecf20Sopenharmony_ci s32 items = 0; 258c2ecf20Sopenharmony_ci s32 len; 268c2ecf20Sopenharmony_ci 278c2ecf20Sopenharmony_ci len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write) 288c2ecf20Sopenharmony_ci + sizeof(*meta); 298c2ecf20Sopenharmony_ci 308c2ecf20Sopenharmony_ci msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true); 318c2ecf20Sopenharmony_ci if (!msg) { 328c2ecf20Sopenharmony_ci pr_err("send metrics to mds%d, failed to allocate message\n", 338c2ecf20Sopenharmony_ci s->s_mds); 348c2ecf20Sopenharmony_ci return false; 358c2ecf20Sopenharmony_ci } 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_ci head = msg->front.iov_base; 388c2ecf20Sopenharmony_ci 398c2ecf20Sopenharmony_ci /* encode the cap metric */ 408c2ecf20Sopenharmony_ci cap = (struct ceph_metric_cap *)(head + 1); 418c2ecf20Sopenharmony_ci cap->type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO); 428c2ecf20Sopenharmony_ci cap->ver = 1; 438c2ecf20Sopenharmony_ci cap->compat = 1; 448c2ecf20Sopenharmony_ci cap->data_len = cpu_to_le32(sizeof(*cap) - 10); 458c2ecf20Sopenharmony_ci cap->hit = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_hit)); 468c2ecf20Sopenharmony_ci cap->mis = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_mis)); 478c2ecf20Sopenharmony_ci cap->total = cpu_to_le64(nr_caps); 488c2ecf20Sopenharmony_ci items++; 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_ci /* encode the read latency metric */ 518c2ecf20Sopenharmony_ci read = (struct ceph_metric_read_latency *)(cap + 1); 528c2ecf20Sopenharmony_ci read->type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY); 538c2ecf20Sopenharmony_ci read->ver = 1; 548c2ecf20Sopenharmony_ci read->compat = 1; 558c2ecf20Sopenharmony_ci read->data_len = cpu_to_le32(sizeof(*read) - 10); 568c2ecf20Sopenharmony_ci sum = m->read_latency_sum; 578c2ecf20Sopenharmony_ci jiffies_to_timespec64(sum, &ts); 588c2ecf20Sopenharmony_ci read->sec = cpu_to_le32(ts.tv_sec); 598c2ecf20Sopenharmony_ci read->nsec = cpu_to_le32(ts.tv_nsec); 608c2ecf20Sopenharmony_ci items++; 618c2ecf20Sopenharmony_ci 628c2ecf20Sopenharmony_ci /* encode the write latency metric */ 638c2ecf20Sopenharmony_ci write = (struct ceph_metric_write_latency *)(read + 1); 648c2ecf20Sopenharmony_ci write->type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY); 658c2ecf20Sopenharmony_ci write->ver = 1; 668c2ecf20Sopenharmony_ci write->compat = 1; 678c2ecf20Sopenharmony_ci write->data_len = cpu_to_le32(sizeof(*write) - 10); 688c2ecf20Sopenharmony_ci sum = m->write_latency_sum; 698c2ecf20Sopenharmony_ci jiffies_to_timespec64(sum, &ts); 708c2ecf20Sopenharmony_ci write->sec = cpu_to_le32(ts.tv_sec); 718c2ecf20Sopenharmony_ci write->nsec = cpu_to_le32(ts.tv_nsec); 728c2ecf20Sopenharmony_ci items++; 738c2ecf20Sopenharmony_ci 748c2ecf20Sopenharmony_ci /* encode the metadata latency metric */ 758c2ecf20Sopenharmony_ci meta = (struct ceph_metric_metadata_latency *)(write + 1); 768c2ecf20Sopenharmony_ci meta->type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY); 778c2ecf20Sopenharmony_ci meta->ver = 1; 788c2ecf20Sopenharmony_ci meta->compat = 1; 798c2ecf20Sopenharmony_ci meta->data_len = cpu_to_le32(sizeof(*meta) - 10); 808c2ecf20Sopenharmony_ci sum = m->metadata_latency_sum; 818c2ecf20Sopenharmony_ci jiffies_to_timespec64(sum, &ts); 828c2ecf20Sopenharmony_ci meta->sec = cpu_to_le32(ts.tv_sec); 838c2ecf20Sopenharmony_ci meta->nsec = cpu_to_le32(ts.tv_nsec); 848c2ecf20Sopenharmony_ci items++; 858c2ecf20Sopenharmony_ci 868c2ecf20Sopenharmony_ci put_unaligned_le32(items, &head->num); 878c2ecf20Sopenharmony_ci msg->front.iov_len = len; 888c2ecf20Sopenharmony_ci msg->hdr.version = cpu_to_le16(1); 898c2ecf20Sopenharmony_ci msg->hdr.compat_version = cpu_to_le16(1); 908c2ecf20Sopenharmony_ci msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 918c2ecf20Sopenharmony_ci dout("client%llu send metrics to mds%d\n", 928c2ecf20Sopenharmony_ci ceph_client_gid(mdsc->fsc->client), s->s_mds); 938c2ecf20Sopenharmony_ci ceph_con_send(&s->s_con, msg); 948c2ecf20Sopenharmony_ci 958c2ecf20Sopenharmony_ci return true; 968c2ecf20Sopenharmony_ci} 978c2ecf20Sopenharmony_ci 988c2ecf20Sopenharmony_ci 998c2ecf20Sopenharmony_cistatic void metric_get_session(struct ceph_mds_client *mdsc) 1008c2ecf20Sopenharmony_ci{ 1018c2ecf20Sopenharmony_ci struct ceph_mds_session *s; 1028c2ecf20Sopenharmony_ci int i; 1038c2ecf20Sopenharmony_ci 1048c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 1058c2ecf20Sopenharmony_ci for (i = 0; i < mdsc->max_sessions; i++) { 1068c2ecf20Sopenharmony_ci s = __ceph_lookup_mds_session(mdsc, i); 1078c2ecf20Sopenharmony_ci if (!s) 1088c2ecf20Sopenharmony_ci continue; 1098c2ecf20Sopenharmony_ci 1108c2ecf20Sopenharmony_ci /* 1118c2ecf20Sopenharmony_ci * Skip it if MDS doesn't support the metric collection, 1128c2ecf20Sopenharmony_ci * or the MDS will close the session's socket connection 1138c2ecf20Sopenharmony_ci * directly when it get this message. 1148c2ecf20Sopenharmony_ci */ 1158c2ecf20Sopenharmony_ci if (check_session_state(s) && 1168c2ecf20Sopenharmony_ci test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &s->s_features)) { 1178c2ecf20Sopenharmony_ci mdsc->metric.session = s; 1188c2ecf20Sopenharmony_ci break; 1198c2ecf20Sopenharmony_ci } 1208c2ecf20Sopenharmony_ci 1218c2ecf20Sopenharmony_ci ceph_put_mds_session(s); 1228c2ecf20Sopenharmony_ci } 1238c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 1248c2ecf20Sopenharmony_ci} 1258c2ecf20Sopenharmony_ci 1268c2ecf20Sopenharmony_cistatic void metric_delayed_work(struct work_struct *work) 1278c2ecf20Sopenharmony_ci{ 1288c2ecf20Sopenharmony_ci struct ceph_client_metric *m = 1298c2ecf20Sopenharmony_ci container_of(work, struct ceph_client_metric, delayed_work.work); 1308c2ecf20Sopenharmony_ci struct ceph_mds_client *mdsc = 1318c2ecf20Sopenharmony_ci container_of(m, struct ceph_mds_client, metric); 1328c2ecf20Sopenharmony_ci 1338c2ecf20Sopenharmony_ci if (mdsc->stopping || disable_send_metrics) 1348c2ecf20Sopenharmony_ci return; 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_ci if (!m->session || !check_session_state(m->session)) { 1378c2ecf20Sopenharmony_ci if (m->session) { 1388c2ecf20Sopenharmony_ci ceph_put_mds_session(m->session); 1398c2ecf20Sopenharmony_ci m->session = NULL; 1408c2ecf20Sopenharmony_ci } 1418c2ecf20Sopenharmony_ci metric_get_session(mdsc); 1428c2ecf20Sopenharmony_ci } 1438c2ecf20Sopenharmony_ci if (m->session) { 1448c2ecf20Sopenharmony_ci ceph_mdsc_send_metrics(mdsc, m->session); 1458c2ecf20Sopenharmony_ci metric_schedule_delayed(m); 1468c2ecf20Sopenharmony_ci } 1478c2ecf20Sopenharmony_ci} 1488c2ecf20Sopenharmony_ci 1498c2ecf20Sopenharmony_ciint ceph_metric_init(struct ceph_client_metric *m) 1508c2ecf20Sopenharmony_ci{ 1518c2ecf20Sopenharmony_ci int ret; 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_ci if (!m) 1548c2ecf20Sopenharmony_ci return -EINVAL; 1558c2ecf20Sopenharmony_ci 1568c2ecf20Sopenharmony_ci atomic64_set(&m->total_dentries, 0); 1578c2ecf20Sopenharmony_ci ret = percpu_counter_init(&m->d_lease_hit, 0, GFP_KERNEL); 1588c2ecf20Sopenharmony_ci if (ret) 1598c2ecf20Sopenharmony_ci return ret; 1608c2ecf20Sopenharmony_ci 1618c2ecf20Sopenharmony_ci ret = percpu_counter_init(&m->d_lease_mis, 0, GFP_KERNEL); 1628c2ecf20Sopenharmony_ci if (ret) 1638c2ecf20Sopenharmony_ci goto err_d_lease_mis; 1648c2ecf20Sopenharmony_ci 1658c2ecf20Sopenharmony_ci atomic64_set(&m->total_caps, 0); 1668c2ecf20Sopenharmony_ci ret = percpu_counter_init(&m->i_caps_hit, 0, GFP_KERNEL); 1678c2ecf20Sopenharmony_ci if (ret) 1688c2ecf20Sopenharmony_ci goto err_i_caps_hit; 1698c2ecf20Sopenharmony_ci 1708c2ecf20Sopenharmony_ci ret = percpu_counter_init(&m->i_caps_mis, 0, GFP_KERNEL); 1718c2ecf20Sopenharmony_ci if (ret) 1728c2ecf20Sopenharmony_ci goto err_i_caps_mis; 1738c2ecf20Sopenharmony_ci 1748c2ecf20Sopenharmony_ci spin_lock_init(&m->read_latency_lock); 1758c2ecf20Sopenharmony_ci m->read_latency_sq_sum = 0; 1768c2ecf20Sopenharmony_ci m->read_latency_min = KTIME_MAX; 1778c2ecf20Sopenharmony_ci m->read_latency_max = 0; 1788c2ecf20Sopenharmony_ci m->total_reads = 0; 1798c2ecf20Sopenharmony_ci m->read_latency_sum = 0; 1808c2ecf20Sopenharmony_ci 1818c2ecf20Sopenharmony_ci spin_lock_init(&m->write_latency_lock); 1828c2ecf20Sopenharmony_ci m->write_latency_sq_sum = 0; 1838c2ecf20Sopenharmony_ci m->write_latency_min = KTIME_MAX; 1848c2ecf20Sopenharmony_ci m->write_latency_max = 0; 1858c2ecf20Sopenharmony_ci m->total_writes = 0; 1868c2ecf20Sopenharmony_ci m->write_latency_sum = 0; 1878c2ecf20Sopenharmony_ci 1888c2ecf20Sopenharmony_ci spin_lock_init(&m->metadata_latency_lock); 1898c2ecf20Sopenharmony_ci m->metadata_latency_sq_sum = 0; 1908c2ecf20Sopenharmony_ci m->metadata_latency_min = KTIME_MAX; 1918c2ecf20Sopenharmony_ci m->metadata_latency_max = 0; 1928c2ecf20Sopenharmony_ci m->total_metadatas = 0; 1938c2ecf20Sopenharmony_ci m->metadata_latency_sum = 0; 1948c2ecf20Sopenharmony_ci 1958c2ecf20Sopenharmony_ci atomic64_set(&m->opened_files, 0); 1968c2ecf20Sopenharmony_ci ret = percpu_counter_init(&m->opened_inodes, 0, GFP_KERNEL); 1978c2ecf20Sopenharmony_ci if (ret) 1988c2ecf20Sopenharmony_ci goto err_opened_inodes; 1998c2ecf20Sopenharmony_ci ret = percpu_counter_init(&m->total_inodes, 0, GFP_KERNEL); 2008c2ecf20Sopenharmony_ci if (ret) 2018c2ecf20Sopenharmony_ci goto err_total_inodes; 2028c2ecf20Sopenharmony_ci 2038c2ecf20Sopenharmony_ci m->session = NULL; 2048c2ecf20Sopenharmony_ci INIT_DELAYED_WORK(&m->delayed_work, metric_delayed_work); 2058c2ecf20Sopenharmony_ci 2068c2ecf20Sopenharmony_ci return 0; 2078c2ecf20Sopenharmony_ci 2088c2ecf20Sopenharmony_cierr_total_inodes: 2098c2ecf20Sopenharmony_ci percpu_counter_destroy(&m->opened_inodes); 2108c2ecf20Sopenharmony_cierr_opened_inodes: 2118c2ecf20Sopenharmony_ci percpu_counter_destroy(&m->i_caps_mis); 2128c2ecf20Sopenharmony_cierr_i_caps_mis: 2138c2ecf20Sopenharmony_ci percpu_counter_destroy(&m->i_caps_hit); 2148c2ecf20Sopenharmony_cierr_i_caps_hit: 2158c2ecf20Sopenharmony_ci percpu_counter_destroy(&m->d_lease_mis); 2168c2ecf20Sopenharmony_cierr_d_lease_mis: 2178c2ecf20Sopenharmony_ci percpu_counter_destroy(&m->d_lease_hit); 2188c2ecf20Sopenharmony_ci 2198c2ecf20Sopenharmony_ci return ret; 2208c2ecf20Sopenharmony_ci} 2218c2ecf20Sopenharmony_ci 2228c2ecf20Sopenharmony_civoid ceph_metric_destroy(struct ceph_client_metric *m) 2238c2ecf20Sopenharmony_ci{ 2248c2ecf20Sopenharmony_ci if (!m) 2258c2ecf20Sopenharmony_ci return; 2268c2ecf20Sopenharmony_ci 2278c2ecf20Sopenharmony_ci cancel_delayed_work_sync(&m->delayed_work); 2288c2ecf20Sopenharmony_ci 2298c2ecf20Sopenharmony_ci percpu_counter_destroy(&m->total_inodes); 2308c2ecf20Sopenharmony_ci percpu_counter_destroy(&m->opened_inodes); 2318c2ecf20Sopenharmony_ci percpu_counter_destroy(&m->i_caps_mis); 2328c2ecf20Sopenharmony_ci percpu_counter_destroy(&m->i_caps_hit); 2338c2ecf20Sopenharmony_ci percpu_counter_destroy(&m->d_lease_mis); 2348c2ecf20Sopenharmony_ci percpu_counter_destroy(&m->d_lease_hit); 2358c2ecf20Sopenharmony_ci 2368c2ecf20Sopenharmony_ci ceph_put_mds_session(m->session); 2378c2ecf20Sopenharmony_ci} 2388c2ecf20Sopenharmony_ci 2398c2ecf20Sopenharmony_cistatic inline void __update_latency(ktime_t *totalp, ktime_t *lsump, 2408c2ecf20Sopenharmony_ci ktime_t *min, ktime_t *max, 2418c2ecf20Sopenharmony_ci ktime_t *sq_sump, ktime_t lat) 2428c2ecf20Sopenharmony_ci{ 2438c2ecf20Sopenharmony_ci ktime_t total, avg, sq, lsum; 2448c2ecf20Sopenharmony_ci 2458c2ecf20Sopenharmony_ci total = ++(*totalp); 2468c2ecf20Sopenharmony_ci lsum = (*lsump += lat); 2478c2ecf20Sopenharmony_ci 2488c2ecf20Sopenharmony_ci if (unlikely(lat < *min)) 2498c2ecf20Sopenharmony_ci *min = lat; 2508c2ecf20Sopenharmony_ci if (unlikely(lat > *max)) 2518c2ecf20Sopenharmony_ci *max = lat; 2528c2ecf20Sopenharmony_ci 2538c2ecf20Sopenharmony_ci if (unlikely(total == 1)) 2548c2ecf20Sopenharmony_ci return; 2558c2ecf20Sopenharmony_ci 2568c2ecf20Sopenharmony_ci /* the sq is (lat - old_avg) * (lat - new_avg) */ 2578c2ecf20Sopenharmony_ci avg = DIV64_U64_ROUND_CLOSEST((lsum - lat), (total - 1)); 2588c2ecf20Sopenharmony_ci sq = lat - avg; 2598c2ecf20Sopenharmony_ci avg = DIV64_U64_ROUND_CLOSEST(lsum, total); 2608c2ecf20Sopenharmony_ci sq = sq * (lat - avg); 2618c2ecf20Sopenharmony_ci *sq_sump += sq; 2628c2ecf20Sopenharmony_ci} 2638c2ecf20Sopenharmony_ci 2648c2ecf20Sopenharmony_civoid ceph_update_read_latency(struct ceph_client_metric *m, 2658c2ecf20Sopenharmony_ci ktime_t r_start, ktime_t r_end, 2668c2ecf20Sopenharmony_ci int rc) 2678c2ecf20Sopenharmony_ci{ 2688c2ecf20Sopenharmony_ci ktime_t lat = ktime_sub(r_end, r_start); 2698c2ecf20Sopenharmony_ci 2708c2ecf20Sopenharmony_ci if (unlikely(rc < 0 && rc != -ENOENT && rc != -ETIMEDOUT)) 2718c2ecf20Sopenharmony_ci return; 2728c2ecf20Sopenharmony_ci 2738c2ecf20Sopenharmony_ci spin_lock(&m->read_latency_lock); 2748c2ecf20Sopenharmony_ci __update_latency(&m->total_reads, &m->read_latency_sum, 2758c2ecf20Sopenharmony_ci &m->read_latency_min, &m->read_latency_max, 2768c2ecf20Sopenharmony_ci &m->read_latency_sq_sum, lat); 2778c2ecf20Sopenharmony_ci spin_unlock(&m->read_latency_lock); 2788c2ecf20Sopenharmony_ci} 2798c2ecf20Sopenharmony_ci 2808c2ecf20Sopenharmony_civoid ceph_update_write_latency(struct ceph_client_metric *m, 2818c2ecf20Sopenharmony_ci ktime_t r_start, ktime_t r_end, 2828c2ecf20Sopenharmony_ci int rc) 2838c2ecf20Sopenharmony_ci{ 2848c2ecf20Sopenharmony_ci ktime_t lat = ktime_sub(r_end, r_start); 2858c2ecf20Sopenharmony_ci 2868c2ecf20Sopenharmony_ci if (unlikely(rc && rc != -ETIMEDOUT)) 2878c2ecf20Sopenharmony_ci return; 2888c2ecf20Sopenharmony_ci 2898c2ecf20Sopenharmony_ci spin_lock(&m->write_latency_lock); 2908c2ecf20Sopenharmony_ci __update_latency(&m->total_writes, &m->write_latency_sum, 2918c2ecf20Sopenharmony_ci &m->write_latency_min, &m->write_latency_max, 2928c2ecf20Sopenharmony_ci &m->write_latency_sq_sum, lat); 2938c2ecf20Sopenharmony_ci spin_unlock(&m->write_latency_lock); 2948c2ecf20Sopenharmony_ci} 2958c2ecf20Sopenharmony_ci 2968c2ecf20Sopenharmony_civoid ceph_update_metadata_latency(struct ceph_client_metric *m, 2978c2ecf20Sopenharmony_ci ktime_t r_start, ktime_t r_end, 2988c2ecf20Sopenharmony_ci int rc) 2998c2ecf20Sopenharmony_ci{ 3008c2ecf20Sopenharmony_ci ktime_t lat = ktime_sub(r_end, r_start); 3018c2ecf20Sopenharmony_ci 3028c2ecf20Sopenharmony_ci if (unlikely(rc && rc != -ENOENT)) 3038c2ecf20Sopenharmony_ci return; 3048c2ecf20Sopenharmony_ci 3058c2ecf20Sopenharmony_ci spin_lock(&m->metadata_latency_lock); 3068c2ecf20Sopenharmony_ci __update_latency(&m->total_metadatas, &m->metadata_latency_sum, 3078c2ecf20Sopenharmony_ci &m->metadata_latency_min, &m->metadata_latency_max, 3088c2ecf20Sopenharmony_ci &m->metadata_latency_sq_sum, lat); 3098c2ecf20Sopenharmony_ci spin_unlock(&m->metadata_latency_lock); 3108c2ecf20Sopenharmony_ci} 311