1// SPDX-License-Identifier: GPL-2.0-only 2 3/* 4 * Copyright (C) 2020 Advanced Micro Devices, Inc. 5 */ 6#include <asm/cpu_device_id.h> 7 8#include <linux/bits.h> 9#include <linux/cpu.h> 10#include <linux/cpumask.h> 11#include <linux/delay.h> 12#include <linux/device.h> 13#include <linux/hwmon.h> 14#include <linux/kernel.h> 15#include <linux/kthread.h> 16#include <linux/list.h> 17#include <linux/module.h> 18#include <linux/mutex.h> 19#include <linux/processor.h> 20#include <linux/platform_device.h> 21#include <linux/sched.h> 22#include <linux/slab.h> 23#include <linux/topology.h> 24#include <linux/types.h> 25 26#define DRVNAME "amd_energy" 27 28#define ENERGY_PWR_UNIT_MSR 0xC0010299 29#define ENERGY_CORE_MSR 0xC001029A 30#define ENERGY_PKG_MSR 0xC001029B 31 32#define AMD_ENERGY_UNIT_MASK 0x01F00 33#define AMD_ENERGY_MASK 0xFFFFFFFF 34 35struct sensor_accumulator { 36 u64 energy_ctr; 37 u64 prev_value; 38}; 39 40struct amd_energy_data { 41 struct hwmon_channel_info energy_info; 42 const struct hwmon_channel_info *info[2]; 43 struct hwmon_chip_info chip; 44 struct task_struct *wrap_accumulate; 45 /* Lock around the accumulator */ 46 struct mutex lock; 47 /* An accumulator for each core and socket */ 48 struct sensor_accumulator *accums; 49 unsigned int timeout_ms; 50 /* Energy Status Units */ 51 int energy_units; 52 int nr_cpus; 53 int nr_socks; 54 int core_id; 55 char (*label)[10]; 56}; 57 58static int amd_energy_read_labels(struct device *dev, 59 enum hwmon_sensor_types type, 60 u32 attr, int channel, 61 const char **str) 62{ 63 struct amd_energy_data *data = dev_get_drvdata(dev); 64 65 *str = data->label[channel]; 66 return 0; 67} 68 69static void get_energy_units(struct amd_energy_data *data) 70{ 71 u64 rapl_units; 72 73 rdmsrl_safe(ENERGY_PWR_UNIT_MSR, &rapl_units); 74 data->energy_units = (rapl_units & AMD_ENERGY_UNIT_MASK) >> 8; 75} 76 77static void accumulate_delta(struct amd_energy_data *data, 78 int channel, int cpu, u32 reg) 79{ 80 struct sensor_accumulator *accum; 81 u64 input; 82 83 mutex_lock(&data->lock); 84 rdmsrl_safe_on_cpu(cpu, reg, &input); 85 input &= AMD_ENERGY_MASK; 86 87 accum = &data->accums[channel]; 88 if (input >= accum->prev_value) 89 accum->energy_ctr += 90 input - accum->prev_value; 91 else 92 accum->energy_ctr += UINT_MAX - 93 accum->prev_value + input; 94 95 accum->prev_value = input; 96 mutex_unlock(&data->lock); 97} 98 99static void read_accumulate(struct amd_energy_data *data) 100{ 101 int sock, scpu, cpu; 102 103 for (sock = 0; sock < data->nr_socks; sock++) { 104 scpu = cpumask_first_and(cpu_online_mask, 105 cpumask_of_node(sock)); 106 107 accumulate_delta(data, data->nr_cpus + sock, 108 scpu, ENERGY_PKG_MSR); 109 } 110 111 if (data->core_id >= data->nr_cpus) 112 data->core_id = 0; 113 114 cpu = data->core_id; 115 if (cpu_online(cpu)) 116 accumulate_delta(data, cpu, cpu, ENERGY_CORE_MSR); 117 118 data->core_id++; 119} 120 121static void amd_add_delta(struct amd_energy_data *data, int ch, 122 int cpu, long *val, u32 reg) 123{ 124 struct sensor_accumulator *accum; 125 u64 input; 126 127 mutex_lock(&data->lock); 128 rdmsrl_safe_on_cpu(cpu, reg, &input); 129 input &= AMD_ENERGY_MASK; 130 131 accum = &data->accums[ch]; 132 if (input >= accum->prev_value) 133 input += accum->energy_ctr - 134 accum->prev_value; 135 else 136 input += UINT_MAX - accum->prev_value + 137 accum->energy_ctr; 138 139 /* Energy consumed = (1/(2^ESU) * RAW * 1000000UL) μJoules */ 140 *val = div64_ul(input * 1000000UL, BIT(data->energy_units)); 141 142 mutex_unlock(&data->lock); 143} 144 145static int amd_energy_read(struct device *dev, 146 enum hwmon_sensor_types type, 147 u32 attr, int channel, long *val) 148{ 149 struct amd_energy_data *data = dev_get_drvdata(dev); 150 u32 reg; 151 int cpu; 152 153 if (channel >= data->nr_cpus) { 154 cpu = cpumask_first_and(cpu_online_mask, 155 cpumask_of_node 156 (channel - data->nr_cpus)); 157 reg = ENERGY_PKG_MSR; 158 } else { 159 cpu = channel; 160 if (!cpu_online(cpu)) 161 return -ENODEV; 162 163 reg = ENERGY_CORE_MSR; 164 } 165 amd_add_delta(data, channel, cpu, val, reg); 166 167 return 0; 168} 169 170static umode_t amd_energy_is_visible(const void *_data, 171 enum hwmon_sensor_types type, 172 u32 attr, int channel) 173{ 174 return 0440; 175} 176 177static int energy_accumulator(void *p) 178{ 179 struct amd_energy_data *data = (struct amd_energy_data *)p; 180 unsigned int timeout = data->timeout_ms; 181 182 while (!kthread_should_stop()) { 183 /* 184 * Ignoring the conditions such as 185 * cpu being offline or rdmsr failure 186 */ 187 read_accumulate(data); 188 189 set_current_state(TASK_INTERRUPTIBLE); 190 if (kthread_should_stop()) 191 break; 192 193 schedule_timeout(msecs_to_jiffies(timeout)); 194 } 195 return 0; 196} 197 198static const struct hwmon_ops amd_energy_ops = { 199 .is_visible = amd_energy_is_visible, 200 .read = amd_energy_read, 201 .read_string = amd_energy_read_labels, 202}; 203 204static int amd_create_sensor(struct device *dev, 205 struct amd_energy_data *data, 206 enum hwmon_sensor_types type, u32 config) 207{ 208 struct hwmon_channel_info *info = &data->energy_info; 209 struct sensor_accumulator *accums; 210 int i, num_siblings, cpus, sockets; 211 u32 *s_config; 212 char (*label_l)[10]; 213 214 /* Identify the number of siblings per core */ 215 num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1; 216 217 sockets = num_possible_nodes(); 218 219 /* 220 * Energy counter register is accessed at core level. 221 * Hence, filterout the siblings. 222 */ 223 cpus = num_present_cpus() / num_siblings; 224 225 s_config = devm_kcalloc(dev, cpus + sockets + 1, 226 sizeof(u32), GFP_KERNEL); 227 if (!s_config) 228 return -ENOMEM; 229 230 accums = devm_kcalloc(dev, cpus + sockets, 231 sizeof(struct sensor_accumulator), 232 GFP_KERNEL); 233 if (!accums) 234 return -ENOMEM; 235 236 label_l = devm_kcalloc(dev, cpus + sockets, 237 sizeof(*label_l), GFP_KERNEL); 238 if (!label_l) 239 return -ENOMEM; 240 241 info->type = type; 242 info->config = s_config; 243 244 data->nr_cpus = cpus; 245 data->nr_socks = sockets; 246 data->accums = accums; 247 data->label = label_l; 248 249 for (i = 0; i < cpus + sockets; i++) { 250 s_config[i] = config; 251 if (i < cpus) 252 scnprintf(label_l[i], 10, "Ecore%03u", i); 253 else 254 scnprintf(label_l[i], 10, "Esocket%u", (i - cpus)); 255 } 256 257 s_config[i] = 0; 258 return 0; 259} 260 261static int amd_energy_probe(struct platform_device *pdev) 262{ 263 struct device *hwmon_dev; 264 struct amd_energy_data *data; 265 struct device *dev = &pdev->dev; 266 int ret; 267 268 data = devm_kzalloc(dev, 269 sizeof(struct amd_energy_data), GFP_KERNEL); 270 if (!data) 271 return -ENOMEM; 272 273 data->chip.ops = &amd_energy_ops; 274 data->chip.info = data->info; 275 276 dev_set_drvdata(dev, data); 277 /* Populate per-core energy reporting */ 278 data->info[0] = &data->energy_info; 279 ret = amd_create_sensor(dev, data, hwmon_energy, 280 HWMON_E_INPUT | HWMON_E_LABEL); 281 if (ret) 282 return ret; 283 284 mutex_init(&data->lock); 285 get_energy_units(data); 286 287 hwmon_dev = devm_hwmon_device_register_with_info(dev, DRVNAME, 288 data, 289 &data->chip, 290 NULL); 291 if (IS_ERR(hwmon_dev)) 292 return PTR_ERR(hwmon_dev); 293 294 /* 295 * On a system with peak wattage of 250W 296 * timeout = 2 ^ 32 / 2 ^ energy_units / 250 secs 297 */ 298 data->timeout_ms = 1000 * 299 BIT(min(28, 31 - data->energy_units)) / 250; 300 301 data->wrap_accumulate = kthread_run(energy_accumulator, data, 302 "%s", dev_name(hwmon_dev)); 303 return PTR_ERR_OR_ZERO(data->wrap_accumulate); 304} 305 306static int amd_energy_remove(struct platform_device *pdev) 307{ 308 struct amd_energy_data *data = dev_get_drvdata(&pdev->dev); 309 310 if (data && data->wrap_accumulate) 311 kthread_stop(data->wrap_accumulate); 312 313 return 0; 314} 315 316static const struct platform_device_id amd_energy_ids[] = { 317 { .name = DRVNAME, }, 318 {} 319}; 320MODULE_DEVICE_TABLE(platform, amd_energy_ids); 321 322static struct platform_driver amd_energy_driver = { 323 .probe = amd_energy_probe, 324 .remove = amd_energy_remove, 325 .id_table = amd_energy_ids, 326 .driver = { 327 .name = DRVNAME, 328 }, 329}; 330 331static struct platform_device *amd_energy_platdev; 332 333static const struct x86_cpu_id cpu_ids[] __initconst = { 334 X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, 0x31, NULL), 335 {} 336}; 337MODULE_DEVICE_TABLE(x86cpu, cpu_ids); 338 339static int __init amd_energy_init(void) 340{ 341 int ret; 342 343 if (!x86_match_cpu(cpu_ids)) 344 return -ENODEV; 345 346 ret = platform_driver_register(&amd_energy_driver); 347 if (ret) 348 return ret; 349 350 amd_energy_platdev = platform_device_alloc(DRVNAME, 0); 351 if (!amd_energy_platdev) { 352 platform_driver_unregister(&amd_energy_driver); 353 return -ENOMEM; 354 } 355 356 ret = platform_device_add(amd_energy_platdev); 357 if (ret) { 358 platform_device_put(amd_energy_platdev); 359 platform_driver_unregister(&amd_energy_driver); 360 return ret; 361 } 362 363 return ret; 364} 365 366static void __exit amd_energy_exit(void) 367{ 368 platform_device_unregister(amd_energy_platdev); 369 platform_driver_unregister(&amd_energy_driver); 370} 371 372module_init(amd_energy_init); 373module_exit(amd_energy_exit); 374 375MODULE_DESCRIPTION("Driver for AMD Energy reporting from RAPL MSR via HWMON interface"); 376MODULE_AUTHOR("Naveen Krishna Chatradhi <nchatrad@amd.com>"); 377MODULE_LICENSE("GPL"); 378