1// SPDX-License-Identifier: GPL-2.0-only
2
3/*
4 * Copyright (C) 2020 Advanced Micro Devices, Inc.
5 */
6#include <asm/cpu_device_id.h>
7
8#include <linux/bits.h>
9#include <linux/cpu.h>
10#include <linux/cpumask.h>
11#include <linux/delay.h>
12#include <linux/device.h>
13#include <linux/hwmon.h>
14#include <linux/kernel.h>
15#include <linux/kthread.h>
16#include <linux/list.h>
17#include <linux/module.h>
18#include <linux/mutex.h>
19#include <linux/processor.h>
20#include <linux/platform_device.h>
21#include <linux/sched.h>
22#include <linux/slab.h>
23#include <linux/topology.h>
24#include <linux/types.h>
25
/* Name shared by the platform driver, the platform device and the hwmon device */
#define DRVNAME			"amd_energy"

/* MSR read once to obtain the energy status unit */
#define ENERGY_PWR_UNIT_MSR	0xC0010299
/* MSR holding the energy counter sampled for the per-core channels */
#define ENERGY_CORE_MSR		0xC001029A
/* MSR holding the energy counter sampled for the per-socket channels */
#define ENERGY_PKG_MSR		0xC001029B

/* Bits 12:8 of ENERGY_PWR_UNIT_MSR: the energy status unit field */
#define AMD_ENERGY_UNIT_MASK	0x01F00
/* Only the low 32 bits of a counter reading are kept */
#define AMD_ENERGY_MASK		0xFFFFFFFF
34
/* Software accumulator kept for one energy channel */
struct sensor_accumulator {
	/* Sum of all counter deltas folded in so far (raw counter units) */
	u64 energy_ctr;
	/* Masked counter reading captured by the most recent accumulation */
	u64 prev_value;
};
39
/* Per-device driver state, stored as the platform device's drvdata */
struct amd_energy_data {
	/* Channel description (type + per-channel config) for all energy channels */
	struct hwmon_channel_info energy_info;
	/*
	 * Channel-info list handed to hwmon through chip.info. Only info[0]
	 * is written by probe; the second slot stays zeroed.
	 */
	const struct hwmon_channel_info *info[2];
	/* hwmon chip description passed at registration */
	struct hwmon_chip_info chip;
	/* Kernel thread running energy_accumulator() */
	struct task_struct *wrap_accumulate;
	/* Lock around the accumulator */
	struct mutex lock;
	/* An accumulator for each core and socket */
	struct sensor_accumulator *accums;
	/* Sleep interval between two passes of the accumulator thread */
	unsigned int timeout_ms;
	/* Energy Status Units */
	int energy_units;
	/* Number of per-core channels; these occupy channels [0, nr_cpus) */
	int nr_cpus;
	/* Number of per-socket channels; these follow the core channels */
	int nr_socks;
	/* Core channel that the accumulator thread will sample next */
	int core_id;
	/* One 10-byte, NUL-terminated label per channel */
	char (*label)[10];
};
57
58static int amd_energy_read_labels(struct device *dev,
59				  enum hwmon_sensor_types type,
60				  u32 attr, int channel,
61				  const char **str)
62{
63	struct amd_energy_data *data = dev_get_drvdata(dev);
64
65	*str = data->label[channel];
66	return 0;
67}
68
69static void get_energy_units(struct amd_energy_data *data)
70{
71	u64 rapl_units;
72
73	rdmsrl_safe(ENERGY_PWR_UNIT_MSR, &rapl_units);
74	data->energy_units = (rapl_units & AMD_ENERGY_UNIT_MASK) >> 8;
75}
76
77static void accumulate_delta(struct amd_energy_data *data,
78			     int channel, int cpu, u32 reg)
79{
80	struct sensor_accumulator *accum;
81	u64 input;
82
83	mutex_lock(&data->lock);
84	rdmsrl_safe_on_cpu(cpu, reg, &input);
85	input &= AMD_ENERGY_MASK;
86
87	accum = &data->accums[channel];
88	if (input >= accum->prev_value)
89		accum->energy_ctr +=
90			input - accum->prev_value;
91	else
92		accum->energy_ctr += UINT_MAX -
93			accum->prev_value + input;
94
95	accum->prev_value = input;
96	mutex_unlock(&data->lock);
97}
98
99static void read_accumulate(struct amd_energy_data *data)
100{
101	int sock, scpu, cpu;
102
103	for (sock = 0; sock < data->nr_socks; sock++) {
104		scpu = cpumask_first_and(cpu_online_mask,
105					 cpumask_of_node(sock));
106
107		accumulate_delta(data, data->nr_cpus + sock,
108				 scpu, ENERGY_PKG_MSR);
109	}
110
111	if (data->core_id >= data->nr_cpus)
112		data->core_id = 0;
113
114	cpu = data->core_id;
115	if (cpu_online(cpu))
116		accumulate_delta(data, cpu, cpu, ENERGY_CORE_MSR);
117
118	data->core_id++;
119}
120
121static void amd_add_delta(struct amd_energy_data *data, int ch,
122			  int cpu, long *val, u32 reg)
123{
124	struct sensor_accumulator *accum;
125	u64 input;
126
127	mutex_lock(&data->lock);
128	rdmsrl_safe_on_cpu(cpu, reg, &input);
129	input &= AMD_ENERGY_MASK;
130
131	accum = &data->accums[ch];
132	if (input >= accum->prev_value)
133		input += accum->energy_ctr -
134				accum->prev_value;
135	else
136		input += UINT_MAX - accum->prev_value +
137				accum->energy_ctr;
138
139	/* Energy consumed = (1/(2^ESU) * RAW * 1000000UL) μJoules */
140	*val = div64_ul(input * 1000000UL, BIT(data->energy_units));
141
142	mutex_unlock(&data->lock);
143}
144
145static int amd_energy_read(struct device *dev,
146			   enum hwmon_sensor_types type,
147			   u32 attr, int channel, long *val)
148{
149	struct amd_energy_data *data = dev_get_drvdata(dev);
150	u32 reg;
151	int cpu;
152
153	if (channel >= data->nr_cpus) {
154		cpu = cpumask_first_and(cpu_online_mask,
155					cpumask_of_node
156					(channel - data->nr_cpus));
157		reg = ENERGY_PKG_MSR;
158	} else {
159		cpu = channel;
160		if (!cpu_online(cpu))
161			return -ENODEV;
162
163		reg = ENERGY_CORE_MSR;
164	}
165	amd_add_delta(data, channel, cpu, val, reg);
166
167	return 0;
168}
169
/*
 * hwmon ->is_visible callback: every attribute of every channel gets the
 * same mode, 0440 (readable by owner and group, not writable). None of
 * the arguments is consulted.
 */
static umode_t amd_energy_is_visible(const void *_data,
				     enum hwmon_sensor_types type,
				     u32 attr, int channel)
{
	return 0440;
}
176
177static int energy_accumulator(void *p)
178{
179	struct amd_energy_data *data = (struct amd_energy_data *)p;
180	unsigned int timeout = data->timeout_ms;
181
182	while (!kthread_should_stop()) {
183		/*
184		 * Ignoring the conditions such as
185		 * cpu being offline or rdmsr failure
186		 */
187		read_accumulate(data);
188
189		set_current_state(TASK_INTERRUPTIBLE);
190		if (kthread_should_stop())
191			break;
192
193		schedule_timeout(msecs_to_jiffies(timeout));
194	}
195	return 0;
196}
197
/* hwmon callbacks: attribute visibility, numeric reads and label reads */
static const struct hwmon_ops amd_energy_ops = {
	.is_visible = amd_energy_is_visible,
	.read = amd_energy_read,
	.read_string = amd_energy_read_labels,
};
203
204static int amd_create_sensor(struct device *dev,
205			     struct amd_energy_data *data,
206			     enum hwmon_sensor_types type, u32 config)
207{
208	struct hwmon_channel_info *info = &data->energy_info;
209	struct sensor_accumulator *accums;
210	int i, num_siblings, cpus, sockets;
211	u32 *s_config;
212	char (*label_l)[10];
213
214	/* Identify the number of siblings per core */
215	num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1;
216
217	sockets = num_possible_nodes();
218
219	/*
220	 * Energy counter register is accessed at core level.
221	 * Hence, filterout the siblings.
222	 */
223	cpus = num_present_cpus() / num_siblings;
224
225	s_config = devm_kcalloc(dev, cpus + sockets + 1,
226				sizeof(u32), GFP_KERNEL);
227	if (!s_config)
228		return -ENOMEM;
229
230	accums = devm_kcalloc(dev, cpus + sockets,
231			      sizeof(struct sensor_accumulator),
232			      GFP_KERNEL);
233	if (!accums)
234		return -ENOMEM;
235
236	label_l = devm_kcalloc(dev, cpus + sockets,
237			       sizeof(*label_l), GFP_KERNEL);
238	if (!label_l)
239		return -ENOMEM;
240
241	info->type = type;
242	info->config = s_config;
243
244	data->nr_cpus = cpus;
245	data->nr_socks = sockets;
246	data->accums = accums;
247	data->label = label_l;
248
249	for (i = 0; i < cpus + sockets; i++) {
250		s_config[i] = config;
251		if (i < cpus)
252			scnprintf(label_l[i], 10, "Ecore%03u", i);
253		else
254			scnprintf(label_l[i], 10, "Esocket%u", (i - cpus));
255	}
256
257	s_config[i] = 0;
258	return 0;
259}
260
261static int amd_energy_probe(struct platform_device *pdev)
262{
263	struct device *hwmon_dev;
264	struct amd_energy_data *data;
265	struct device *dev = &pdev->dev;
266	int ret;
267
268	data = devm_kzalloc(dev,
269			    sizeof(struct amd_energy_data), GFP_KERNEL);
270	if (!data)
271		return -ENOMEM;
272
273	data->chip.ops = &amd_energy_ops;
274	data->chip.info = data->info;
275
276	dev_set_drvdata(dev, data);
277	/* Populate per-core energy reporting */
278	data->info[0] = &data->energy_info;
279	ret = amd_create_sensor(dev, data, hwmon_energy,
280				HWMON_E_INPUT | HWMON_E_LABEL);
281	if (ret)
282		return ret;
283
284	mutex_init(&data->lock);
285	get_energy_units(data);
286
287	hwmon_dev = devm_hwmon_device_register_with_info(dev, DRVNAME,
288							 data,
289							 &data->chip,
290							 NULL);
291	if (IS_ERR(hwmon_dev))
292		return PTR_ERR(hwmon_dev);
293
294	/*
295	 * On a system with peak wattage of 250W
296	 * timeout = 2 ^ 32 / 2 ^ energy_units / 250 secs
297	 */
298	data->timeout_ms = 1000 *
299			   BIT(min(28, 31 - data->energy_units)) / 250;
300
301	data->wrap_accumulate = kthread_run(energy_accumulator, data,
302					    "%s", dev_name(hwmon_dev));
303	return PTR_ERR_OR_ZERO(data->wrap_accumulate);
304}
305
306static int amd_energy_remove(struct platform_device *pdev)
307{
308	struct amd_energy_data *data = dev_get_drvdata(&pdev->dev);
309
310	if (data && data->wrap_accumulate)
311		kthread_stop(data->wrap_accumulate);
312
313	return 0;
314}
315
/* Platform device IDs this driver binds to; the empty entry ends the table */
static const struct platform_device_id amd_energy_ids[] = {
	{ .name = DRVNAME, },
	{}
};
MODULE_DEVICE_TABLE(platform, amd_energy_ids);
321
/* Platform driver description registered from amd_energy_init() */
static struct platform_driver amd_energy_driver = {
	.probe = amd_energy_probe,
	.remove	= amd_energy_remove,
	.id_table = amd_energy_ids,
	.driver = {
		.name = DRVNAME,
	},
};
330
/* Platform device created in amd_energy_init() and removed in amd_energy_exit() */
static struct platform_device *amd_energy_platdev;

/*
 * CPUs on which amd_energy_init() proceeds: AMD family 0x17, model 0x31.
 * The empty entry ends the table.
 */
static const struct x86_cpu_id cpu_ids[] __initconst = {
	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, 0x31, NULL),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, cpu_ids);
338
339static int __init amd_energy_init(void)
340{
341	int ret;
342
343	if (!x86_match_cpu(cpu_ids))
344		return -ENODEV;
345
346	ret = platform_driver_register(&amd_energy_driver);
347	if (ret)
348		return ret;
349
350	amd_energy_platdev = platform_device_alloc(DRVNAME, 0);
351	if (!amd_energy_platdev) {
352		platform_driver_unregister(&amd_energy_driver);
353		return -ENOMEM;
354	}
355
356	ret = platform_device_add(amd_energy_platdev);
357	if (ret) {
358		platform_device_put(amd_energy_platdev);
359		platform_driver_unregister(&amd_energy_driver);
360		return ret;
361	}
362
363	return ret;
364}
365
/*
 * Module exit point: tear down in the reverse order of amd_energy_init(),
 * first the platform device, then the platform driver.
 */
static void __exit amd_energy_exit(void)
{
	platform_device_unregister(amd_energy_platdev);
	platform_driver_unregister(&amd_energy_driver);
}
371
/* Hook the init/exit functions into module load and unload */
module_init(amd_energy_init);
module_exit(amd_energy_exit);

/* Module metadata */
MODULE_DESCRIPTION("Driver for AMD Energy reporting from RAPL MSR via HWMON interface");
MODULE_AUTHOR("Naveen Krishna Chatradhi <nchatrad@amd.com>");
MODULE_LICENSE("GPL");
378