// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_powerclamp.c - package c-state idle injection
 *
 * Copyright (c) 2012, Intel Corporation.
 *
 * Authors:
 *     Arjan van de Ven <arjan@linux.intel.com>
 *     Jacob Pan <jacob.jun.pan@linux.intel.com>
 *
 *	TODO:
 *           1. Better handle wakeups from external interrupts. Currently a
 *              fixed compensation is added to the clamping duration when an
 *              excessive number of wakeups is observed during idle time. The
 *              reason is that, for external interrupts which need no ack,
 *              clamping down the cpu in non-irq context does not reduce the
 *              irq rate. For the majority of cases clamping down the cpu does
 *              help reduce irqs as well, so we should be able to differentiate
 *              the two cases and give a quantitative solution for the irqs
 *              that we can control, perhaps based on get_cpu_iowait_time_us().
 *
 *	     2. Synchronization with other hw blocks.
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/cpu.h>
#include <linux/thermal.h>
#include <linux/slab.h>
#include <linux/tick.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/sched/rt.h>
#include <uapi/linux/sched/types.h>

#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/mwait.h>
#include <asm/cpu_device_id.h>
#include <asm/hardirq.h>
#define MAX_TARGET_RATIO (50U)
/* For each undisturbed clamping period (no extra wake ups during idle time),
 * we increment the confidence counter for the given target ratio.
 * CONFIDENCE_OK defines the level where runtime calibration results are
 * valid.
 */
#define CONFIDENCE_OK (3)
/* Default idle injection duration, the driver adjusts sleep time to meet the
 * target idle ratio. Similar to frequency modulation.
 */
#define DEFAULT_DURATION_JIFFIES (6)

static unsigned int target_mwait;
static struct dentry *debug_dir;
static bool poll_pkg_cstate_enable;

/* user selected target */
static unsigned int set_target_ratio;
static unsigned int current_ratio;
static bool should_skip;
static bool reduce_irq;
static atomic_t idle_wakeup_counter;
static unsigned int control_cpu; /* The cpu assigned to collect stats and
				  * update control parameters. Defaults to the
				  * BSP, but the BSP can be offlined.
				  */
static bool clamping;

struct powerclamp_worker_data {
	struct kthread_worker *worker;
	struct kthread_work balancing_work;
	struct kthread_delayed_work idle_injection_work;
	unsigned int cpu;
	unsigned int count;
	unsigned int guard;
	unsigned int window_size_now;
	unsigned int target_ratio;
	unsigned int duration_jiffies;
	bool clamping;
};

static struct powerclamp_worker_data __percpu *worker_data;
static struct thermal_cooling_device *cooling_dev;
static unsigned long *cpu_clamping_mask;  /* bit map for tracking per cpu
					   * clamping kthread worker
					   */

static unsigned int duration;
static unsigned int pkg_cstate_ratio_cur;
static unsigned int window_size;

static int duration_set(const char *arg, const struct kernel_param *kp)
{
	int ret = 0;
	unsigned long new_duration;

	ret = kstrtoul(arg, 10, &new_duration);
	if (ret)
		goto exit;
	if (new_duration > 25 || new_duration < 6) {
		pr_err("Out of recommended range %lu, between 6-25ms\n",
			new_duration);
		ret = -EINVAL;
	}

	duration = clamp(new_duration, 6ul, 25ul);
	smp_mb();

exit:

	return ret;
}

static const struct kernel_param_ops duration_ops = {
	.set = duration_set,
	.get = param_get_int,
};

module_param_cb(duration, &duration_ops, &duration, 0644);
MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec.");
struct powerclamp_calibration_data {
	unsigned long confidence;  /* used for calibration, basically a counter
				    * that gets incremented each time a clamping
				    * period is completed without extra wakeups.
				    * Once the counter reaches a given level,
				    * the compensation is deemed usable.
				    */
	unsigned long steady_comp; /* steady state compensation used when
				    * no extra wakeups occurred.
				    */
	unsigned long dynamic_comp; /* compensate excessive wakeup from idle,
				     * mostly from external interrupts.
				     */
};

static struct powerclamp_calibration_data cal_data[MAX_TARGET_RATIO];

static int window_size_set(const char *arg, const struct kernel_param *kp)
{
	int ret = 0;
	unsigned long new_window_size;

	ret = kstrtoul(arg, 10, &new_window_size);
	if (ret)
		goto exit_win;
	if (new_window_size > 10 || new_window_size < 2) {
		pr_err("Out of recommended window size %lu, between 2-10\n",
			new_window_size);
		ret = -EINVAL;
	}

	window_size = clamp(new_window_size, 2ul, 10ul);
	smp_mb();

exit_win:

	return ret;
}

static const struct kernel_param_ops window_size_ops = {
	.set = window_size_set,
	.get = param_get_int,
};

module_param_cb(window_size, &window_size_ops, &window_size, 0644);
MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n"
	"\tpowerclamp controls idle ratio within this window. larger\n"
	"\twindow size results in slower response time but smoother\n"
	"\tclamping results. defaults to 2.");

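/*
 * Find the deepest MWAIT hint supported by the CPU by walking the
 * sub-C-state counts reported in the CPUID MWAIT leaf.
 */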
static void find_target_mwait(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int highest_cstate = 0;
	unsigned int highest_subcstate = 0;
	int i;

	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
		return;

	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);

	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
		return;

	edx >>= MWAIT_SUBSTATE_SIZE;
	for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
		if (edx & MWAIT_SUBSTATE_MASK) {
			highest_cstate = i;
			highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
		}
	}
	target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
		(highest_subcstate - 1);
}

struct pkg_cstate_info {
	bool skip;
	int msr_index;
	int cstate_id;
};

#define PKG_CSTATE_INIT(id) {				\
		.msr_index = MSR_PKG_C##id##_RESIDENCY, \
		.cstate_id = id				\
			}

static struct pkg_cstate_info pkg_cstates[] = {
	PKG_CSTATE_INIT(2),
	PKG_CSTATE_INIT(3),
	PKG_CSTATE_INIT(6),
	PKG_CSTATE_INIT(7),
	PKG_CSTATE_INIT(8),
	PKG_CSTATE_INIT(9),
	PKG_CSTATE_INIT(10),
	{NULL},
};

static bool has_pkg_state_counter(void)
{
	u64 val;
	struct pkg_cstate_info *info = pkg_cstates;

	/* check if any one of the counter msrs exists */
	while (info->msr_index) {
		if (!rdmsrl_safe(info->msr_index, &val))
			return true;
		info++;
	}

	return false;
}

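/*
 * Sum the residency counters of all package C-states whose MSRs are
 * readable. MSRs that fault are marked to be skipped on later reads.
 */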
static u64 pkg_state_counter(void)
{
	u64 val;
	u64 count = 0;
	struct pkg_cstate_info *info = pkg_cstates;

	while (info->msr_index) {
		if (!info->skip) {
			if (!rdmsrl_safe(info->msr_index, &val))
				count += val;
			else
				info->skip = true;
		}
		info++;
	}

	return count;
}

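/*
 * Look up the runtime-calibrated compensation for a target idle ratio.
 * The compensation is only trusted when the calibration data for the
 * ratio and its neighbors has reached CONFIDENCE_OK; in that case the
 * steady-state compensation of three adjacent ratios is averaged.
 */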
static unsigned int get_compensation(int ratio)
{
	unsigned int comp = 0;

	if (!poll_pkg_cstate_enable)
		return 0;

	/* we only use compensation if all adjacent ones are good */
	if (ratio == 1 &&
		cal_data[ratio].confidence >= CONFIDENCE_OK &&
		cal_data[ratio + 1].confidence >= CONFIDENCE_OK &&
		cal_data[ratio + 2].confidence >= CONFIDENCE_OK) {
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio + 1].steady_comp +
			cal_data[ratio + 2].steady_comp) / 3;
	} else if (ratio == MAX_TARGET_RATIO - 1 &&
		cal_data[ratio].confidence >= CONFIDENCE_OK &&
		cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
		cal_data[ratio - 2].confidence >= CONFIDENCE_OK) {
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio - 1].steady_comp +
			cal_data[ratio - 2].steady_comp) / 3;
	} else if (cal_data[ratio].confidence >= CONFIDENCE_OK &&
		cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
		cal_data[ratio + 1].confidence >= CONFIDENCE_OK) {
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio - 1].steady_comp +
			cal_data[ratio + 1].steady_comp) / 3;
	}

	/* REVISIT: simple penalty of double idle injection */
	if (reduce_irq)
		comp = ratio;
	/* do not exceed limit */
	if (comp + ratio >= MAX_TARGET_RATIO)
		comp = MAX_TARGET_RATIO - ratio - 1;

	return comp;
}

static void adjust_compensation(int target_ratio, unsigned int win)
{
	int delta;
	struct powerclamp_calibration_data *d = &cal_data[target_ratio];

	/*
	 * Adjust the compensation only if the confidence level has not been
	 * reached yet. If there were too many wakeups during the last idle
	 * injection period, the data cannot be trusted for compensation
	 * either, so skip the update in that case as well.
	 */
	if (d->confidence >= CONFIDENCE_OK ||
		atomic_read(&idle_wakeup_counter) >
		win * num_online_cpus())
		return;

	delta = set_target_ratio - current_ratio;
	/* filter out bad data */
	if (delta >= 0 && delta <= (1 + target_ratio / 10)) {
		if (d->steady_comp)
			d->steady_comp =
				roundup(delta + d->steady_comp, 2) / 2;
		else
			d->steady_comp = delta;
		d->confidence++;
	}
}

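/*
 * Check the result of the last control window: compute the achieved
 * package C-state residency as a percentage of elapsed TSC cycles,
 * update the calibration data and return true if the next injection
 * round should be skipped because we are already above target + guard.
 */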
static bool powerclamp_adjust_controls(unsigned int target_ratio,
				unsigned int guard, unsigned int win)
{
	static u64 msr_last, tsc_last;
	u64 msr_now, tsc_now;
	u64 val64;

	/* check result for the last window */
	msr_now = pkg_state_counter();
	tsc_now = rdtsc();

	/* calculate pkg cstate vs tsc ratio */
	if (!msr_last || !tsc_last)
		current_ratio = 1;
	else if (tsc_now - tsc_last) {
		val64 = 100 * (msr_now - msr_last);
		do_div(val64, (tsc_now - tsc_last));
		current_ratio = val64;
	}

	/* update record */
	msr_last = msr_now;
	tsc_last = tsc_now;

	adjust_compensation(target_ratio, win);
	/*
	 * too many external interrupts, set a flag so that
	 * we can take measures later.
	 */
	reduce_irq = atomic_read(&idle_wakeup_counter) >=
		2 * win * num_online_cpus();

	atomic_set(&idle_wakeup_counter, 0);
	/* if we are above target + guard, skip */
	return set_target_ratio + guard <= current_ratio;
}

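/*
 * Per-CPU balancing work: pick up the latest user settings, compute the
 * injection interval needed to hit the (compensated) target idle ratio,
 * and schedule the idle injection work aligned to a jiffies boundary so
 * that all CPUs inject idle at roughly the same time.
 */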
static void clamp_balancing_func(struct kthread_work *work)
{
	struct powerclamp_worker_data *w_data;
	int sleeptime;
	unsigned long target_jiffies;
	unsigned int compensated_ratio;
	int interval; /* jiffies to sleep for each attempt */

	w_data = container_of(work, struct powerclamp_worker_data,
			      balancing_work);

	/*
	 * Make sure a user-selected ratio does not take effect until
	 * the next round. Adjust target_ratio if the user has changed
	 * the target, so that we can converge quickly.
	 */
	w_data->target_ratio = READ_ONCE(set_target_ratio);
	w_data->guard = 1 + w_data->target_ratio / 20;
	w_data->window_size_now = window_size;
	w_data->duration_jiffies = msecs_to_jiffies(duration);
	w_data->count++;

	/*
	 * Systems may differ in their ability to enter package level
	 * c-states, thus we need to compensate the injected idle ratio
	 * to achieve the actual target reported by the HW.
	 */
	compensated_ratio = w_data->target_ratio +
		get_compensation(w_data->target_ratio);
	if (compensated_ratio <= 0)
		compensated_ratio = 1;
	interval = w_data->duration_jiffies * 100 / compensated_ratio;

	/* align idle time */
	target_jiffies = roundup(jiffies, interval);
	sleeptime = target_jiffies - jiffies;
	if (sleeptime <= 0)
		sleeptime = 1;

	if (clamping && w_data->clamping && cpu_online(w_data->cpu))
		kthread_queue_delayed_work(w_data->worker,
					   &w_data->idle_injection_work,
					   sleeptime);
}

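/*
 * Per-CPU idle injection work: on the controlling CPU, re-evaluate the
 * achieved ratio once per window. Unless the controls decided to skip
 * this round, inject the requested idle time via play_idle(), then
 * queue the balancing work again for the next attempt.
 */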
static void clamp_idle_injection_func(struct kthread_work *work)
{
	struct powerclamp_worker_data *w_data;

	w_data = container_of(work, struct powerclamp_worker_data,
			      idle_injection_work.work);

	/*
	 * only elected controlling cpu can collect stats and update
	 * control parameters.
	 */
	if (w_data->cpu == control_cpu &&
	    !(w_data->count % w_data->window_size_now)) {
		should_skip =
			powerclamp_adjust_controls(w_data->target_ratio,
						   w_data->guard,
						   w_data->window_size_now);
		smp_mb();
	}

	if (should_skip)
		goto balance;

	play_idle(jiffies_to_usecs(w_data->duration_jiffies));

balance:
	if (clamping && w_data->clamping && cpu_online(w_data->cpu))
		kthread_queue_work(w_data->worker, &w_data->balancing_work);
}

/*
 * 1 HZ polling while clamping is active, useful for userspace
 * to monitor actual idle ratio.
 */
static void poll_pkg_cstate(struct work_struct *dummy);
static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate);
static void poll_pkg_cstate(struct work_struct *dummy)
{
	static u64 msr_last;
	static u64 tsc_last;

	u64 msr_now;
	u64 tsc_now;
	u64 val64;

	msr_now = pkg_state_counter();
	tsc_now = rdtsc();

	/* calculate pkg cstate vs tsc ratio */
	if (!msr_last || !tsc_last)
		pkg_cstate_ratio_cur = 1;
	else {
		if (tsc_now - tsc_last) {
			val64 = 100 * (msr_now - msr_last);
			do_div(val64, (tsc_now - tsc_last));
			pkg_cstate_ratio_cur = val64;
		}
	}

	/* update record */
	msr_last = msr_now;
	tsc_last = tsc_now;

	if (clamping)
		schedule_delayed_work(&poll_pkg_cstate_work, HZ);
}

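/*
 * Create a per-CPU kthread worker pinned to @cpu, give it FIFO priority
 * so idle injection is not preempted by normal tasks, and kick off the
 * first balancing work.
 */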
static void start_power_clamp_worker(unsigned long cpu)
{
	struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);
	struct kthread_worker *worker;

	worker = kthread_create_worker_on_cpu(cpu, 0, "kidle_inj/%ld", cpu);
	if (IS_ERR(worker))
		return;

	w_data->worker = worker;
	w_data->count = 0;
	w_data->cpu = cpu;
	w_data->clamping = true;
	set_bit(cpu, cpu_clamping_mask);
	sched_set_fifo(worker->task);
	kthread_init_work(&w_data->balancing_work, clamp_balancing_func);
	kthread_init_delayed_work(&w_data->idle_injection_work,
				  clamp_idle_injection_func);
	kthread_queue_work(w_data->worker, &w_data->balancing_work);
}

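/*
 * Stop the clamping worker on @cpu: clear its clamping flag, cancel any
 * pending work and destroy the kthread worker.
 */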
static void stop_power_clamp_worker(unsigned long cpu)
{
	struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);

	if (!w_data->worker)
		return;

	w_data->clamping = false;
	/*
	 * Make sure that all works that get queued after this point see
	 * the clamping disabled. The counterpart barrier is not needed
	 * because there is an implicit memory barrier when the queued
	 * work is processed.
	 */
	smp_wmb();
	kthread_cancel_work_sync(&w_data->balancing_work);
	kthread_cancel_delayed_work_sync(&w_data->idle_injection_work);
	/*
	 * The balancing work might still be queued here because
	 * the handling of the "clamping" variable, cancel, and queue
	 * operations are not synchronized via a lock. But it is not
	 * a big deal. The balancing work is fast and destroying the
	 * kthread will wait for it.
	 */
	clear_bit(w_data->cpu, cpu_clamping_mask);
	kthread_destroy_worker(w_data->worker);

	w_data->worker = NULL;
}

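/*
 * Start clamping: take the CPU hotplug read lock, elect the first online
 * CPU as the controlling CPU, optionally start the 1 HZ package C-state
 * polling, and spawn one clamping worker per online CPU.
 */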
static int start_power_clamp(void)
{
	unsigned long cpu;

	set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1);
	/* prevent cpu hotplug */
	get_online_cpus();

	/* prefer BSP */
	control_cpu = cpumask_first(cpu_online_mask);

	clamping = true;
	if (poll_pkg_cstate_enable)
		schedule_delayed_work(&poll_pkg_cstate_work, 0);

	/* start one kthread worker per online cpu */
	for_each_online_cpu(cpu) {
		start_power_clamp_worker(cpu);
	}
	put_online_cpus();

	return 0;
}

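/* Stop all clamping workers and wait for them to finish. */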
static void end_power_clamp(void)
{
	int i;

	/*
	 * Block requeuing in all the kthread workers. They will flush and
	 * stop faster.
	 */
	clamping = false;
	if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) {
		for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
			pr_debug("clamping worker for cpu %d alive, destroy\n",
				 i);
			stop_power_clamp_worker(i);
		}
	}
}

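/*
 * CPU hotplug callbacks: start or stop the clamping worker for the CPU
 * going online/offline and, if needed, migrate the controlling CPU role
 * to another online CPU.
 */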
static int powerclamp_cpu_online(unsigned int cpu)
{
	if (!clamping)
		return 0;
	start_power_clamp_worker(cpu);
	/* prefer BSP as controlling CPU */
	if (cpu == 0) {
		control_cpu = 0;
		smp_mb();
	}
	return 0;
}

static int powerclamp_cpu_predown(unsigned int cpu)
{
	if (!clamping)
		return 0;

	stop_power_clamp_worker(cpu);
	if (cpu != control_cpu)
		return 0;

	control_cpu = cpumask_first(cpu_online_mask);
	if (control_cpu == cpu)
		control_cpu = cpumask_next(cpu, cpu_online_mask);
	smp_mb();
	return 0;
}

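/*
 * Cooling device callbacks: the cooling state is the target idle
 * injection ratio in percent.
 */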
static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
				 unsigned long *state)
{
	*state = MAX_TARGET_RATIO;

	return 0;
}

static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
				 unsigned long *state)
{
	if (clamping) {
		if (poll_pkg_cstate_enable)
			*state = pkg_cstate_ratio_cur;
		else
			*state = set_target_ratio;
	} else {
		/* to save power, do not poll idle ratio while not clamping */
		*state = -1; /* indicates invalid state */
	}

	return 0;
}

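/*
 * Setting the cooling state from 0 to a non-zero ratio starts idle
 * injection, setting it back to 0 stops it, and any other change just
 * updates the target ratio for the already running workers.
 */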
static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev,
				 unsigned long new_target_ratio)
{
	int ret = 0;

	new_target_ratio = clamp(new_target_ratio, 0UL,
				(unsigned long) (MAX_TARGET_RATIO - 1));
	if (set_target_ratio == 0 && new_target_ratio > 0) {
		pr_info("Start idle injection to reduce power\n");
		set_target_ratio = new_target_ratio;
		ret = start_power_clamp();
		goto exit_set;
	} else if (set_target_ratio > 0 && new_target_ratio == 0) {
		pr_info("Stop forced idle injection\n");
		end_power_clamp();
		set_target_ratio = 0;
	} else /* adjust currently running */ {
		set_target_ratio = new_target_ratio;
		/* make new set_target_ratio visible to other cpus */
		smp_mb();
	}

exit_set:
	return ret;
}

/* bind to generic thermal layer as cooling device */
static struct thermal_cooling_device_ops powerclamp_cooling_ops = {
	.get_max_state = powerclamp_get_max_state,
	.get_cur_state = powerclamp_get_cur_state,
	.set_cur_state = powerclamp_set_cur_state,
};

static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = {
	X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_MWAIT, NULL),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);

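/*
 * Verify that the CPU supports MWAIT and has at least one package
 * C-state residency counter, then record the deepest MWAIT hint.
 */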
static int __init powerclamp_probe(void)
{
	if (!x86_match_cpu(intel_powerclamp_ids)) {
		pr_err("CPU does not support MWAIT\n");
		return -ENODEV;
	}

	/* The goal for idle time alignment is to achieve package cstate. */
	if (!has_pkg_state_counter()) {
		pr_info("No package C-state available\n");
		return -ENODEV;
	}

	/* find the deepest mwait value */
	find_target_mwait();

	return 0;
}

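/*
 * Dump the per-ratio calibration data (confidence counters and
 * compensation values) via debugfs.
 */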
static int powerclamp_debug_show(struct seq_file *m, void *unused)
{
	int i = 0;

	seq_printf(m, "controlling cpu: %d\n", control_cpu);
	seq_printf(m, "pct confidence steady dynamic (compensation)\n");
	for (i = 0; i < MAX_TARGET_RATIO; i++) {
		seq_printf(m, "%d\t%lu\t%lu\t%lu\n",
			i,
			cal_data[i].confidence,
			cal_data[i].steady_comp,
			cal_data[i].dynamic_comp);
	}

	return 0;
}

DEFINE_SHOW_ATTRIBUTE(powerclamp_debug);

static inline void powerclamp_create_debug_files(void)
{
	debug_dir = debugfs_create_dir("intel_powerclamp", NULL);

	debugfs_create_file("powerclamp_calib", S_IRUGO, debug_dir, cal_data,
			    &powerclamp_debug_fops);
}

static enum cpuhp_state hp_state;

static int __init powerclamp_init(void)
{
	int retval;
	int bitmap_size;

	bitmap_size = BITS_TO_LONGS(num_possible_cpus()) * sizeof(long);
	cpu_clamping_mask = kzalloc(bitmap_size, GFP_KERNEL);
	if (!cpu_clamping_mask)
		return -ENOMEM;

	/* probe cpu features and ids here */
	retval = powerclamp_probe();
	if (retval)
		goto exit_free;

	/* set default limit, maybe adjusted during runtime based on feedback */
	window_size = 2;
	retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					   "thermal/intel_powerclamp:online",
					   powerclamp_cpu_online,
					   powerclamp_cpu_predown);
	if (retval < 0)
		goto exit_free;

	hp_state = retval;

	worker_data = alloc_percpu(struct powerclamp_worker_data);
	if (!worker_data) {
		retval = -ENOMEM;
		goto exit_unregister;
	}

	if (topology_max_packages() == 1 && topology_max_die_per_package() == 1)
		poll_pkg_cstate_enable = true;

	cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
						&powerclamp_cooling_ops);
	if (IS_ERR(cooling_dev)) {
		retval = -ENODEV;
		goto exit_free_thread;
	}

	if (!duration)
		duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES);

	powerclamp_create_debug_files();

	return 0;

exit_free_thread:
	free_percpu(worker_data);
exit_unregister:
	cpuhp_remove_state_nocalls(hp_state);
exit_free:
	kfree(cpu_clamping_mask);
	return retval;
}
module_init(powerclamp_init);

static void __exit powerclamp_exit(void)
{
	end_power_clamp();
	cpuhp_remove_state_nocalls(hp_state);
	free_percpu(worker_data);
	thermal_cooling_device_unregister(cooling_dev);
	kfree(cpu_clamping_mask);

	cancel_delayed_work_sync(&poll_pkg_cstate_work);
	debugfs_remove_recursive(debug_dir);
}
module_exit(powerclamp_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>");
MODULE_DESCRIPTION("Package Level C-state Idle Injection for Intel CPUs");