18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Timer events oriented CPU idle governor 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (C) 2018 Intel Corporation 68c2ecf20Sopenharmony_ci * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> 78c2ecf20Sopenharmony_ci * 88c2ecf20Sopenharmony_ci * The idea of this governor is based on the observation that on many systems 98c2ecf20Sopenharmony_ci * timer events are two or more orders of magnitude more frequent than any 108c2ecf20Sopenharmony_ci * other interrupts, so they are likely to be the most significant source of CPU 118c2ecf20Sopenharmony_ci * wakeups from idle states. Moreover, information about what happened in the 128c2ecf20Sopenharmony_ci * (relatively recent) past can be used to estimate whether or not the deepest 138c2ecf20Sopenharmony_ci * idle state with target residency within the time to the closest timer is 148c2ecf20Sopenharmony_ci * likely to be suitable for the upcoming idle time of the CPU and, if not, then 158c2ecf20Sopenharmony_ci * which of the shallower idle states to choose. 168c2ecf20Sopenharmony_ci * 178c2ecf20Sopenharmony_ci * Of course, non-timer wakeup sources are more important in some use cases and 188c2ecf20Sopenharmony_ci * they can be covered by taking a few most recent idle time intervals of the 198c2ecf20Sopenharmony_ci * CPU into account. However, even in that case it is not necessary to consider 208c2ecf20Sopenharmony_ci * idle duration values greater than the time till the closest timer, as the 218c2ecf20Sopenharmony_ci * patterns that they may belong to produce average values close enough to 228c2ecf20Sopenharmony_ci * the time till the closest timer (sleep length) anyway. 238c2ecf20Sopenharmony_ci * 248c2ecf20Sopenharmony_ci * Thus this governor estimates whether or not the upcoming idle time of the CPU 258c2ecf20Sopenharmony_ci * is likely to be significantly shorter than the sleep length and selects an 268c2ecf20Sopenharmony_ci * idle state for it in accordance with that, as follows: 278c2ecf20Sopenharmony_ci * 288c2ecf20Sopenharmony_ci * - Find an idle state on the basis of the sleep length and state statistics 298c2ecf20Sopenharmony_ci * collected over time: 308c2ecf20Sopenharmony_ci * 318c2ecf20Sopenharmony_ci * o Find the deepest idle state whose target residency is less than or equal 328c2ecf20Sopenharmony_ci * to the sleep length. 338c2ecf20Sopenharmony_ci * 348c2ecf20Sopenharmony_ci * o Select it if it matched both the sleep length and the observed idle 358c2ecf20Sopenharmony_ci * duration in the past more often than it matched the sleep length alone 368c2ecf20Sopenharmony_ci * (i.e. the observed idle duration was significantly shorter than the sleep 378c2ecf20Sopenharmony_ci * length matched by it). 388c2ecf20Sopenharmony_ci * 398c2ecf20Sopenharmony_ci * o Otherwise, select the shallower state with the greatest matched "early" 408c2ecf20Sopenharmony_ci * wakeups metric. 418c2ecf20Sopenharmony_ci * 428c2ecf20Sopenharmony_ci * - If the majority of the most recent idle duration values are below the 438c2ecf20Sopenharmony_ci * target residency of the idle state selected so far, use those values to 448c2ecf20Sopenharmony_ci * compute the new expected idle duration and find an idle state matching it 458c2ecf20Sopenharmony_ci * (which has to be shallower than the one selected so far). 468c2ecf20Sopenharmony_ci */ 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_ci#include <linux/cpuidle.h> 498c2ecf20Sopenharmony_ci#include <linux/jiffies.h> 508c2ecf20Sopenharmony_ci#include <linux/kernel.h> 518c2ecf20Sopenharmony_ci#include <linux/sched/clock.h> 528c2ecf20Sopenharmony_ci#include <linux/tick.h> 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_ci/* 558c2ecf20Sopenharmony_ci * The PULSE value is added to metrics when they grow and the DECAY_SHIFT value 568c2ecf20Sopenharmony_ci * is used for decreasing metrics on a regular basis. 578c2ecf20Sopenharmony_ci */ 588c2ecf20Sopenharmony_ci#define PULSE 1024 598c2ecf20Sopenharmony_ci#define DECAY_SHIFT 3 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_ci/* 628c2ecf20Sopenharmony_ci * Number of the most recent idle duration values to take into consideration for 638c2ecf20Sopenharmony_ci * the detection of wakeup patterns. 648c2ecf20Sopenharmony_ci */ 658c2ecf20Sopenharmony_ci#define INTERVALS 8 668c2ecf20Sopenharmony_ci 678c2ecf20Sopenharmony_ci/** 688c2ecf20Sopenharmony_ci * struct teo_idle_state - Idle state data used by the TEO cpuidle governor. 698c2ecf20Sopenharmony_ci * @early_hits: "Early" CPU wakeups "matching" this state. 708c2ecf20Sopenharmony_ci * @hits: "On time" CPU wakeups "matching" this state. 718c2ecf20Sopenharmony_ci * @misses: CPU wakeups "missing" this state. 728c2ecf20Sopenharmony_ci * 738c2ecf20Sopenharmony_ci * A CPU wakeup is "matched" by a given idle state if the idle duration measured 748c2ecf20Sopenharmony_ci * after the wakeup is between the target residency of that state and the target 758c2ecf20Sopenharmony_ci * residency of the next one (or if this is the deepest available idle state, it 768c2ecf20Sopenharmony_ci * "matches" a CPU wakeup when the measured idle duration is at least equal to 778c2ecf20Sopenharmony_ci * its target residency). 788c2ecf20Sopenharmony_ci * 798c2ecf20Sopenharmony_ci * Also, from the TEO governor perspective, a CPU wakeup from idle is "early" if 808c2ecf20Sopenharmony_ci * it occurs significantly earlier than the closest expected timer event (that 818c2ecf20Sopenharmony_ci * is, early enough to match an idle state shallower than the one matching the 828c2ecf20Sopenharmony_ci * time till the closest timer event). Otherwise, the wakeup is "on time", or 838c2ecf20Sopenharmony_ci * it is a "hit". 848c2ecf20Sopenharmony_ci * 858c2ecf20Sopenharmony_ci * A "miss" occurs when the given state doesn't match the wakeup, but it matches 868c2ecf20Sopenharmony_ci * the time till the closest timer event used for idle state selection. 878c2ecf20Sopenharmony_ci */ 888c2ecf20Sopenharmony_cistruct teo_idle_state { 898c2ecf20Sopenharmony_ci unsigned int early_hits; 908c2ecf20Sopenharmony_ci unsigned int hits; 918c2ecf20Sopenharmony_ci unsigned int misses; 928c2ecf20Sopenharmony_ci}; 938c2ecf20Sopenharmony_ci 948c2ecf20Sopenharmony_ci/** 958c2ecf20Sopenharmony_ci * struct teo_cpu - CPU data used by the TEO cpuidle governor. 968c2ecf20Sopenharmony_ci * @time_span_ns: Time between idle state selection and post-wakeup update. 978c2ecf20Sopenharmony_ci * @sleep_length_ns: Time till the closest timer event (at the selection time). 988c2ecf20Sopenharmony_ci * @states: Idle states data corresponding to this CPU. 998c2ecf20Sopenharmony_ci * @interval_idx: Index of the most recent saved idle interval. 1008c2ecf20Sopenharmony_ci * @intervals: Saved idle duration values. 1018c2ecf20Sopenharmony_ci */ 1028c2ecf20Sopenharmony_cistruct teo_cpu { 1038c2ecf20Sopenharmony_ci u64 time_span_ns; 1048c2ecf20Sopenharmony_ci u64 sleep_length_ns; 1058c2ecf20Sopenharmony_ci struct teo_idle_state states[CPUIDLE_STATE_MAX]; 1068c2ecf20Sopenharmony_ci int interval_idx; 1078c2ecf20Sopenharmony_ci u64 intervals[INTERVALS]; 1088c2ecf20Sopenharmony_ci}; 1098c2ecf20Sopenharmony_ci 1108c2ecf20Sopenharmony_cistatic DEFINE_PER_CPU(struct teo_cpu, teo_cpus); 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_ci/** 1138c2ecf20Sopenharmony_ci * teo_update - Update CPU data after wakeup. 1148c2ecf20Sopenharmony_ci * @drv: cpuidle driver containing state data. 1158c2ecf20Sopenharmony_ci * @dev: Target CPU. 1168c2ecf20Sopenharmony_ci */ 1178c2ecf20Sopenharmony_cistatic void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) 1188c2ecf20Sopenharmony_ci{ 1198c2ecf20Sopenharmony_ci struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); 1208c2ecf20Sopenharmony_ci int i, idx_hit = -1, idx_timer = -1; 1218c2ecf20Sopenharmony_ci u64 measured_ns; 1228c2ecf20Sopenharmony_ci 1238c2ecf20Sopenharmony_ci if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) { 1248c2ecf20Sopenharmony_ci /* 1258c2ecf20Sopenharmony_ci * One of the safety nets has triggered or the wakeup was close 1268c2ecf20Sopenharmony_ci * enough to the closest timer event expected at the idle state 1278c2ecf20Sopenharmony_ci * selection time to be discarded. 1288c2ecf20Sopenharmony_ci */ 1298c2ecf20Sopenharmony_ci measured_ns = U64_MAX; 1308c2ecf20Sopenharmony_ci } else { 1318c2ecf20Sopenharmony_ci u64 lat_ns = drv->states[dev->last_state_idx].exit_latency_ns; 1328c2ecf20Sopenharmony_ci 1338c2ecf20Sopenharmony_ci /* 1348c2ecf20Sopenharmony_ci * The computations below are to determine whether or not the 1358c2ecf20Sopenharmony_ci * (saved) time till the next timer event and the measured idle 1368c2ecf20Sopenharmony_ci * duration fall into the same "bin", so use last_residency_ns 1378c2ecf20Sopenharmony_ci * for that instead of time_span_ns which includes the cpuidle 1388c2ecf20Sopenharmony_ci * overhead. 1398c2ecf20Sopenharmony_ci */ 1408c2ecf20Sopenharmony_ci measured_ns = dev->last_residency_ns; 1418c2ecf20Sopenharmony_ci /* 1428c2ecf20Sopenharmony_ci * The delay between the wakeup and the first instruction 1438c2ecf20Sopenharmony_ci * executed by the CPU is not likely to be worst-case every 1448c2ecf20Sopenharmony_ci * time, so take 1/2 of the exit latency as a very rough 1458c2ecf20Sopenharmony_ci * approximation of the average of it. 1468c2ecf20Sopenharmony_ci */ 1478c2ecf20Sopenharmony_ci if (measured_ns >= lat_ns) 1488c2ecf20Sopenharmony_ci measured_ns -= lat_ns / 2; 1498c2ecf20Sopenharmony_ci else 1508c2ecf20Sopenharmony_ci measured_ns /= 2; 1518c2ecf20Sopenharmony_ci } 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_ci /* 1548c2ecf20Sopenharmony_ci * Decay the "early hits" metric for all of the states and find the 1558c2ecf20Sopenharmony_ci * states matching the sleep length and the measured idle duration. 1568c2ecf20Sopenharmony_ci */ 1578c2ecf20Sopenharmony_ci for (i = 0; i < drv->state_count; i++) { 1588c2ecf20Sopenharmony_ci unsigned int early_hits = cpu_data->states[i].early_hits; 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_ci cpu_data->states[i].early_hits -= early_hits >> DECAY_SHIFT; 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_ci if (drv->states[i].target_residency_ns <= cpu_data->sleep_length_ns) { 1638c2ecf20Sopenharmony_ci idx_timer = i; 1648c2ecf20Sopenharmony_ci if (drv->states[i].target_residency_ns <= measured_ns) 1658c2ecf20Sopenharmony_ci idx_hit = i; 1668c2ecf20Sopenharmony_ci } 1678c2ecf20Sopenharmony_ci } 1688c2ecf20Sopenharmony_ci 1698c2ecf20Sopenharmony_ci /* 1708c2ecf20Sopenharmony_ci * Update the "hits" and "misses" data for the state matching the sleep 1718c2ecf20Sopenharmony_ci * length. If it matches the measured idle duration too, this is a hit, 1728c2ecf20Sopenharmony_ci * so increase the "hits" metric for it then. Otherwise, this is a 1738c2ecf20Sopenharmony_ci * miss, so increase the "misses" metric for it. In the latter case 1748c2ecf20Sopenharmony_ci * also increase the "early hits" metric for the state that actually 1758c2ecf20Sopenharmony_ci * matches the measured idle duration. 1768c2ecf20Sopenharmony_ci */ 1778c2ecf20Sopenharmony_ci if (idx_timer >= 0) { 1788c2ecf20Sopenharmony_ci unsigned int hits = cpu_data->states[idx_timer].hits; 1798c2ecf20Sopenharmony_ci unsigned int misses = cpu_data->states[idx_timer].misses; 1808c2ecf20Sopenharmony_ci 1818c2ecf20Sopenharmony_ci hits -= hits >> DECAY_SHIFT; 1828c2ecf20Sopenharmony_ci misses -= misses >> DECAY_SHIFT; 1838c2ecf20Sopenharmony_ci 1848c2ecf20Sopenharmony_ci if (idx_timer > idx_hit) { 1858c2ecf20Sopenharmony_ci misses += PULSE; 1868c2ecf20Sopenharmony_ci if (idx_hit >= 0) 1878c2ecf20Sopenharmony_ci cpu_data->states[idx_hit].early_hits += PULSE; 1888c2ecf20Sopenharmony_ci } else { 1898c2ecf20Sopenharmony_ci hits += PULSE; 1908c2ecf20Sopenharmony_ci } 1918c2ecf20Sopenharmony_ci 1928c2ecf20Sopenharmony_ci cpu_data->states[idx_timer].misses = misses; 1938c2ecf20Sopenharmony_ci cpu_data->states[idx_timer].hits = hits; 1948c2ecf20Sopenharmony_ci } 1958c2ecf20Sopenharmony_ci 1968c2ecf20Sopenharmony_ci /* 1978c2ecf20Sopenharmony_ci * Save idle duration values corresponding to non-timer wakeups for 1988c2ecf20Sopenharmony_ci * pattern detection. 1998c2ecf20Sopenharmony_ci */ 2008c2ecf20Sopenharmony_ci cpu_data->intervals[cpu_data->interval_idx++] = measured_ns; 2018c2ecf20Sopenharmony_ci if (cpu_data->interval_idx >= INTERVALS) 2028c2ecf20Sopenharmony_ci cpu_data->interval_idx = 0; 2038c2ecf20Sopenharmony_ci} 2048c2ecf20Sopenharmony_ci 2058c2ecf20Sopenharmony_cistatic bool teo_time_ok(u64 interval_ns) 2068c2ecf20Sopenharmony_ci{ 2078c2ecf20Sopenharmony_ci return !tick_nohz_tick_stopped() || interval_ns >= TICK_NSEC; 2088c2ecf20Sopenharmony_ci} 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_ci/** 2118c2ecf20Sopenharmony_ci * teo_find_shallower_state - Find shallower idle state matching given duration. 2128c2ecf20Sopenharmony_ci * @drv: cpuidle driver containing state data. 2138c2ecf20Sopenharmony_ci * @dev: Target CPU. 2148c2ecf20Sopenharmony_ci * @state_idx: Index of the capping idle state. 2158c2ecf20Sopenharmony_ci * @duration_ns: Idle duration value to match. 2168c2ecf20Sopenharmony_ci */ 2178c2ecf20Sopenharmony_cistatic int teo_find_shallower_state(struct cpuidle_driver *drv, 2188c2ecf20Sopenharmony_ci struct cpuidle_device *dev, int state_idx, 2198c2ecf20Sopenharmony_ci u64 duration_ns) 2208c2ecf20Sopenharmony_ci{ 2218c2ecf20Sopenharmony_ci int i; 2228c2ecf20Sopenharmony_ci 2238c2ecf20Sopenharmony_ci for (i = state_idx - 1; i >= 0; i--) { 2248c2ecf20Sopenharmony_ci if (dev->states_usage[i].disable) 2258c2ecf20Sopenharmony_ci continue; 2268c2ecf20Sopenharmony_ci 2278c2ecf20Sopenharmony_ci state_idx = i; 2288c2ecf20Sopenharmony_ci if (drv->states[i].target_residency_ns <= duration_ns) 2298c2ecf20Sopenharmony_ci break; 2308c2ecf20Sopenharmony_ci } 2318c2ecf20Sopenharmony_ci return state_idx; 2328c2ecf20Sopenharmony_ci} 2338c2ecf20Sopenharmony_ci 2348c2ecf20Sopenharmony_ci/** 2358c2ecf20Sopenharmony_ci * teo_select - Selects the next idle state to enter. 2368c2ecf20Sopenharmony_ci * @drv: cpuidle driver containing state data. 2378c2ecf20Sopenharmony_ci * @dev: Target CPU. 2388c2ecf20Sopenharmony_ci * @stop_tick: Indication on whether or not to stop the scheduler tick. 2398c2ecf20Sopenharmony_ci */ 2408c2ecf20Sopenharmony_cistatic int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, 2418c2ecf20Sopenharmony_ci bool *stop_tick) 2428c2ecf20Sopenharmony_ci{ 2438c2ecf20Sopenharmony_ci struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); 2448c2ecf20Sopenharmony_ci s64 latency_req = cpuidle_governor_latency_req(dev->cpu); 2458c2ecf20Sopenharmony_ci u64 duration_ns; 2468c2ecf20Sopenharmony_ci unsigned int hits, misses, early_hits; 2478c2ecf20Sopenharmony_ci int max_early_idx, prev_max_early_idx, constraint_idx, idx, i; 2488c2ecf20Sopenharmony_ci ktime_t delta_tick; 2498c2ecf20Sopenharmony_ci 2508c2ecf20Sopenharmony_ci if (dev->last_state_idx >= 0) { 2518c2ecf20Sopenharmony_ci teo_update(drv, dev); 2528c2ecf20Sopenharmony_ci dev->last_state_idx = -1; 2538c2ecf20Sopenharmony_ci } 2548c2ecf20Sopenharmony_ci 2558c2ecf20Sopenharmony_ci cpu_data->time_span_ns = local_clock(); 2568c2ecf20Sopenharmony_ci 2578c2ecf20Sopenharmony_ci duration_ns = tick_nohz_get_sleep_length(&delta_tick); 2588c2ecf20Sopenharmony_ci cpu_data->sleep_length_ns = duration_ns; 2598c2ecf20Sopenharmony_ci 2608c2ecf20Sopenharmony_ci hits = 0; 2618c2ecf20Sopenharmony_ci misses = 0; 2628c2ecf20Sopenharmony_ci early_hits = 0; 2638c2ecf20Sopenharmony_ci max_early_idx = -1; 2648c2ecf20Sopenharmony_ci prev_max_early_idx = -1; 2658c2ecf20Sopenharmony_ci constraint_idx = drv->state_count; 2668c2ecf20Sopenharmony_ci idx = -1; 2678c2ecf20Sopenharmony_ci 2688c2ecf20Sopenharmony_ci for (i = 0; i < drv->state_count; i++) { 2698c2ecf20Sopenharmony_ci struct cpuidle_state *s = &drv->states[i]; 2708c2ecf20Sopenharmony_ci 2718c2ecf20Sopenharmony_ci if (dev->states_usage[i].disable) { 2728c2ecf20Sopenharmony_ci /* 2738c2ecf20Sopenharmony_ci * Ignore disabled states with target residencies beyond 2748c2ecf20Sopenharmony_ci * the anticipated idle duration. 2758c2ecf20Sopenharmony_ci */ 2768c2ecf20Sopenharmony_ci if (s->target_residency_ns > duration_ns) 2778c2ecf20Sopenharmony_ci continue; 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_ci /* 2808c2ecf20Sopenharmony_ci * This state is disabled, so the range of idle duration 2818c2ecf20Sopenharmony_ci * values corresponding to it is covered by the current 2828c2ecf20Sopenharmony_ci * candidate state, but still the "hits" and "misses" 2838c2ecf20Sopenharmony_ci * metrics of the disabled state need to be used to 2848c2ecf20Sopenharmony_ci * decide whether or not the state covering the range in 2858c2ecf20Sopenharmony_ci * question is good enough. 2868c2ecf20Sopenharmony_ci */ 2878c2ecf20Sopenharmony_ci hits = cpu_data->states[i].hits; 2888c2ecf20Sopenharmony_ci misses = cpu_data->states[i].misses; 2898c2ecf20Sopenharmony_ci 2908c2ecf20Sopenharmony_ci if (early_hits >= cpu_data->states[i].early_hits || 2918c2ecf20Sopenharmony_ci idx < 0) 2928c2ecf20Sopenharmony_ci continue; 2938c2ecf20Sopenharmony_ci 2948c2ecf20Sopenharmony_ci /* 2958c2ecf20Sopenharmony_ci * If the current candidate state has been the one with 2968c2ecf20Sopenharmony_ci * the maximum "early hits" metric so far, the "early 2978c2ecf20Sopenharmony_ci * hits" metric of the disabled state replaces the 2988c2ecf20Sopenharmony_ci * current "early hits" count to avoid selecting a 2998c2ecf20Sopenharmony_ci * deeper state with lower "early hits" metric. 3008c2ecf20Sopenharmony_ci */ 3018c2ecf20Sopenharmony_ci if (max_early_idx == idx) { 3028c2ecf20Sopenharmony_ci early_hits = cpu_data->states[i].early_hits; 3038c2ecf20Sopenharmony_ci continue; 3048c2ecf20Sopenharmony_ci } 3058c2ecf20Sopenharmony_ci 3068c2ecf20Sopenharmony_ci /* 3078c2ecf20Sopenharmony_ci * The current candidate state is closer to the disabled 3088c2ecf20Sopenharmony_ci * one than the current maximum "early hits" state, so 3098c2ecf20Sopenharmony_ci * replace the latter with it, but in case the maximum 3108c2ecf20Sopenharmony_ci * "early hits" state index has not been set so far, 3118c2ecf20Sopenharmony_ci * check if the current candidate state is not too 3128c2ecf20Sopenharmony_ci * shallow for that role. 3138c2ecf20Sopenharmony_ci */ 3148c2ecf20Sopenharmony_ci if (teo_time_ok(drv->states[idx].target_residency_ns)) { 3158c2ecf20Sopenharmony_ci prev_max_early_idx = max_early_idx; 3168c2ecf20Sopenharmony_ci early_hits = cpu_data->states[i].early_hits; 3178c2ecf20Sopenharmony_ci max_early_idx = idx; 3188c2ecf20Sopenharmony_ci } 3198c2ecf20Sopenharmony_ci 3208c2ecf20Sopenharmony_ci continue; 3218c2ecf20Sopenharmony_ci } 3228c2ecf20Sopenharmony_ci 3238c2ecf20Sopenharmony_ci if (idx < 0) { 3248c2ecf20Sopenharmony_ci idx = i; /* first enabled state */ 3258c2ecf20Sopenharmony_ci hits = cpu_data->states[i].hits; 3268c2ecf20Sopenharmony_ci misses = cpu_data->states[i].misses; 3278c2ecf20Sopenharmony_ci } 3288c2ecf20Sopenharmony_ci 3298c2ecf20Sopenharmony_ci if (s->target_residency_ns > duration_ns) 3308c2ecf20Sopenharmony_ci break; 3318c2ecf20Sopenharmony_ci 3328c2ecf20Sopenharmony_ci if (s->exit_latency_ns > latency_req && constraint_idx > i) 3338c2ecf20Sopenharmony_ci constraint_idx = i; 3348c2ecf20Sopenharmony_ci 3358c2ecf20Sopenharmony_ci idx = i; 3368c2ecf20Sopenharmony_ci hits = cpu_data->states[i].hits; 3378c2ecf20Sopenharmony_ci misses = cpu_data->states[i].misses; 3388c2ecf20Sopenharmony_ci 3398c2ecf20Sopenharmony_ci if (early_hits < cpu_data->states[i].early_hits && 3408c2ecf20Sopenharmony_ci teo_time_ok(drv->states[i].target_residency_ns)) { 3418c2ecf20Sopenharmony_ci prev_max_early_idx = max_early_idx; 3428c2ecf20Sopenharmony_ci early_hits = cpu_data->states[i].early_hits; 3438c2ecf20Sopenharmony_ci max_early_idx = i; 3448c2ecf20Sopenharmony_ci } 3458c2ecf20Sopenharmony_ci } 3468c2ecf20Sopenharmony_ci 3478c2ecf20Sopenharmony_ci /* 3488c2ecf20Sopenharmony_ci * If the "hits" metric of the idle state matching the sleep length is 3498c2ecf20Sopenharmony_ci * greater than its "misses" metric, that is the one to use. Otherwise, 3508c2ecf20Sopenharmony_ci * it is more likely that one of the shallower states will match the 3518c2ecf20Sopenharmony_ci * idle duration observed after wakeup, so take the one with the maximum 3528c2ecf20Sopenharmony_ci * "early hits" metric, but if that cannot be determined, just use the 3538c2ecf20Sopenharmony_ci * state selected so far. 3548c2ecf20Sopenharmony_ci */ 3558c2ecf20Sopenharmony_ci if (hits <= misses) { 3568c2ecf20Sopenharmony_ci /* 3578c2ecf20Sopenharmony_ci * The current candidate state is not suitable, so take the one 3588c2ecf20Sopenharmony_ci * whose "early hits" metric is the maximum for the range of 3598c2ecf20Sopenharmony_ci * shallower states. 3608c2ecf20Sopenharmony_ci */ 3618c2ecf20Sopenharmony_ci if (idx == max_early_idx) 3628c2ecf20Sopenharmony_ci max_early_idx = prev_max_early_idx; 3638c2ecf20Sopenharmony_ci 3648c2ecf20Sopenharmony_ci if (max_early_idx >= 0) { 3658c2ecf20Sopenharmony_ci idx = max_early_idx; 3668c2ecf20Sopenharmony_ci duration_ns = drv->states[idx].target_residency_ns; 3678c2ecf20Sopenharmony_ci } 3688c2ecf20Sopenharmony_ci } 3698c2ecf20Sopenharmony_ci 3708c2ecf20Sopenharmony_ci /* 3718c2ecf20Sopenharmony_ci * If there is a latency constraint, it may be necessary to use a 3728c2ecf20Sopenharmony_ci * shallower idle state than the one selected so far. 3738c2ecf20Sopenharmony_ci */ 3748c2ecf20Sopenharmony_ci if (constraint_idx < idx) 3758c2ecf20Sopenharmony_ci idx = constraint_idx; 3768c2ecf20Sopenharmony_ci 3778c2ecf20Sopenharmony_ci if (idx < 0) { 3788c2ecf20Sopenharmony_ci idx = 0; /* No states enabled. Must use 0. */ 3798c2ecf20Sopenharmony_ci } else if (idx > 0) { 3808c2ecf20Sopenharmony_ci unsigned int count = 0; 3818c2ecf20Sopenharmony_ci u64 sum = 0; 3828c2ecf20Sopenharmony_ci 3838c2ecf20Sopenharmony_ci /* 3848c2ecf20Sopenharmony_ci * Count and sum the most recent idle duration values less than 3858c2ecf20Sopenharmony_ci * the current expected idle duration value. 3868c2ecf20Sopenharmony_ci */ 3878c2ecf20Sopenharmony_ci for (i = 0; i < INTERVALS; i++) { 3888c2ecf20Sopenharmony_ci u64 val = cpu_data->intervals[i]; 3898c2ecf20Sopenharmony_ci 3908c2ecf20Sopenharmony_ci if (val >= duration_ns) 3918c2ecf20Sopenharmony_ci continue; 3928c2ecf20Sopenharmony_ci 3938c2ecf20Sopenharmony_ci count++; 3948c2ecf20Sopenharmony_ci sum += val; 3958c2ecf20Sopenharmony_ci } 3968c2ecf20Sopenharmony_ci 3978c2ecf20Sopenharmony_ci /* 3988c2ecf20Sopenharmony_ci * Give up unless the majority of the most recent idle duration 3998c2ecf20Sopenharmony_ci * values are in the interesting range. 4008c2ecf20Sopenharmony_ci */ 4018c2ecf20Sopenharmony_ci if (count > INTERVALS / 2) { 4028c2ecf20Sopenharmony_ci u64 avg_ns = div64_u64(sum, count); 4038c2ecf20Sopenharmony_ci 4048c2ecf20Sopenharmony_ci /* 4058c2ecf20Sopenharmony_ci * Avoid spending too much time in an idle state that 4068c2ecf20Sopenharmony_ci * would be too shallow. 4078c2ecf20Sopenharmony_ci */ 4088c2ecf20Sopenharmony_ci if (teo_time_ok(avg_ns)) { 4098c2ecf20Sopenharmony_ci duration_ns = avg_ns; 4108c2ecf20Sopenharmony_ci if (drv->states[idx].target_residency_ns > avg_ns) 4118c2ecf20Sopenharmony_ci idx = teo_find_shallower_state(drv, dev, 4128c2ecf20Sopenharmony_ci idx, avg_ns); 4138c2ecf20Sopenharmony_ci } 4148c2ecf20Sopenharmony_ci } 4158c2ecf20Sopenharmony_ci } 4168c2ecf20Sopenharmony_ci 4178c2ecf20Sopenharmony_ci /* 4188c2ecf20Sopenharmony_ci * Don't stop the tick if the selected state is a polling one or if the 4198c2ecf20Sopenharmony_ci * expected idle duration is shorter than the tick period length. 4208c2ecf20Sopenharmony_ci */ 4218c2ecf20Sopenharmony_ci if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) || 4228c2ecf20Sopenharmony_ci duration_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) { 4238c2ecf20Sopenharmony_ci *stop_tick = false; 4248c2ecf20Sopenharmony_ci 4258c2ecf20Sopenharmony_ci /* 4268c2ecf20Sopenharmony_ci * The tick is not going to be stopped, so if the target 4278c2ecf20Sopenharmony_ci * residency of the state to be returned is not within the time 4288c2ecf20Sopenharmony_ci * till the closest timer including the tick, try to correct 4298c2ecf20Sopenharmony_ci * that. 4308c2ecf20Sopenharmony_ci */ 4318c2ecf20Sopenharmony_ci if (idx > 0 && drv->states[idx].target_residency_ns > delta_tick) 4328c2ecf20Sopenharmony_ci idx = teo_find_shallower_state(drv, dev, idx, delta_tick); 4338c2ecf20Sopenharmony_ci } 4348c2ecf20Sopenharmony_ci 4358c2ecf20Sopenharmony_ci return idx; 4368c2ecf20Sopenharmony_ci} 4378c2ecf20Sopenharmony_ci 4388c2ecf20Sopenharmony_ci/** 4398c2ecf20Sopenharmony_ci * teo_reflect - Note that governor data for the CPU need to be updated. 4408c2ecf20Sopenharmony_ci * @dev: Target CPU. 4418c2ecf20Sopenharmony_ci * @state: Entered state. 4428c2ecf20Sopenharmony_ci */ 4438c2ecf20Sopenharmony_cistatic void teo_reflect(struct cpuidle_device *dev, int state) 4448c2ecf20Sopenharmony_ci{ 4458c2ecf20Sopenharmony_ci struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); 4468c2ecf20Sopenharmony_ci 4478c2ecf20Sopenharmony_ci dev->last_state_idx = state; 4488c2ecf20Sopenharmony_ci /* 4498c2ecf20Sopenharmony_ci * If the wakeup was not "natural", but triggered by one of the safety 4508c2ecf20Sopenharmony_ci * nets, assume that the CPU might have been idle for the entire sleep 4518c2ecf20Sopenharmony_ci * length time. 4528c2ecf20Sopenharmony_ci */ 4538c2ecf20Sopenharmony_ci if (dev->poll_time_limit || 4548c2ecf20Sopenharmony_ci (tick_nohz_idle_got_tick() && cpu_data->sleep_length_ns > TICK_NSEC)) { 4558c2ecf20Sopenharmony_ci dev->poll_time_limit = false; 4568c2ecf20Sopenharmony_ci cpu_data->time_span_ns = cpu_data->sleep_length_ns; 4578c2ecf20Sopenharmony_ci } else { 4588c2ecf20Sopenharmony_ci cpu_data->time_span_ns = local_clock() - cpu_data->time_span_ns; 4598c2ecf20Sopenharmony_ci } 4608c2ecf20Sopenharmony_ci} 4618c2ecf20Sopenharmony_ci 4628c2ecf20Sopenharmony_ci/** 4638c2ecf20Sopenharmony_ci * teo_enable_device - Initialize the governor's data for the target CPU. 4648c2ecf20Sopenharmony_ci * @drv: cpuidle driver (not used). 4658c2ecf20Sopenharmony_ci * @dev: Target CPU. 4668c2ecf20Sopenharmony_ci */ 4678c2ecf20Sopenharmony_cistatic int teo_enable_device(struct cpuidle_driver *drv, 4688c2ecf20Sopenharmony_ci struct cpuidle_device *dev) 4698c2ecf20Sopenharmony_ci{ 4708c2ecf20Sopenharmony_ci struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); 4718c2ecf20Sopenharmony_ci int i; 4728c2ecf20Sopenharmony_ci 4738c2ecf20Sopenharmony_ci memset(cpu_data, 0, sizeof(*cpu_data)); 4748c2ecf20Sopenharmony_ci 4758c2ecf20Sopenharmony_ci for (i = 0; i < INTERVALS; i++) 4768c2ecf20Sopenharmony_ci cpu_data->intervals[i] = U64_MAX; 4778c2ecf20Sopenharmony_ci 4788c2ecf20Sopenharmony_ci return 0; 4798c2ecf20Sopenharmony_ci} 4808c2ecf20Sopenharmony_ci 4818c2ecf20Sopenharmony_cistatic struct cpuidle_governor teo_governor = { 4828c2ecf20Sopenharmony_ci .name = "teo", 4838c2ecf20Sopenharmony_ci .rating = 19, 4848c2ecf20Sopenharmony_ci .enable = teo_enable_device, 4858c2ecf20Sopenharmony_ci .select = teo_select, 4868c2ecf20Sopenharmony_ci .reflect = teo_reflect, 4878c2ecf20Sopenharmony_ci}; 4888c2ecf20Sopenharmony_ci 4898c2ecf20Sopenharmony_cistatic int __init teo_governor_init(void) 4908c2ecf20Sopenharmony_ci{ 4918c2ecf20Sopenharmony_ci return cpuidle_register_governor(&teo_governor); 4928c2ecf20Sopenharmony_ci} 4938c2ecf20Sopenharmony_ci 4948c2ecf20Sopenharmony_cipostcore_initcall(teo_governor_init); 495