--- drivers/cpufreq/cpufreq_governor.c | 80 ++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 35 deletions(-) Index: linux-pm/drivers/cpufreq/cpufreq_governor.c =================================================================== --- linux-pm.orig/drivers/cpufreq/cpufreq_governor.c +++ linux-pm/drivers/cpufreq/cpufreq_governor.c @@ -156,44 +156,54 @@ unsigned int dbs_update(struct cpufreq_p j_cdbs->prev_cpu_nice = cur_nice; } - if (unlikely(!time_elapsed || time_elapsed < idle_time)) - continue; - - /* - * If the CPU had gone completely idle, and a task just woke up - * on this CPU now, it would be unfair to calculate 'load' the - * usual way for this elapsed time-window, because it will show - * near-zero load, irrespective of how CPU intensive that task - * actually is. This is undesirable for latency-sensitive bursty - * workloads. - * - * To avoid this, we reuse the 'load' from the previous - * time-window and give this task a chance to start with a - * reasonably high CPU frequency. (However, we shouldn't over-do - * this copy, lest we get stuck at a high load (high frequency) - * for too long, even when the current system load has actually - * dropped down. So we perform the copy only once, upon the - * first wake-up from idle.) - * - * Detecting this situation is easy: the governor's utilization - * update handler would not have run during CPU-idle periods. - * Hence, an unusually large 'time_elapsed' (as compared to the - * sampling rate) indicates this scenario. - * - * prev_load can be zero in two cases and we must recalculate it - * for both cases: - * - during long idle intervals - * - explicitly set to zero - */ - if (unlikely(time_elapsed > 2 * sampling_rate && - j_cdbs->prev_load)) { + if (unlikely(!time_elapsed)) { + /* + * That can only happen when this function is called + * twice in a row with a very short interval between the + * calls, so the previous load value can be used then. + */ load = j_cdbs->prev_load; - + } else if (time_elapsed < idle_time) { /* - * Perform a destructive copy, to ensure that we copy - * the previous load only once, upon the first wake-up - * from idle. + * That can happen if idle_time is returned by + * get_cpu_idle_time_jiffy(). In that case idle_time is + * roughly equal to the difference between time_elapsed + * and "busy time" obtained from CPU statistics. In + * that case the "busy time" can end up being greater + * than time_elapsed (for example, if jiffies_64 and + * the CPU statistics are updated by different CPUs), + * so idle_time may in fact be negative. That means, + * though, that the CPU was busy all the time (on the + * rough average), 100 can be returned as the load. */ + load = (int)idle_time < 0 ? 100 : 0; + } else if (unlikely(time_elapsed > 2 * sampling_rate && + j_cdbs->prev_load)) { + /* + * If the CPU had gone completely idle and a task has + * just woken up on this CPU now, it would be unfair to + * calculate 'load' the usual way for this elapsed + * time-window, because it would show near-zero load, + * irrespective of how CPU intensive that task actually + * was. This is undesirable for latency-sensitive bursty + * workloads. + * + * To avoid this, reuse the 'load' from the previous + * time-window and give this task a chance to start with + * a reasonably high CPU frequency. However, that + * shouldn't be over-done, lest we get stuck at a high + * load (high frequency) for too long, even when the + * current system load has actually dropped down, so + * clear prev_load to guarantee that the load will be + * computed again next time. + * + * Detecting this situation is easy: the governor's + * utilization update handler would not have run during + * CPU-idle periods. Hence, an unusually large + * 'time_elapsed' (as compared to the sampling rate) + * indicates this scenario. + */ + load = j_cdbs->prev_load; j_cdbs->prev_load = 0; } else { load = 100 * (time_elapsed - idle_time) / time_elapsed;