Bug 57411

Summary: 3.9-linux-next-20130501: OOPS in intel_pstate_sample
Product: Power Management Reporter: Martin Mokrejs (mmokrejs)
Component: intel_idleAssignee: Len Brown (lenb)
Status: CLOSED CODE_FIX    
Severity: normal CC: dirk.brandewie, rjw
Priority: P1    
Hardware: All   
OS: Linux   
Kernel Version: 3.9 Subsystem:
Regression: Yes Bisected commit-id:
Attachments: camera picture of the stacktrace
.config

Description Martin Mokrejs 2013-05-01 20:59:49 UTC
Created attachment 100391 [details]
camera picture of the stacktrace

This is maybe a dupe of bug #57401 but happened on linux-next-20130501 after I did "dmesg | less".

? pid_param_set
intel_pstate_timer_func
call_timer_fn
? __internal_add_timer
? pid_param_set
run_timer_softirq
__do_softirq
irq_exit
smp_apic_timer_interrupt
apic_timer_interrupt
? sysret_check
Comment 1 Martin Mokrejs 2013-05-02 08:53:07 UTC
Created attachment 100431 [details]
.config
Comment 2 Dirk Brandewie 2013-05-13 14:24:18 UTC
The patch below fixes this issue it is in Rafael's next branch and has been sent to stable.

Author: Dirk Brandewie <dirk.j.brandewie@intel.com>  2013-05-07 08:20:25
Committer: Rafael J. Wysocki <rafael.j.wysocki@intel.com>  2013-05-12 05:04:16
Parent: 885f925eef411f549f17bc64dd054a3269cf66cd (Merge branch 'pm-cpufreq')
Child:  d8f469e9cff3bc4a6317d923e9506be046aa7bdc (cpufreq / intel_pstate: use lowest requested max performance)
Branches: remotes/linux-pm/bleeding-edge, remotes/linux-pm/linux-next
Follows: v3.10-rc1
Precedes: 

    cpufreq / intel_pstate: remove idle time and duration from sample and calculations
    
    Idle time is taken into account in the APERF/MPERF ratio calculation
    there is no reason for the driver to track it seperately.  This
    reduces the work in the driver and makes the code more readable.
    
    Removal of the tracking of sample duration removes the possibility of
    the divide by zero exception when the duration is sub 1us
    
    References: https://bugzilla.kernel.org/show_bug.cgi?id=56691
    Reported-by: Mike Lothian <mike@fireburn.co.uk>
    Cc: 3.9+ <stable@vger.kernel.org>
    Signed-off-by: Dirk Brandewie <dirk.j.brandewie@intel.com>
    Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

------------------------ drivers/cpufreq/intel_pstate.c ------------------------
index cc3a8e6..c6e10d0 100644
@@ -48,12 +48,7 @@ static inline int32_t div_fp(int32_t x, int32_t y)
 }
 
 struct sample {
-	ktime_t start_time;
-	ktime_t end_time;
 	int core_pct_busy;
-	int pstate_pct_busy;
-	u64 duration_us;
-	u64 idletime_us;
 	u64 aperf;
 	u64 mperf;
 	int freq;
@@ -91,8 +86,6 @@ struct cpudata {
 	int min_pstate_count;
 	int idle_mode;
 
-	ktime_t prev_sample;
-	u64	prev_idle_time_us;
 	u64	prev_aperf;
 	u64	prev_mperf;
 	int	sample_ptr;
@@ -450,48 +443,26 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu,
 					struct sample *sample)
 {
 	u64 core_pct;
-	sample->pstate_pct_busy = 100 - div64_u64(
-					sample->idletime_us * 100,
-					sample->duration_us);
 	core_pct = div64_u64(sample->aperf * 100, sample->mperf);
 	sample->freq = cpu->pstate.max_pstate * core_pct * 1000;
 
-	sample->core_pct_busy = div_s64((sample->pstate_pct_busy * core_pct),
-					100);
+	sample->core_pct_busy = core_pct;
 }
 
 static inline void intel_pstate_sample(struct cpudata *cpu)
 {
-	ktime_t now;
-	u64 idle_time_us;
 	u64 aperf, mperf;
 
-	now = ktime_get();
-	idle_time_us = get_cpu_idle_time_us(cpu->cpu, NULL);
-
 	rdmsrl(MSR_IA32_APERF, aperf);
 	rdmsrl(MSR_IA32_MPERF, mperf);
-	/* for the first sample, don't actually record a sample, just
-	 * set the baseline */
-	if (cpu->prev_idle_time_us > 0) {
-		cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT;
-		cpu->samples[cpu->sample_ptr].start_time = cpu->prev_sample;
-		cpu->samples[cpu->sample_ptr].end_time = now;
-		cpu->samples[cpu->sample_ptr].duration_us =
-			ktime_us_delta(now, cpu->prev_sample);
-		cpu->samples[cpu->sample_ptr].idletime_us =
-			idle_time_us - cpu->prev_idle_time_us;
-
-		cpu->samples[cpu->sample_ptr].aperf = aperf;
-		cpu->samples[cpu->sample_ptr].mperf = mperf;
-		cpu->samples[cpu->sample_ptr].aperf -= cpu->prev_aperf;
-		cpu->samples[cpu->sample_ptr].mperf -= cpu->prev_mperf;
-
-		intel_pstate_calc_busy(cpu, &cpu->samples[cpu->sample_ptr]);
-	}
+	cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT;
+	cpu->samples[cpu->sample_ptr].aperf = aperf;
+	cpu->samples[cpu->sample_ptr].mperf = mperf;
+	cpu->samples[cpu->sample_ptr].aperf -= cpu->prev_aperf;
+	cpu->samples[cpu->sample_ptr].mperf -= cpu->prev_mperf;
+
+	intel_pstate_calc_busy(cpu, &cpu->samples[cpu->sample_ptr]);
 
-	cpu->prev_sample = now;
-	cpu->prev_idle_time_us = idle_time_us;
 	cpu->prev_aperf = aperf;
 	cpu->prev_mperf = mperf;
 }
Comment 3 Martin Mokrejs 2013-06-06 23:14:28 UTC
Hi Dirk, I somehow did not get the email about the patch. Now with 3.10-rc4 I don't see the issue. Thank you.
Comment 4 Rafael J. Wysocki 2013-08-10 13:05:55 UTC
Fixed by commit 1abc4b2 (cpufreq / intel_pstate: remove idle time and duration from sample and calculations).