Bug 215458
Summary: | tick/sched: iowait_sleeptime in tick_cpu_sched structure is a negative value | ||
---|---|---|---|
Product: | Timers | Reporter: | Liguang (zhangliguang) |
Component: | Other | Assignee: | john stultz (john.stultz) |
Status: | NEW --- | ||
Severity: | normal | ||
Priority: | P1 | ||
Hardware: | All | ||
OS: | Linux | ||
Kernel Version: | 5.10.23 | Subsystem: | |
Regression: | No | Bisected commit-id: |
Description
Liguang
2022-01-06 08:19:12 UTC
/* watchdog detector functions */ bool is_hardlockup(void) { unsigned long hrint = __this_cpu_read(hrtimer_interrupts); if (__this_cpu_read(hrtimer_interrupts_saved) == hrint) return true; __this_cpu_write(hrtimer_interrupts_saved, hrint); return false; } ----------------- crash> p hrtimer_interrupts:126 per_cpu(hrtimer_interrupts, 126) = $2 = 1524 crash> p hrtimer_interrupts_saved:126 per_cpu(hrtimer_interrupts_saved, 126) = $3 = 1524 crash> watchdog_thresh watchdog_thresh = $4 = 10 10s period, not arch timer interrupt occured. crash> sys config | grep HZ CONFIG_NO_HZ_COMMON=y # CONFIG_HZ_PERIODIC is not set # CONFIG_NO_HZ_IDLE is not set CONFIG_NO_HZ_FULL=y CONFIG_NO_HZ=y # CONFIG_HZ_100 is not set CONFIG_HZ_250=y # CONFIG_HZ_300 is not set # CONFIG_HZ_1000 is not set CONFIG_HZ=250 nohz=off is configured,theoretically,every 4ms an timer interrupt occured. grub info: BOOT_IMAGE=/vmlinuz-5.10.23-007.ali5000.alios7.aarch64 root=/dev/vda3 ro crashkernel=512M vconsole.font=latarcyrheb-sun16 vconsole.keymap=us biosdevname=0 console=tty0 scsi_mod.scan=sync pci=pcie_bus_perf nohz=off console=ttyS0,115200 virtio_ring.vring_force_dma_api=1 module_blacklist=ast,hns3 ast.ast_modeset=0 cryptomgr.notests cma_pernuma=64M mpam=acpi console=ttyAMA0,115200 initcall_debug pci=earlydump earlycon=pl011,0x2A400000,115200 module_blacklist=acc crash> p watchdog_hrtimer:126 per_cpu(watchdog_hrtimer, 126) = $5 = { node = { node = { __rb_parent_color = 18446603337777921265, rb_right = 0x0, rb_left = 0x0 }, expires = 6110016166520 }, _softexpires = 6110016166520, function = 0xffff80001017aa50 <watchdog_timer_fn>, base = 0xffff04473bbcc780, state = 1 '\001', is_rel = 0 '\000', is_soft = 0 '\000', is_hard = 1 '\001' } crash> p tick_cpu_sched:126 per_cpu(tick_cpu_sched, 126) = $1 = { sched_timer = { node = { node = { __rb_parent_color = 18446603337117384112, rb_right = 0x0, rb_left = 0x0 }, expires = 6108564000000 }, _softexpires = 6108564000000, function = 0xffff800010122ec0 <tick_sched_timer>, base = 0xffff04473bbcc780, state = 1 '\001', is_rel = 0 '\000', is_soft = 0 '\000', is_hard = 1 '\001' }, check_clocks = 0, nohz_mode = NOHZ_MODE_INACTIVE, inidle = 1, tick_stopped = 0, idle_active = 1, do_timer_last = 0, got_idle_tick = 1, last_tick = 0, next_tick = 0, idle_jiffies = 0, idle_calls = 0, idle_sleeps = 0, idle_entrytime = 5012087709249, idle_waketime = 0, idle_exittime = 0, idle_sleeptime = 4936136669951, iowait_sleeptime = -1942739704, last_jiffies = 0, timer_expires = 0, timer_expires_base = 0, next_timer = 0, idle_expires = 0, tick_dep_mask = { counter = 0 } } (In reply to Liguang from comment #3) > crash> p tick_cpu_sched:126 > per_cpu(tick_cpu_sched, 126) = $1 = { > sched_timer = { > node = { > node = { > __rb_parent_color = 18446603337117384112, > rb_right = 0x0, > rb_left = 0x0 > }, > expires = 6108564000000 > }, > _softexpires = 6108564000000, > function = 0xffff800010122ec0 <tick_sched_timer>, > base = 0xffff04473bbcc780, > state = 1 '\001', > is_rel = 0 '\000', > is_soft = 0 '\000', > is_hard = 1 '\001' > }, > check_clocks = 0, > nohz_mode = NOHZ_MODE_INACTIVE, > inidle = 1, > tick_stopped = 0, > idle_active = 1, > do_timer_last = 0, > got_idle_tick = 1, > last_tick = 0, > next_tick = 0, > idle_jiffies = 0, > idle_calls = 0, > idle_sleeps = 0, > idle_entrytime = 5012087709249, > idle_waketime = 0, > idle_exittime = 0, > idle_sleeptime = 4936136669951, > iowait_sleeptime = -1942739704, > last_jiffies = 0, > timer_expires = 0, > timer_expires_base = 0, > next_timer = 0, > idle_expires = 0, > tick_dep_mask = { > counter = 0 > } > } iowait_sleeptime = -1942739704 why iowait_sleeptime is a negative value? nohz=off was configured in the startup parameters crash> tick_nohz_enabled tick_nohz_enabled = $1 = false crash> tick_nohz_active tick_nohz_active = $2 = 0 and current no cpufreq driver was used: crash> cpufreq_driver cpufreq_driver = $4 = (struct cpufreq_driver *) 0x0 my machine is 128 core, three cores have the same problem: crash> p tick_cpu_sched:a | grep iowait_sleeptime | grep "\-" iowait_sleeptime = -422390319, iowait_sleeptime = -365884628719, iowait_sleeptime = -1942739704, (In reply to Liguang from comment #6) > my machine is 128 core, three cores have the same problem: > crash> p tick_cpu_sched:a | grep iowait_sleeptime | grep "\-" > iowait_sleeptime = -422390319, > iowait_sleeptime = -365884628719, > iowait_sleeptime = -1942739704, crash> p tick_cpu_sched:59 per_cpu(tick_cpu_sched, 59) = $331 = { sched_timer = { node = { node = { __rb_parent_color = 18446603337735190528, rb_right = 0x0, rb_left = 0x0 }, expires = 6108564000000 }, _softexpires = 6108564000000, function = 0xffff800010122ec0 <tick_sched_timer>, base = 0xffff00477bb78780, state = 1 '\001', is_rel = 0 '\000', is_soft = 0 '\000', is_hard = 1 '\001' }, check_clocks = 0, nohz_mode = NOHZ_MODE_INACTIVE, inidle = 1, tick_stopped = 0, idle_active = 1, do_timer_last = 0, got_idle_tick = 1, last_tick = 0, next_tick = 0, idle_jiffies = 0, idle_calls = 0, idle_sleeps = 0, idle_entrytime = 5009052372788, idle_waketime = 0, idle_exittime = 0, idle_sleeptime = 4925948942621, iowait_sleeptime = -422390319, last_jiffies = 0, timer_expires = 0, timer_expires_base = 0, next_timer = 0, idle_expires = 0, tick_dep_mask = { counter = 0 } } crash> p tick_cpu_sched:102 per_cpu(tick_cpu_sched, 102) = $332 = { sched_timer = { node = { node = { __rb_parent_color = 18446603336923004336, rb_right = 0x0, rb_left = 0x0 }, expires = 6108564000000 }, _softexpires = 6108564000000, function = 0xffff800010122ec0 <tick_sched_timer>, base = 0xffff04473b92c780, state = 1 '\001', is_rel = 0 '\000', is_soft = 0 '\000', is_hard = 1 '\001' }, check_clocks = 0, nohz_mode = NOHZ_MODE_INACTIVE, inidle = 1, tick_stopped = 0, idle_active = 1, do_timer_last = 0, got_idle_tick = 1, last_tick = 0, next_tick = 0, idle_jiffies = 0, idle_calls = 0, idle_sleeps = 0, idle_entrytime = 5009052372748, idle_waketime = 0, idle_exittime = 0, idle_sleeptime = 5281350974871, iowait_sleeptime = -365884628719, last_jiffies = 0, timer_expires = 0, timer_expires_base = 0, next_timer = 0, idle_expires = 0, tick_dep_mask = { counter = 0 } } crash> p tick_cpu_sched:126 per_cpu(tick_cpu_sched, 126) = $333 = { sched_timer = { node = { node = { __rb_parent_color = 18446603337117384112, rb_right = 0x0, rb_left = 0x0 }, expires = 6108564000000 }, _softexpires = 6108564000000, function = 0xffff800010122ec0 <tick_sched_timer>, base = 0xffff04473bbcc780, state = 1 '\001', is_rel = 0 '\000', is_soft = 0 '\000', is_hard = 1 '\001' }, check_clocks = 0, nohz_mode = NOHZ_MODE_INACTIVE, inidle = 1, tick_stopped = 0, idle_active = 1, do_timer_last = 0, got_idle_tick = 1, last_tick = 0, next_tick = 0, idle_jiffies = 0, idle_calls = 0, idle_sleeps = 0, idle_entrytime = 5012087709249, idle_waketime = 0, idle_exittime = 0, idle_sleeptime = 4936136669951, iowait_sleeptime = -1942739704, last_jiffies = 0, timer_expires = 0, timer_expires_base = 0, next_timer = 0, idle_expires = 0, tick_dep_mask = { counter = 0 } } |