log: HTB: quantum of class 18639 is small. Consider r2q change. HTB: quantum of class 18718 is small. Consider r2q change. HTB: quantum of class 10001 is small. Consider r2q change. kvm: 3561: cpu15 unhandled rdmsr: 0x606 kvm: 3561: cpu0 unhandled rdmsr: 0x611 kvm: 3561: cpu0 unhandled rdmsr: 0x639 kvm: 3561: cpu0 unhandled rdmsr: 0x641 kvm: 3561: cpu0 unhandled rdmsr: 0x619 kvm: 3561: cpu0 unhandled rdmsr: 0x611 kvm: 3561: cpu0 unhandled rdmsr: 0x639 kvm: 3561: cpu0 unhandled rdmsr: 0x641 kvm: 3561: cpu0 unhandled rdmsr: 0x619 kvm: 3561: cpu0 unhandled rdmsr: 0x611 kvm: 3561: cpu0 unhandled rdmsr: 0x639 __ratelimit: 23 callbacks suppressed kvm: 3561: cpu0 unhandled rdmsr: 0x1ad kvm: 4150: cpu0 unhandled rdmsr: 0x611 kvm: 4150: cpu0 unhandled rdmsr: 0x639 kvm: 4150: cpu0 unhandled rdmsr: 0x641 kvm: 4150: cpu0 unhandled rdmsr: 0x619 hrtimer: interrupt took 19374 ns BUG: NMI Watchdog detected LOCKUP on CPU2, ip ffffffff812616a8, registers: CPU 2 Modules linked in: act_police cls_u32 sch_ingress ip6_tables iptable_filter ip_tables ebtable_nat ebtables sch_htb openvswitch(U) libcrc32c ttm vhost_net macvtap macvlan tun kvm_intel kvm wmi acpi_pad microcode i2c_i801 iTCO_wdt iTCO_vendor_support sg igb hwmon dca i2c_algo_bit i2c_core shpchp ext4 mbcache jbd2 isci libsas scsi_transport_sas sd_mod crc_t10dif ahci libata megaraid_sas scsi_mod dm_mirror dm_region_hash dm_log dm_mod [last unloaded: scsi_wait_scan] Pid: 3430, comm: vhost-3428 Not tainted 2.6.32-220.7.1.el6.3.0.1.1.x86_64 #1 Sugon I620-G10/X9DR3-F RIP: 0010:[<ffffffff812616a8>] [<ffffffff812616a8>] __rb_rotate_right+0x88/0x90 RSP: 0018:ffff8800282438b8 EFLAGS: 00000086 RAX: ffff881861e42f18 RBX: ffff881861e42f18 RCX: ffff881861e42f18 RDX: ffff881861e42f18 RSI: ffff88002824fdb8 RDI: ffff881861e42f18 RBP: ffff8800282438b8 R08: 0000000000000000 R09: 0000000000000000 R10: 000000002b30005a R11: 000000000000006f R12: ffff881861e42f18 R13: ffff881861e42f18 R14: ffff881861e42f18 R15: ffff881861e42f18 FS: 0000000000000000(0000) GS:ffff880028240000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 00000000006d3a40 CR3: 0000000c5f402000 CR4: 00000000001426e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process vhost-3428 (pid: 3430, threadinfo ffff880c607c2000, task ffff880c5e236080) Stack: ffff8800282438f8 ffffffff812617d5 ffff88002824fdb8 ffff881861e42f18 <0> 0000000000000000 0000000000000000 000000000000fd60 0000000000000000 <0> ffff880028243928 ffffffff8108cec2 ffff880028243928 ffff881861e42f18 Call Trace: <IRQ> [<ffffffff812617d5>] rb_insert_color+0x125/0x160 [<ffffffff8108cec2>] enqueue_hrtimer+0x82/0xd0 [<ffffffff8108daec>] __hrtimer_start_range_ns+0xec/0x340 [<ffffffff8126fed0>] ? swiotlb_map_page+0x0/0x100 [<ffffffff8108dd58>] hrtimer_start+0x18/0x20 [<ffffffff81420daf>] qdisc_watchdog_schedule+0x2f/0x40 [<ffffffffa02e337f>] htb_dequeue+0x69f/0x7c0 [sch_htb] [<ffffffff8141f23f>] __qdisc_run+0x3f/0xe0 [<ffffffff81405b43>] dev_queue_xmit+0x433/0x6b0 [<ffffffffa02ce991>] rpl_dev_queue_xmit+0x31/0x270 [openvswitch] [<ffffffffa02cf303>] ? arch_fast_hash2+0x23/0x50 [openvswitch] [<ffffffffa02cc7b0>] netdev_send+0x50/0xc0 [openvswitch] [<ffffffff8100bbd3>] ? apic_timer_interrupt+0x13/0x20 [<ffffffffa02ca342>] ovs_vport_send+0x22/0x90 [openvswitch] [<ffffffffa02c057e>] do_output+0x2e/0x50 [openvswitch] [<ffffffffa02c0b54>] do_execute_actions+0x5b4/0xe30 [openvswitch] [<ffffffff810306a7>] ? native_apic_msr_write+0x37/0x40 [<ffffffffa02c9439>] ? flow_lookup+0x59/0xe0 [openvswitch] [<ffffffffa02c14ad>] ovs_execute_actions+0x7d/0xe0 [openvswitch] [<ffffffffa02c265f>] ovs_dp_process_packet_with_key+0xdf/0x150 [openvswitch] [<ffffffffa02c2721>] ovs_dp_process_received_packet+0x51/0x80 [openvswitch] [<ffffffffa02ca3de>] ovs_vport_receive+0x2e/0x30 [openvswitch] [<ffffffffa02cca34>] netdev_frame_hook+0x94/0xf0 [openvswitch] [<ffffffff81401809>] __netif_receive_skb+0x519/0x6e0 [<ffffffff81401a6a>] process_backlog+0x9a/0x120 [<ffffffff8140722e>] net_rx_action+0x10e/0x300 [<ffffffff8106a8a1>] __do_softirq+0xc1/0x1d0 [<ffffffff8100c20c>] call_softirq+0x1c/0x30 <EOI> [<ffffffff8100de25>] ? do_softirq+0x65/0xa0 [<ffffffff814067a8>] netif_rx_ni+0x28/0x30 [<ffffffffa0287769>] tun_sendmsg+0x229/0x4ec [tun] [<ffffffffa02a1f25>] handle_tx+0x275/0x5e0 [vhost_net] [<ffffffffa02a22c5>] handle_tx_kick+0x15/0x20 [vhost_net] [<ffffffffa029f4fc>] vhost_worker+0xbc/0x140 [vhost_net] [<ffffffffa029f440>] ? vhost_worker+0x0/0x140 [vhost_net] [<ffffffff81088b16>] kthread+0x96/0xa0 [<ffffffff8100c10a>] child_rip+0xa/0x20 [<ffffffff81088a80>] ? kthread+0x0/0xa0 [<ffffffff8100c100>] ? child_rip+0x0/0x20 Code: 00 00 00 48 89 06 48 8b 17 83 e2 03 48 09 c2 48 89 17 c9 c3 0f 1f 80 00 00 00 00 48 89 42 08 48 8b 17 83 e2 03 48 09 c2 48 89 17 <c9> c3 66 0f 1f 44 00 00 55 48 89 e5 41 57 41 56 49 89 fe 41 55 bt: crash> bt PID: 3430 TASK: ffff880c5e236080 CPU: 2 COMMAND: "vhost-3428" #0 [ffff880028247d50] machine_kexec at ffffffff8103229b #1 [ffff880028247db0] crash_kexec at ffffffff810b0392 #2 [ffff880028247e80] oops_end at ffffffff814c6ca0 #3 [ffff880028247eb0] die_nmi at ffffffff814c6e49 #4 [ffff880028247ee0] nmi_watchdog_tick at ffffffff814c752a #5 [ffff880028247f10] do_nmi at ffffffff814c69c1 #6 [ffff880028247f50] nmi at ffffffff814c6280 [exception RIP: __rb_rotate_right+136] RIP: ffffffff812616a8 RSP: ffff8800282438b8 RFLAGS: 00000086 RAX: ffff881861e42f18 RBX: ffff881861e42f18 RCX: ffff881861e42f18 RDX: ffff881861e42f18 RSI: ffff88002824fdb8 RDI: ffff881861e42f18 RBP: ffff8800282438b8 R8: 0000000000000000 R9: 0000000000000000 R10: 000000002b30005a R11: 000000000000006f R12: ffff881861e42f18 R13: ffff881861e42f18 R14: ffff881861e42f18 R15: ffff881861e42f18 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 --- <NMI exception stack> --- #7 [ffff8800282438b8] __rb_rotate_right at ffffffff812616a8 #8 [ffff8800282438c0] rb_insert_color at ffffffff812617d5 #9 [ffff880028243900] enqueue_hrtimer at ffffffff8108cec2 #10 [ffff880028243930] __hrtimer_start_range_ns at ffffffff8108daec #11 [ffff880028243990] hrtimer_start at ffffffff8108dd58 #12 [ffff8800282439a0] qdisc_watchdog_schedule at ffffffff81420daf #13 [ffff8800282439b0] htb_dequeue at ffffffffa02e337f [sch_htb] #14 [ffff880028243a80] __qdisc_run at ffffffff8141f23f #15 [ffff880028243ab0] dev_queue_xmit at ffffffff81405b43 #16 [ffff880028243b00] rpl_dev_queue_xmit at ffffffffa02ce991 [openvswitch] #17 [ffff880028243b40] netdev_send at ffffffffa02cc7b0 [openvswitch] #18 [ffff880028243b70] ovs_vport_send at ffffffffa02ca342 [openvswitch] #19 [ffff880028243ba0] do_output at ffffffffa02c057e [openvswitch] #20 [ffff880028243bc0] do_execute_actions at ffffffffa02c0b54 [openvswitch] #21 [ffff880028243c70] ovs_execute_actions at ffffffffa02c14ad [openvswitch] #22 [ffff880028243cc0] ovs_dp_process_packet_with_key at ffffffffa02c265f [openvswitch] #23 [ffff880028243d40] ovs_dp_process_received_packet at ffffffffa02c2721 [openvswitch] #24 [ffff880028243df0] ovs_vport_receive at ffffffffa02ca3de [openvswitch] #25 [ffff880028243e00] netdev_frame_hook at ffffffffa02cca34 [openvswitch] #26 [ffff880028243e30] __netif_receive_skb at ffffffff81401809 #27 [ffff880028243e90] process_backlog at ffffffff81401a6a #28 [ffff880028243ee0] net_rx_action at ffffffff8140722e #29 [ffff880028243f40] __do_softirq at ffffffff8106a8a1 #30 [ffff880028243fb0] call_softirq at ffffffff8100c20c --- <IRQ stack> --- #31 [ffff880c607c3ce8] do_softirq at ffffffff8100de25 #32 [ffff880c607c3d08] netif_rx_ni at ffffffff814067a8 #33 [ffff880c607c3d28] tun_sendmsg at ffffffffa0287769 [tun] #34 [ffff880c607c3da8] handle_tx at ffffffffa02a1f25 [vhost_net] #35 [ffff880c607c3e88] handle_tx_kick at ffffffffa02a22c5 [vhost_net] #36 [ffff880c607c3e98] vhost_worker at ffffffffa029f4fc [vhost_net] #37 [ffff880c607c3ee8] kthread at ffffffff81088b16 #38 [ffff880c607c3f48] kernel_thread at ffffffff8100c10a [exception RIP: child_rip] RIP: ffffffff8100c100 RSP: ffff880c607c3f58 RFLAGS: 00000200 RAX: 0000000000000000 RBX: ffff880c5e0f9bf8 RCX: 0000000000000000 RDX: 0000000000000611 RSI: ffff880c5e0f9bf8 RDI: ffffffff81088a80 RBP: ffff880c61e11f40 R8: ffff880c61e10000 R9: 00000000ffffffff R10: 0000000000000000 R11: 0000000000000000 R12: ffff880c5e0f9c30 R13: ffffffff81a86768 R14: ffff880c61df4790 R15: ffff880c61df4790 ORIG_RAX: 0000000000000000 CS: 0010 SS: 0018 bt: WARNING: possibly bogus exception frame code: crash> l * __rb_rotate_right+0x88/0x90 0xffffffff81261620 is in __rb_rotate_right (lib/rbtree.c:51). 46 rb_set_parent(node, right); 47 } 48 49 static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) 50 { 51 struct rb_node *left = node->rb_left; 52 struct rb_node *parent = rb_parent(node); 53 54 if ((node->rb_left = left->rb_right)) 55 rb_set_parent(left->rb_right, node);
sys info: CPUS: 24 DATE: Sun Jan 10 10:30:02 2016 UPTIME: 10:01:10 LOAD AVERAGE: 9.41, 2.61, 1.17 TASKS: 1945 NODENAME: openstack422 RELEASE: 2.6.32-220.7.1.el6.3.0.1.1.x86_64 VERSION: #1 SMP Mon Feb 2 13:39:59 CST 2015 MACHINE: x86_64 (2599 Mhz) MEMORY: 96 GB PANIC: ""
other Oops: sys: KERNEL: /root/vmlinux DUMPFILE: vmcore.11.53 [PARTIAL DUMP] CPUS: 24 DATE: Sat Jan 9 11:52:09 2016 UPTIME: 05:17:52 LOAD AVERAGE: 11.50, 4.85, 3.83 TASKS: 1951 NODENAME: openstack422 RELEASE: 2.6.32-220.7.1.el6.3.0.1.1.x86_64 VERSION: #1 SMP Mon Feb 2 13:39:59 CST 2015 MACHINE: x86_64 (2600 Mhz) MEMORY: 96 GB PANIC: "" PID: 3562 COMMAND: "vhost-3560" TASK: ffff880c5eb227d0 [THREAD_INFO: ffff880c5fbf2000] CPU: 22 STATE: TASK_RUNNING (NMI) log: HTB: quantum of class 18718 is small. Consider r2q change. HTB: quantum of class 10001 is small. Consider r2q change. kvm: 3560: cpu0 unhandled rdmsr: 0x606 kvm: 4131: cpu0 unhandled rdmsr: 0x606 kvm: 3560: cpu0 unhandled rdmsr: 0x611 kvm: 3560: cpu0 unhandled rdmsr: 0x639 kvm: 3560: cpu0 unhandled rdmsr: 0x641 kvm: 3560: cpu0 unhandled rdmsr: 0x619 kvm: 4131: cpu0 unhandled rdmsr: 0x611 kvm: 4131: cpu0 unhandled rdmsr: 0x639 kvm: 4131: cpu0 unhandled rdmsr: 0x641 kvm: 4131: cpu0 unhandled rdmsr: 0x619 kvm: 3560: cpu0 unhandled rdmsr: 0x1ad hrtimer: interrupt took 20952 ns BUG: NMI Watchdog detected LOCKUP on CPU22, ip ffffffff812616a8, registers: CPU 22 Modules linked in: act_police cls_u32 sch_ingress ip6_tables iptable_filter ip_tables ebtable_nat ebtables sch_htb openvswitch(U) libcrc32c ttm vhost_net macvtap macvlan tun kvm_intel kvm wmi acpi_pad microcode i2c_i801 iTCO_wdt iTCO_vendor_support sg igb hwmon dca i2c_algo_bit i2c_core shpchp ext4 mbcache jbd2 isci libsas scsi_transport_sas sd_mod crc_t10dif ahci libata megaraid_sas scsi_mod dm_mirror dm_region_hash dm_log dm_mod [last unloaded: scsi_wait_scan] Pid: 3562, comm: vhost-3560 Not tainted 2.6.32-220.7.1.el6.3.0.1.1.x86_64 #1 Sugon I620-G10/X9DR3-F RIP: 0010:[<ffffffff812616a8>] [<ffffffff812616a8>] __rb_rotate_right+0x88/0x90 RSP: 0018:ffff880caa7438b8 EFLAGS: 00000086 RAX: ffff881861ec9718 RBX: ffff881861ec9718 RCX: ffff881861ec9718 RDX: ffff881861ec9718 RSI: ffff880caa74fdb8 RDI: ffff881861ec9718 RBP: ffff880caa7438b8 R08: 0000000000000000 R09: 00000000000003e4 R10: 0000000000000000 R11: 16fac402303dfe78 R12: ffff881861ec9718 R13: ffff881861ec9718 R14: ffff881861ec9718 R15: ffff881861ec9718 FS: 0000000000000000(0000) GS:ffff880caa740000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 00007fe285ff9da4 CR3: 0000000c4b256000 CR4: 00000000001426e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process vhost-3560 (pid: 3562, threadinfo ffff880c5fbf2000, task ffff880c5eb227d0) Stack: ffff880caa7438f8 ffffffff812617d5 ffff880caa74fdb8 ffff881861ec9718 <0> 0000000000000000 0000000000000000 000000000000fd60 0000000000000000 <0> ffff880caa743928 ffffffff8108cec2 0000000000000000 ffff881861ec9718 Call Trace: <IRQ> [<ffffffff812617d5>] rb_insert_color+0x125/0x160 [<ffffffff8108cec2>] enqueue_hrtimer+0x82/0xd0 [<ffffffff8108daec>] __hrtimer_start_range_ns+0xec/0x340 [<ffffffff8126fed0>] ? swiotlb_map_page+0x0/0x100 [<ffffffff8108dd58>] hrtimer_start+0x18/0x20 [<ffffffff81420daf>] qdisc_watchdog_schedule+0x2f/0x40 [<ffffffffa02e337f>] htb_dequeue+0x69f/0x7c0 [sch_htb] [<ffffffff8141f23f>] __qdisc_run+0x3f/0xe0 [<ffffffff81405b43>] dev_queue_xmit+0x433/0x6b0 [<ffffffffa02ce991>] rpl_dev_queue_xmit+0x31/0x270 [openvswitch] [<ffffffffa02cf303>] ? arch_fast_hash2+0x23/0x50 [openvswitch] [<ffffffffa02cc7b0>] netdev_send+0x50/0xc0 [openvswitch] [<ffffffff8100bbd3>] ? apic_timer_interrupt+0x13/0x20 [<ffffffffa02ca342>] ovs_vport_send+0x22/0x90 [openvswitch] [<ffffffffa02c057e>] do_output+0x2e/0x50 [openvswitch] [<ffffffffa02c0b54>] do_execute_actions+0x5b4/0xe30 [openvswitch] [<ffffffff8109885a>] ? tick_program_event+0x2a/0x30 [<ffffffffa02c9439>] ? flow_lookup+0x59/0xe0 [openvswitch] [<ffffffffa02c14ad>] ovs_execute_actions+0x7d/0xe0 [openvswitch] [<ffffffffa02c265f>] ovs_dp_process_packet_with_key+0xdf/0x150 [openvswitch] [<ffffffffa02c2721>] ovs_dp_process_received_packet+0x51/0x80 [openvswitch] [<ffffffffa02ca3de>] ovs_vport_receive+0x2e/0x30 [openvswitch] [<ffffffffa02cca34>] netdev_frame_hook+0x94/0xf0 [openvswitch] [<ffffffff81401809>] __netif_receive_skb+0x519/0x6e0 [<ffffffff81401a6a>] process_backlog+0x9a/0x120 [<ffffffff8140722e>] net_rx_action+0x10e/0x300 [<ffffffff8106a8a1>] __do_softirq+0xc1/0x1d0 [<ffffffff8100c20c>] ? call_softirq+0x1c/0x30 [<ffffffff8100c20c>] call_softirq+0x1c/0x30 <EOI> [<ffffffff8100de25>] ? do_softirq+0x65/0xa0 [<ffffffff814067a8>] netif_rx_ni+0x28/0x30 [<ffffffffa0287769>] tun_sendmsg+0x229/0x4ec [tun] [<ffffffffa02a1f25>] handle_tx+0x275/0x5e0 [vhost_net] [<ffffffffa02a22c5>] handle_tx_kick+0x15/0x20 [vhost_net] [<ffffffffa029f4fc>] vhost_worker+0xbc/0x140 [vhost_net] [<ffffffffa029f440>] ? vhost_worker+0x0/0x140 [vhost_net] [<ffffffff81088b16>] kthread+0x96/0xa0 [<ffffffff8100c10a>] child_rip+0xa/0x20 [<ffffffff81088a80>] ? kthread+0x0/0xa0 [<ffffffff8100c100>] ? child_rip+0x0/0x20 Code: 00 00 00 48 89 06 48 8b 17 83 e2 03 48 09 c2 48 89 17 c9 c3 0f 1f 80 00 00 00 00 48 89 42 08 48 8b 17 83 e2 03 48 09 c2 48 89 17 <c9> c3 66 0f 1f 44 00 00 55 48 89 e5 41 57 41 56 49 89 fe 41 55 code: crash>l *__rb_rotate_right+0x88/0x90 0xffffffff81261620 is in __rb_rotate_right (lib/rbtree.c:51). 46 rb_set_parent(node, right); 47 } 48 49 static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) 50 { 51 struct rb_node *left = node->rb_left; 52 struct rb_node *parent = rb_parent(node); 53 54 if ((node->rb_left = left->rb_right)) 55 rb_set_parent(left->rb_right, node);
new crash info: __ratelimit: 14 callbacks suppressed kvm: 3347: cpu0 unhandled rdmsr: 0x1ad hrtimer: interrupt took 13211 ns BUG: NMI Watchdog detected LOCKUP on CPU21, ip ffffffff814c5af1, registers: CPU 21 Modules linked in: act_police cls_u32 sch_ingress ip6_tables iptable_filter ip_tables ebtable_nat ebtables sch_htb openvswitch(U) libcrc32c ttm vhost_net macvtap macvlan tun kvm_intel kvm wmi acpi_pad microcode i2c_i801 iTCO_wdt iTCO_vendor_support sg igb hwmon dca i2c_algo_bit i2c_core shpchp ext4 mbcache jbd2 isci libsas scsi_transport_sas sd_mod crc_t10dif ahci libata megaraid_sas scsi_mod dm_mirror dm_region_hash dm_log dm_mod [last unloaded: scsi_wait_scan] Pid: 2225, comm: handler16 Not tainted 2.6.32-220.7.1.el6.3.0.1.1.x86_64 #1 Sugon I620-G10/X9DR3-F RIP: 0010:[<ffffffff814c5af1>] [<ffffffff814c5af1>] _spin_lock+0x21/0x30 RSP: 0018:ffff880caa723eb8 EFLAGS: 00000006 RAX: 000000000000b424 RBX: ffff880c5ffa5f18 RCX: 0000000000000000 RDX: 000000000000b427 RSI: ffff880caa66fda8 RDI: ffff880caa66fd60 RBP: ffff880caa723eb8 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000001 R12: ffff880caa66fda8 R13: ffff880caa66fd60 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f62875fe700(0000) GS:ffff880caa720000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fe3c8001000 CR3: 000000186059d000 CR4: 00000000001426e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process handler16 (pid: 2225, threadinfo ffff880c5f4a4000, task ffff880c5e3fa0c0) Stack: ffff880caa723f08 ffffffff8108d487 ffff880caa723ed8 ffffffff810129f9 <0> ffff880caa723f08 ffff880caa72fd68 ffff880caa723f48 ffff880caa72fd60 <0> 0000000000000001 7fffffffffffffff ffff880caa723f88 ffffffff8108d7f6 Call Trace: <IRQ> [<ffffffff8108d487>] __run_hrtimer+0x97/0x170 [<ffffffff810129f9>] ? read_tsc+0x9/0x20 [<ffffffff8108d7f6>] hrtimer_interrupt+0xe6/0x250 [<ffffffff814cb76b>] smp_apic_timer_interrupt+0x6b/0x9b [<ffffffff8100bbd3>] apic_timer_interrupt+0x13/0x20 <EOI> [<ffffffff8104c00d>] ? mutex_spin_on_owner+0x7d/0xb0 [<ffffffff814c4761>] __mutex_lock_slowpath+0x71/0x190 [<ffffffff814c46cb>] mutex_lock+0x2b/0x50 [<ffffffff8142b37d>] genl_rcv+0x1d/0x40 [<ffffffff8142a006>] netlink_unicast+0x2e6/0x300 [<ffffffff8142a990>] netlink_sendmsg+0x200/0x2e0 [<ffffffff813ef27a>] sock_sendmsg+0x11a/0x150 [<ffffffff813ef863>] ? sock_recvmsg+0x133/0x160 [<ffffffff81088e90>] ? autoremove_wake_function+0x0/0x40 [<ffffffff8117b0d0>] ? pollwake+0x0/0x60 [<ffffffff813ef716>] __sys_sendmsg+0x406/0x420 [<ffffffff813eb94a>] ? sock_poll+0x1a/0x20 [<ffffffff811a8741>] ? ep_send_events_proc+0x71/0x110 [<ffffffff814c5b37>] ? _spin_unlock_irqrestore+0x17/0x20 [<ffffffff811a9653>] ? ep_scan_ready_list+0x183/0x190 [<ffffffff811a9720>] ? sys_epoll_wait+0xa0/0x300 [<ffffffff813f03c9>] sys_sendmsg+0x49/0x90 [<ffffffff8100b0b2>] system_call_fastpath+0x16/0x1b Code: 01 74 05 e8 82 18 da ff c9 c3 55 48 89 e5 0f 1f 44 00 00 b8 00 00 01 00 f0 0f c1 07 0f b7 d0 c1 e8 10 39 c2 74 0e f3 90 0f b7 17 <eb> f5 83 3f 00 75 f4 eb df c9 c3 0f 1f 40 00 55 48 89 e5 0f 1f
like? https://bugs.centos.org/print_bug_page.php?bug_id=7051 patch: commit 0bd9ac380a44611e953b1657d7f54075b2f77fb0 Author: Salman Qazi <sqazi@google.com> Date: Tue Oct 12 07:25:19 2010 -0700 hrtimer: Preserve timer state in remove_hrtimer() commit f13d4f979c518119bba5439dd2364d76d31dcd3f upstream. The race is described as follows: CPU X CPU Y remove_hrtimer // state & QUEUED == 0 timer->state = CALLBACK unlock timer base timer->f(n) //very long hrtimer_start lock timer base remove_hrtimer // no effect hrtimer_enqueue timer->state = CALLBACK | QUEUED unlock timer base hrtimer_start lock timer base remove_hrtimer mode = INACTIVE // CALLBACK bit lost! switch_hrtimer_base CALLBACK bit not set: timer->base changes to a different CPU. lock this CPU's timer base The bug was introduced with commit ca109491f (hrtimer: removing all ur callback modes) in 2.6.29 [ tglx: Feed new state via local variable and add a comment. ] Signed-off-by: Salman Qazi <sqazi@google.com> Cc: akpm@linux-foundation.org Cc: Peter Zijlstra <peterz@infradead.org> LKML-Reference: <20101012142351.8485.21823.stgit@dungbeetle.mtv.corp.google.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 931a4d9..a6e9d00 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -920,6 +920,7 @@ static inline int remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) { if (hrtimer_is_queued(timer)) { + unsigned long state; int reprogram; /* @@ -933,8 +934,13 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) debug_deactivate(timer); timer_stats_hrtimer_clear_start_info(timer); reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases); - __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, - reprogram); + /* + * We must preserve the CALLBACK state flag here, + * otherwise we could move the timer base in + * switch_hrtimer_base. + */ + state = timer->state & HRTIMER_STATE_CALLBACK; + __remove_hrtimer(timer, base, state, reprogram); return 1; } return 0; @@ -1221,6 +1227,9 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); enqueue_hrtimer(timer, base); } + + WARN_ON_ONCE(!(timer->state & HRTIMER_STATE_CALLBACK)); + timer->state &= ~HRTIMER_STATE_CALLBACK; }
test it's OK https://bugs.centos.org/print_bug_page.php?bug_id=7051