Created attachment 297857 [details] dmesg (kernel 5.14-rc1, Talos II) My Talos II run fine for a few hours building stuff but at system shutdown (systemctl poweroff) I got this: [...] BUG: Kernel NULL pointer dereference on read at 0x00000000 Faulting instruction address: 0xc00000000034396c Oops: Kernel access of bad area, sig: 7 [#1] BE PAGE_SIZE=4K MMU=Radix SMP NR_CPUS=192 DEBUG_PAGEALLOC NUMA PowerNV Modules linked in: auth_rpcgss nfsv4 dns_resolver nfs lockd grace sunrpc rfkill ecb xts ctr cbc aes_generic libaes ibmpowernv evdev radeon snd_hda_codec_hdmi snd_hda_intel snd_intel_dspcfg snd_hda_codec snd_hwdep ghash_generic snd_hda_core drm_ttm_helper xhci_pci ofpart snd_pcm vmx_crypto ttm gf128mul powernv_flash xhci_hcd mtd i2c_algo_bit snd_timer opal_prd hwmon drm_kms_helper usbcore cfbfillrect cfbcopyarea cfbimgblt sysimgblt snd syscopyarea sysfillrect fb_sys_fops usb_common soundcore at24 regmap_i2c zram zsmalloc powernv_cpufreq drm fuse drm_panel_orientation_quirks backlight configfs CPU: 26 PID: 345930 Comm: kworker/u66:5 Not tainted 5.14.0-rc1-TalosII #2 Workqueue: events_unbound .cleanup_offline_cgwbs_workfn NIP: c00000000034396c LR: c000000000343850 CTR: 0000000000000000 REGS: c00020016bf9f7d0 TRAP: 0300 Not tainted (5.14.0-rc1-TalosII) MSR: 9000000000009032 <SF,HV,EE,ME,IR,DR,RI> CR: 44002228 XER: 00000004 CFAR: c000000000343864 DAR: 0000000000000000 DSISR: 00080000 IRQMASK: 1 GPR00: c000000000343848 c00020016bf9fa70 c0000000012d6100 0000000000000001 GPR04: c000200183630ac0 00000000ffffffff 0000000090163e29 0040900000000000 GPR08: 0000000000000000 0000000000000003 0000000000000000 c00000000168c488 GPR12: 0000000044002228 c0002007ff7f4c00 c000000000115e20 c000200002950340 GPR16: 0000000000000000 0000000000000001 c00000000112ef30 c000000000ea2db8 GPR20: c000000000ea2d68 c000000000ea2d98 0000000000000001 c0000000011c6352 GPR24: 0000000000000001 c000200183630080 c00000000114cf28 c00000000114ced8 GPR28: c00020016bf9faf8 c00000000114cde8 c00020000229a000 
c00020000229a510 NIP [c00000000034396c] .cleanup_offline_cgwbs_workfn+0x3ac/0x410 LR [c000000000343850] .cleanup_offline_cgwbs_workfn+0x290/0x410 Call Trace: [c00020016bf9fa70] [c000000000343848] .cleanup_offline_cgwbs_workfn+0x288/0x410 (unreliable) [c00020016bf9fb90] [c00000000010871c] .process_one_work+0x2dc/0x7d0 [c00020016bf9fc70] [c000000000108ca8] .worker_thread+0x98/0x500 [c00020016bf9fd50] [c000000000115fa8] .kthread+0x188/0x190 [c00020016bf9fe10] [c00000000000cef8] .ret_from_kernel_thread+0x58/0x60 Instruction dump: 7e85a378 388002b0 7ea3ab78 9ad70002 4be3ff21 60000000 e93fff08 712a0003 4182ff0c e95fff10 39000000 7c0004ac <7d2050a8> 7c294000 41820018 7d384a14 ---[ end trace d475291d44c4d324 ]--- note: kworker/u66:5[345930] exited with preempt_count 2 watchdog: CPU 16 self-detected hard LOCKUP @ .do_raw_spin_lock+0x90/0x1d0 watchdog: CPU 16 TB:5873709870913, last heartbeat TB:5867694041597 (11749ms ago) Modules linked in: auth_rpcgss nfsv4 dns_resolver nfs lockd grace sunrpc rfkill ecb xts ctr cbc aes_generic libaes ibmpowernv evdev radeon snd_hda_codec_hdmi snd_hda_intel snd_intel_dspcfg snd_hda_codec snd_hwdep ghash_generic snd_hda_core drm_ttm_helper xhci_pci ofpart snd_pcm vmx_crypto ttm gf128mul powernv_flash xhci_hcd mtd i2c_algo_bit snd_timer opal_prd hwmon drm_kms_helper usbcore cfbfillrect cfbcopyarea cfbimgblt sysimgblt snd syscopyarea sysfillrect fb_sys_fops usb_common soundcore at24 regmap_i2c zram zsmalloc powernv_cpufreq drm fuse drm_panel_orientation_quirks backlight configfs irq event stamp: 3697758 hardirqs last enabled at (3697757): [<c0000000003d8d84>] .__slab_free+0x3b4/0x5f0 hardirqs last disabled at (3697758): [<c000000000c7e228>] ._raw_spin_lock_irq+0x88/0xa0 softirqs last enabled at (3697726): [<c000000000342d1c>] .wb_shutdown+0x5c/0x140 softirqs last disabled at (3697724): [<c000000000342ce0>] .wb_shutdown+0x20/0x140 CPU: 16 PID: 292187 Comm: kworker/16:0 Tainted: G D 5.14.0-rc1-TalosII #2 Workqueue: cgwb_release 
.cgwb_release_workfn NIP: c000000000185e90 LR: c000000000c7e204 CTR: 0000000000000000 REGS: c0002007ff667d60 TRAP: 0900 Tainted: G D (5.14.0-rc1-TalosII) MSR: 9000000000009032 <SF,HV,EE,ME,IR,DR,RI> CR: 44002228 XER: 20040000 CFAR: c000000000185e9c IRQMASK: 1 GPR00: c000000000c7e1f8 c0002000e15cf9f0 c0000000012d6100 c00000000114ced8 GPR04: c0002000067b8a98 00000000ffffffff 00000000d7d91060 0e98300000000000 GPR08: ffffffff62ff980e 000000008000001a 0000000080000010 c00000000168c488 GPR12: 0000000044002222 c0002007ff7ffc00 c000000000115e20 c000200002950640 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 c00000000133fa08 c0000000010f0840 GPR24: c0002000022d52d4 0000000000000000 c0000000012e8ce0 0000000000000000 GPR28: c00020000229a000 0000000000000001 c00020000229a520 c00000000114ced8 NIP [c000000000185e90] .do_raw_spin_lock+0x90/0x1d0 LR [c000000000c7e204] ._raw_spin_lock_irq+0x64/0xa0 Call Trace: [c0002000e15cf9f0] [c000000000c7d994] ._raw_spin_unlock_irqrestore+0x84/0xd0 (unreliable) [c0002000e15cfa70] [c000000000c7e1f8] ._raw_spin_lock_irq+0x58/0xa0 [c0002000e15cfb00] [c000000000344c14] .cgwb_release_workfn+0xd4/0x200 [c0002000e15cfb90] [c00000000010871c] .process_one_work+0x2dc/0x7d0 [c0002000e15cfc70] [c000000000108ca8] .worker_thread+0x98/0x500 [c0002000e15cfd50] [c000000000115fa8] .kthread+0x188/0x190 [c0002000e15cfe10] [c00000000000cef8] .ret_from_kernel_thread+0x58/0x60 Instruction dump: 40820030 a14d0378 a0ed0002 38210080 e90d0128 394a0001 b14d0378 90ff0008 f91f0010 ebe1fff8 4e800020 60000000 <7c210b78> 813f0000 2c290000 4082fff4 Kernel panic - not syncing: Hard LOCKUP watchdog: CPU 28 self-detected hard LOCKUP @ .do_raw_spin_lock+0x90/0x1d0 watchdog: CPU 28 TB:5873733764122, last heartbeat TB:5867705991531 (11772ms ago) Modules linked in: auth_rpcgss nfsv4 dns_resolver nfs lockd grace sunrpc rfkill ecb xts ctr cbc aes_generic libaes ibmpowernv evdev radeon snd_hda_codec_hdmi snd_hda_intel 
snd_intel_dspcfg snd_hda_codec snd_hwdep ghash_generic snd_hda_core drm_ttm_helper xhci_pci ofpart snd_pcm vmx_crypto ttm gf128mul powernv_flash xhci_hcd mtd i2c_algo_bit snd_timer opal_prd hwmon drm_kms_helper usbcore cfbfillrect cfbcopyarea cfbimgblt sysimgblt snd syscopyarea sysfillrect fb_sys_fops usb_common soundcore at24 regmap_i2c zram zsmalloc powernv_cpufreq drm fuse drm_panel_orientation_quirks backlight configfs irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [<c0000000000ce3c4>] .copy_process+0x1134/0x3fd0 softirqs last enabled at (0): [<c0000000000ce3c4>] .copy_process+0x1134/0x3fd0 softirqs last disabled at (0): [<0000000000000000>] 0x0 CPU: 28 PID: 425922 Comm: systemd-update- Tainted: G D 5.14.0-rc1-TalosII #2 NIP: c000000000185e90 LR: c000000000c7e3f8 CTR: 0000000000000000 REGS: c0002007ff5d7d60 TRAP: 0900 Tainted: G D (5.14.0-rc1-TalosII) MSR: 9000000000009032 <SF,HV,EE,ME,IR,DR,RI> CR: 44002482 XER: 2004008c CFAR: c000000000185e9c IRQMASK: 1 GPR00: c000000000c7e3ec c00020006d5ef610 c0000000012d6100 c00000000114ced8 GPR04: 0000000000000000 0000000000000000 0000000000000000 0000000000000001 GPR08: 0000000000000000 000000008000001a 000000008000001c fffffffffffffffd GPR12: 0000000024002482 c0002007ff7f4400 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000180 00000000003e0100 c000000010f78000 GPR20: c00020000265c380 0000000000000000 c0000000020a37b8 0000000000000000 GPR24: c000200002ba15d8 c00000000114ced8 0000000000000cc0 c000200002ba1000 GPR28: 0000000000000000 c00020000265c380 0000000000000000 c00000000114ced8 NIP [c000000000185e90] .do_raw_spin_lock+0x90/0x1d0 LR [c000000000c7e3f8] ._raw_spin_lock_irqsave+0x68/0xb0 Call Trace: [c00020006d5ef610] [c00000000112ef30] rcu_lock_map+0x0/0x28 (unreliable) [c00020006d5ef690] [c000000000c7e3ec] ._raw_spin_lock_irqsave+0x5c/0xb0 [c00020006d5ef720] [c000000000344e2c] .wb_get_create+0xec/0x8d0 [c00020006d5ef820] [c00000000031848c] 
.balance_dirty_pages_ratelimited+0x2ec/0x880 [c00020006d5ef8e0] [c00000000064c798] .btrfs_buffered_write+0x548/0x740 [c00020006d5efa40] [c00000000064cb9c] .btrfs_file_write_iter+0x20c/0x4f0 [c00020006d5efb10] [c000000000419254] .new_sync_write+0x124/0x1c0 [c00020006d5efc00] [c00000000041b7e4] .vfs_write+0x1c4/0x230 [c00020006d5efca0] [c00000000041ba04] .ksys_write+0x74/0x130 [c00020006d5efd40] [c000000000033ea0] .system_call_exception+0x1c0/0x420 [c00020006d5efe10] [c00000000000c070] system_call_vectored_common+0xf0/0x280 --- interrupt: 3000 at 0x3fff9ea5a73c NIP: 00003fff9ea5a73c LR: 0000000000000000 CTR: 0000000000000000 REGS: c00020006d5efe80 TRAP: 3000 Tainted: G D (5.14.0-rc1-TalosII) MSR: 900000000280f032 <SF,HV,VEC,VSX,EE,PR,FP,ME,IR,DR,RI> CR: 44000482 XER: 00000000 IRQMASK: 0 GPR00: 0000000000000004 00003fffedcba9a0 00003fff9eb44300 0000000000000005 GPR04: 00003fffedcbac68 0000000000000180 00003fffedcba990 0000000000000002 GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR12: 0000000000000000 00003fff9ef78810 00003ffff543cda8 0000000000000000 GPR16: 0000000000000000 0000000000000000 00003ffff543ca00 00000001525b4260 GPR20: 00003ffff543ca18 0000000000000001 0000000000000000 0000000000000001 GPR24: 0000000000000000 0000000000000000 0000000000000000 0000000000000002 GPR28: 0000000000000000 00003fffedcbac68 00000000003e0100 0000000000000005 NIP [00003fff9ea5a73c] 0x3fff9ea5a73c LR [0000000000000000] 0x0 --- interrupt: 3000 Instruction dump: 40820030 a14d0378 a0ed0002 38210080 e90d0128 394a0001 b14d0378 90ff0008 f91f0010 ebe1fff8 4e800020 60000000 <7c210b78> 813f0000 2c290000 4082fff4
Created attachment 297859 [details] kernel .config (kernel 5.14-rc1, Talos II)
I have not seen this for quite a few stable releases, nor on the 5.19-rcs. Closing for now as obsolete.