Created attachment 303794 [details] Patch to reproduce the problem We can easily reproduce this problem by injecting the fail function fault. 1. Apply the fail function patch 2. Enable follow kernel config CONFIG_MTD_NAND_NANDSIM=m CONFIG_MTD=y CONFIG_MTD_PARTITIONED_MASTER=y CONFIG_MTD_UBI=m CONFIG_MTD_UBI_FASTMAP=y CONFIG_FUNCTION_ERROR_INJECTION=y CONFIG_FAULT_INJECTION=y CONFIG_FAULT_INJECTION_DEBUG_FS=y CONFIG_FAIL_FUNCTION=y 3. Start the virtual machine environment and execute the following script ~~~shell #! /bin/bash ID="0xec,0xa1,0x00,0x25" # 128M 256KB 2KB 512 Blocks FAILTYPE=fail_function FAILFUNC=mtd_erase clean_ubi() { modprobe -r ubifs #2>/dev/null modprobe -r ubi #2>/dev/null modprobe -r nandsim #2>/dev/null } do_test() { echo ""> /sys/kernel/debug/$FAILTYPE/inject cat /sys/kernel/debug/$FAILTYPE/inject clean_ubi modprobe nandsim id_bytes=$ID # 128M 256KB 2KB 512 Blocks flash_erase /dev/mtd0 0 0 modprobe ubi fm_autoconvert echo $FAILFUNC > /sys/kernel/debug/$FAILTYPE/inject # Overwrite the mtd_erase return value to -EBUSY printf %#x -16 > /sys/kernel/debug/$FAILTYPE/$FAILFUNC/retval echo 100 > /sys/kernel/debug/$FAILTYPE/probability echo 0 > /sys/kernel/debug/$FAILTYPE/interval echo 10000 > /sys/kernel/debug/$FAILTYPE/times # Skip the first 500 calls echo 500 > /sys/kernel/debug/$FAILTYPE/space echo 2 > /sys/kernel/debug/$FAILTYPE/verbose dmesg -C # Now we must use the -f parameter to enable fastmap. ubiattach -m 0 -d 0 -f # Triggering ubi_update_fastmap() by ubidettach ubidetach -p /dev/mtd0 2>/dev/null clean_ubi } for ((i=1;i<=10;i++)) do echo "-----round $i------" do_test done ~~~
[root@localhost ~]# ps -aux | grep "D." | grep -v grep USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND root 83 0.8 0.0 0 0 ? D 20:59 0:03 [torture_stutter] root 1690 0.0 0.0 0 0 ? D 21:00 0:00 [ubi_bgt0d] root 1692 0.0 0.0 4416 1408 pts/0 D+ 21:00 0:00 ubidetach -p /dev/mtd0 [ 123.897437] INFO: lockdep is turned off. [ 124.402840] scftorture: scf_invoked_count ver: 12725442 resched: 2021797 single: 1904795/1900629 single_ofl: 0/0 single_rpc: 1903742 single_rpc_ofl: 0 many: 237393/238125 all: 118962/119017 onoff: 0/0:0/0 -1,0:-1,0 0:0 (HZ=1000) ste: 0 stnmie: 0 stnmoe: 0 staf: 0 [ 139.250851] INFO: task ubi_bgt0d:1690 blocked for more than 76 seconds. [ 139.251810] Not tainted 6.2.0-12486-gc2e9a8e97294 #42 [ 139.252403] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 139.253210] task:ubi_bgt0d state:D stack:0 pid:1690 ppid:2 flags:0x00004000 [ 139.253270] Call Trace: [ 139.253290] <TASK> [ 139.253344] __schedule+0x8d0/0x1b40 [ 139.253424] schedule+0x96/0x1b0 [ 139.253438] schedule_preempt_disabled+0x25/0x40 [ 139.253455] __down_read_common+0x652/0xfe0 [ 139.253473] ? _raw_spin_unlock_irqrestore+0x61/0xd0 [ 139.253508] ? kmemleak_alloc+0x50/0xd0 [ 139.253534] ? down_read+0x5e/0x70 [ 139.253543] down_read+0x5e/0x70 [ 139.253559] ? schedule_ubi_work+0x27/0x60 [ubi] [ 139.253704] schedule_ubi_work+0x27/0x60 [ubi] [ 139.253753] schedule_erase+0x8d/0x170 [ubi] [ 139.253804] __erase_worker+0x16e/0x850 [ubi] [ 139.253859] ? lock_release+0x383/0x540 [ 139.253872] erase_worker+0xcd/0x100 [ubi] [ 139.253900] do_work+0xce/0x1b0 [ubi] [ 139.253930] ubi_thread+0x231/0x3b0 [ubi] [ 139.253975] ? ubi_bitflip_check+0x560/0x560 [ubi] [ 139.254007] kthread+0x13e/0x1a0 [ 139.254025] ? kthread_exit+0x50/0x50 [ 139.254062] ret_from_fork+0x1f/0x30 [ 139.254102] </TASK> [ 139.254123] INFO: task ubidetach:1692 blocked for more than 76 seconds. [ 139.254957] Not tainted 6.2.0-12486-gc2e9a8e97294 #42 [ 139.255695] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 139.256706] task:ubidetach state:D stack:0 pid:1692 ppid:1527 flags:0x00004000 [ 139.256718] Call Trace: [ 139.256731] <TASK> [ 139.256746] __schedule+0x8d0/0x1b40 [ 139.256794] schedule+0x96/0x1b0 [ 139.256805] schedule_preempt_disabled+0x25/0x40 [ 139.256813] rwsem_down_write_slowpath+0x589/0x1340 [ 139.256900] down_write+0x182/0x1c0 [ 139.256917] ubi_update_fastmap+0x68/0x2730 [ubi] [ 139.256944] ? _printk+0x64/0x90 [ 139.256964] ? ubi_msg+0x7b/0xa0 [ubi] [ 139.256994] ubi_detach_mtd_dev+0x255/0x310 [ubi] [ 139.257014] ctrl_cdev_ioctl+0x203/0x320 [ubi] [ 139.257042] __x64_sys_ioctl+0xea/0x140 [ 139.257055] do_syscall_64+0x39/0x80 [ 139.257062] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 139.257074] RIP: 0033:0x7fd73f706577 [ 139.257085] RSP: 002b:00007fff9d2f8398 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 [ 139.257096] RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007fd73f706577 [ 139.257102] RDX: 00007fff9d2f83ac RSI: 0000000040046f41 RDI: 0000000000000003 [ 139.257105] RBP: 000000000040381a R08: 0000000000000000 R09: 0000000000000001 [ 139.257108] R10: 0000000000000003 R11: 0000000000000202 R12: 000000000040381a [ 139.257113] R13: 00007fff9d2f8510 R14: 0000000000000000 R15: 0000000000000000 [ 139.257129] </TASK> [ 139.257132] Future hung task reports are suppressed, see sysctl kernel.hung_task_warnings [ 139.257133] INFO: lockdep is turned off.
Quick question, as this is afaics unclear from the report: Did this work in earlier kernels like 6.1.y? Or IOW: is this a regression?