Bug 217093

Summary: Deadlock occurs when sync_erase fails and updating fastmap
Product: Drivers Reporter: wangzhaolong1
Component: Flash/Memory Technology DevicesAssignee: David Woodhouse (dwmw2)
Status: NEW ---    
Severity: normal CC: regressions
Priority: P1    
Hardware: All   
OS: Linux   
Kernel Version: v6.2 Subsystem:
Regression: No Bisected commit-id:
Attachments: Patch to reproduce the problem

Description wangzhaolong1 2023-02-27 12:38:15 UTC
Created attachment 303794 [details]
Patch to reproduce the problem

We can easily reproduce this problem by injecting the fail function fault.

1. Apply the fail function patch

2. Enable follow kernel config

CONFIG_MTD_NAND_NANDSIM=m
CONFIG_MTD=y
CONFIG_MTD_PARTITIONED_MASTER=y
CONFIG_MTD_UBI=m
CONFIG_MTD_UBI_FASTMAP=y

CONFIG_FUNCTION_ERROR_INJECTION=y
CONFIG_FAULT_INJECTION=y
CONFIG_FAULT_INJECTION_DEBUG_FS=y
CONFIG_FAIL_FUNCTION=y

3. Start the virtual machine environment and execute the following script


~~~shell
#! /bin/bash

ID="0xec,0xa1,0x00,0x25" # 128M 256KB 2KB 512 Blocks
FAILTYPE=fail_function
FAILFUNC=mtd_erase

clean_ubi() {
	modprobe -r ubifs     #2>/dev/null
	modprobe -r ubi       #2>/dev/null
	modprobe -r nandsim   #2>/dev/null
}

do_test() {
	echo  ""> /sys/kernel/debug/$FAILTYPE/inject
	cat /sys/kernel/debug/$FAILTYPE/inject

	clean_ubi

	modprobe nandsim id_bytes=$ID # 128M 256KB 2KB 512 Blocks

	flash_erase /dev/mtd0 0 0
	modprobe ubi fm_autoconvert

	echo $FAILFUNC > /sys/kernel/debug/$FAILTYPE/inject

        # Overwrite the mtd_erase return value to -EBUSY
	printf %#x -16 > /sys/kernel/debug/$FAILTYPE/$FAILFUNC/retval

	echo 100 > /sys/kernel/debug/$FAILTYPE/probability
	echo 0 > /sys/kernel/debug/$FAILTYPE/interval
	echo 10000 > /sys/kernel/debug/$FAILTYPE/times

        # Skip the first 500 calls
	echo 500 > /sys/kernel/debug/$FAILTYPE/space
	echo 2 > /sys/kernel/debug/$FAILTYPE/verbose

	dmesg -C
        # Now we must use the -f parameter to enable fastmap.
	ubiattach -m 0 -d 0 -f
        # Triggering ubi_update_fastmap() by ubidettach
        ubidetach -p /dev/mtd0 2>/dev/null
	clean_ubi
}

for ((i=1;i<=10;i++)) do
	echo "-----round $i------"
	do_test
done

~~~
Comment 1 wangzhaolong1 2023-02-27 12:42:26 UTC
[root@localhost ~]# ps -aux | grep "D." | grep -v grep
USER       PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
root        83  0.8  0.0      0     0 ?        D    20:59   0:03 [torture_stutter]
root      1690  0.0  0.0      0     0 ?        D    21:00   0:00 [ubi_bgt0d]
root      1692  0.0  0.0   4416  1408 pts/0    D+   21:00   0:00 ubidetach -p /dev/mtd0





[  123.897437] INFO: lockdep is turned off.
[  124.402840] scftorture:  scf_invoked_count ver: 12725442 resched: 2021797 single: 1904795/1900629 single_ofl: 0/0 single_rpc: 1903742 single_rpc_ofl: 0 many: 237393/238125 all: 118962/119017 onoff: 0/0:0/0 -1,0:-1,0 0:0 (HZ=1000) ste: 0 stnmie: 0 stnmoe: 0 staf: 0
[  139.250851] INFO: task ubi_bgt0d:1690 blocked for more than 76 seconds.
[  139.251810]       Not tainted 6.2.0-12486-gc2e9a8e97294 #42
[  139.252403] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  139.253210] task:ubi_bgt0d       state:D stack:0     pid:1690  ppid:2      flags:0x00004000
[  139.253270] Call Trace:
[  139.253290]  <TASK>
[  139.253344]  __schedule+0x8d0/0x1b40
[  139.253424]  schedule+0x96/0x1b0
[  139.253438]  schedule_preempt_disabled+0x25/0x40
[  139.253455]  __down_read_common+0x652/0xfe0
[  139.253473]  ? _raw_spin_unlock_irqrestore+0x61/0xd0
[  139.253508]  ? kmemleak_alloc+0x50/0xd0
[  139.253534]  ? down_read+0x5e/0x70
[  139.253543]  down_read+0x5e/0x70
[  139.253559]  ? schedule_ubi_work+0x27/0x60 [ubi]
[  139.253704]  schedule_ubi_work+0x27/0x60 [ubi]
[  139.253753]  schedule_erase+0x8d/0x170 [ubi]
[  139.253804]  __erase_worker+0x16e/0x850 [ubi]
[  139.253859]  ? lock_release+0x383/0x540
[  139.253872]  erase_worker+0xcd/0x100 [ubi]
[  139.253900]  do_work+0xce/0x1b0 [ubi]
[  139.253930]  ubi_thread+0x231/0x3b0 [ubi]
[  139.253975]  ? ubi_bitflip_check+0x560/0x560 [ubi]
[  139.254007]  kthread+0x13e/0x1a0
[  139.254025]  ? kthread_exit+0x50/0x50
[  139.254062]  ret_from_fork+0x1f/0x30
[  139.254102]  </TASK>
[  139.254123] INFO: task ubidetach:1692 blocked for more than 76 seconds.
[  139.254957]       Not tainted 6.2.0-12486-gc2e9a8e97294 #42
[  139.255695] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  139.256706] task:ubidetach       state:D stack:0     pid:1692  ppid:1527   flags:0x00004000
[  139.256718] Call Trace:
[  139.256731]  <TASK>
[  139.256746]  __schedule+0x8d0/0x1b40
[  139.256794]  schedule+0x96/0x1b0
[  139.256805]  schedule_preempt_disabled+0x25/0x40
[  139.256813]  rwsem_down_write_slowpath+0x589/0x1340
[  139.256900]  down_write+0x182/0x1c0
[  139.256917]  ubi_update_fastmap+0x68/0x2730 [ubi]
[  139.256944]  ? _printk+0x64/0x90
[  139.256964]  ? ubi_msg+0x7b/0xa0 [ubi]
[  139.256994]  ubi_detach_mtd_dev+0x255/0x310 [ubi]
[  139.257014]  ctrl_cdev_ioctl+0x203/0x320 [ubi]
[  139.257042]  __x64_sys_ioctl+0xea/0x140
[  139.257055]  do_syscall_64+0x39/0x80
[  139.257062]  entry_SYSCALL_64_after_hwframe+0x63/0xcd
[  139.257074] RIP: 0033:0x7fd73f706577
[  139.257085] RSP: 002b:00007fff9d2f8398 EFLAGS: 00000202 ORIG_RAX: 0000000000000010
[  139.257096] RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007fd73f706577
[  139.257102] RDX: 00007fff9d2f83ac RSI: 0000000040046f41 RDI: 0000000000000003
[  139.257105] RBP: 000000000040381a R08: 0000000000000000 R09: 0000000000000001
[  139.257108] R10: 0000000000000003 R11: 0000000000000202 R12: 000000000040381a
[  139.257113] R13: 00007fff9d2f8510 R14: 0000000000000000 R15: 0000000000000000
[  139.257129]  </TASK>
[  139.257132] Future hung task reports are suppressed, see sysctl kernel.hung_task_warnings
[  139.257133] INFO: lockdep is turned off.
Comment 2 wangzhaolong1 2023-02-27 12:43:00 UTC
[root@localhost ~]# ps -aux | grep "D." | grep -v grep
USER       PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
root        83  0.8  0.0      0     0 ?        D    20:59   0:03 [torture_stutter]
root      1690  0.0  0.0      0     0 ?        D    21:00   0:00 [ubi_bgt0d]
root      1692  0.0  0.0   4416  1408 pts/0    D+   21:00   0:00 ubidetach -p /dev/mtd0





[  123.897437] INFO: lockdep is turned off.
[  124.402840] scftorture:  scf_invoked_count ver: 12725442 resched: 2021797 single: 1904795/1900629 single_ofl: 0/0 single_rpc: 1903742 single_rpc_ofl: 0 many: 237393/238125 all: 118962/119017 onoff: 0/0:0/0 -1,0:-1,0 0:0 (HZ=1000) ste: 0 stnmie: 0 stnmoe: 0 staf: 0
[  139.250851] INFO: task ubi_bgt0d:1690 blocked for more than 76 seconds.
[  139.251810]       Not tainted 6.2.0-12486-gc2e9a8e97294 #42
[  139.252403] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  139.253210] task:ubi_bgt0d       state:D stack:0     pid:1690  ppid:2      flags:0x00004000
[  139.253270] Call Trace:
[  139.253290]  <TASK>
[  139.253344]  __schedule+0x8d0/0x1b40
[  139.253424]  schedule+0x96/0x1b0
[  139.253438]  schedule_preempt_disabled+0x25/0x40
[  139.253455]  __down_read_common+0x652/0xfe0
[  139.253473]  ? _raw_spin_unlock_irqrestore+0x61/0xd0
[  139.253508]  ? kmemleak_alloc+0x50/0xd0
[  139.253534]  ? down_read+0x5e/0x70
[  139.253543]  down_read+0x5e/0x70
[  139.253559]  ? schedule_ubi_work+0x27/0x60 [ubi]
[  139.253704]  schedule_ubi_work+0x27/0x60 [ubi]
[  139.253753]  schedule_erase+0x8d/0x170 [ubi]
[  139.253804]  __erase_worker+0x16e/0x850 [ubi]
[  139.253859]  ? lock_release+0x383/0x540
[  139.253872]  erase_worker+0xcd/0x100 [ubi]
[  139.253900]  do_work+0xce/0x1b0 [ubi]
[  139.253930]  ubi_thread+0x231/0x3b0 [ubi]
[  139.253975]  ? ubi_bitflip_check+0x560/0x560 [ubi]
[  139.254007]  kthread+0x13e/0x1a0
[  139.254025]  ? kthread_exit+0x50/0x50
[  139.254062]  ret_from_fork+0x1f/0x30
[  139.254102]  </TASK>
[  139.254123] INFO: task ubidetach:1692 blocked for more than 76 seconds.
[  139.254957]       Not tainted 6.2.0-12486-gc2e9a8e97294 #42
[  139.255695] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  139.256706] task:ubidetach       state:D stack:0     pid:1692  ppid:1527   flags:0x00004000
[  139.256718] Call Trace:
[  139.256731]  <TASK>
[  139.256746]  __schedule+0x8d0/0x1b40
[  139.256794]  schedule+0x96/0x1b0
[  139.256805]  schedule_preempt_disabled+0x25/0x40
[  139.256813]  rwsem_down_write_slowpath+0x589/0x1340
[  139.256900]  down_write+0x182/0x1c0
[  139.256917]  ubi_update_fastmap+0x68/0x2730 [ubi]
[  139.256944]  ? _printk+0x64/0x90
[  139.256964]  ? ubi_msg+0x7b/0xa0 [ubi]
[  139.256994]  ubi_detach_mtd_dev+0x255/0x310 [ubi]
[  139.257014]  ctrl_cdev_ioctl+0x203/0x320 [ubi]
[  139.257042]  __x64_sys_ioctl+0xea/0x140
[  139.257055]  do_syscall_64+0x39/0x80
[  139.257062]  entry_SYSCALL_64_after_hwframe+0x63/0xcd
[  139.257074] RIP: 0033:0x7fd73f706577
[  139.257085] RSP: 002b:00007fff9d2f8398 EFLAGS: 00000202 ORIG_RAX: 0000000000000010
[  139.257096] RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007fd73f706577
[  139.257102] RDX: 00007fff9d2f83ac RSI: 0000000040046f41 RDI: 0000000000000003
[  139.257105] RBP: 000000000040381a R08: 0000000000000000 R09: 0000000000000001
[  139.257108] R10: 0000000000000003 R11: 0000000000000202 R12: 000000000040381a
[  139.257113] R13: 00007fff9d2f8510 R14: 0000000000000000 R15: 0000000000000000
[  139.257129]  </TASK>
[  139.257132] Future hung task reports are suppressed, see sysctl kernel.hung_task_warnings
[  139.257133] INFO: lockdep is turned off.
Comment 3 The Linux kernel's regression tracker (Thorsten Leemhuis) 2023-02-28 05:22:37 UTC
Quick question, as this is afaics unclear from the report:

Did this work in earlier kernels like 6.1.y? Or IOW: is this a regression?