(Excuse me, but I'm directly registering here because I'm currently having a trouble with sending mails to / receiving mails from vger.kernel.org domain.) The system deadlocks when a call_usermodehelper(UMH_WAIT_EXEC) request triggers a call_usermodehelper(UMH_WAIT_PROC) request. For example, # : > /tmp/dummy # chmod 755 /tmp/dummy # echo /tmp/dummy > /proc/sys/kernel/hotplug # modprobe whatever generates the output below. INFO: task kworker/u:1:14 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. kworker/u:1 D 0000000e 6476 14 2 0x00000000 deee9d68 00000046 30c7da8f 0000000e 30c7da8f 000001b7 0007a4d1 00000000 d7878d78 dd79e000 c1a258e0 deee63a0 c1a258e0 30cf7f1d 0000000e c1a258e0 c1a258e0 00000001 0000000e deee63a0 0000000e 0018db76 00000000 ffffffff Call Trace: [<c105ca91>] ? sched_clock_cpu+0x131/0x190 [<c13946f0>] schedule+0x30/0x50 [<c1394a45>] schedule_timeout+0x155/0x1c0 [<c106b6b4>] ? mark_held_locks+0x64/0xf0 [<c1397012>] ? _raw_spin_unlock_irq+0x22/0x40 [<c106bb1b>] ? trace_hardirqs_on+0xb/0x10 [<c1393a92>] wait_for_common+0xd2/0x130 [<c1036920>] ? try_to_wake_up+0x2b0/0x2b0 [<c1393b92>] wait_for_completion+0x12/0x20 [<c103acd8>] do_fork+0xc8/0x280 [<c13939eb>] ? wait_for_common+0x2b/0x130 [<c1009d16>] kernel_thread+0x86/0xa0 [<c104ef30>] ? call_usermodehelper_setup+0x90/0x90 [<c104ef30>] ? call_usermodehelper_setup+0x90/0x90 [<c1397e74>] ? common_interrupt+0x34/0x34 [<c104f2c8>] __call_usermodehelper+0x28/0x90 [<c104fda8>] process_one_work+0x198/0x3e0 [<c104fd3c>] ? process_one_work+0x12c/0x3e0 [<c104f2a0>] ? call_usermodehelper_exec+0x100/0x100 [<c1051d43>] worker_thread+0x133/0x310 [<c106bb1b>] ? trace_hardirqs_on+0xb/0x10 [<c1051c10>] ? manage_workers+0x1e0/0x1e0 [<c1056cf4>] kthread+0x74/0x80 [<c1056c80>] ? 
__init_kthread_worker+0x60/0x60 [<c1397e7a>] kernel_thread_helper+0x6/0xd 2 locks held by kworker/u:1/14: #0: (khelper){.+.+.+}, at: [<c104fd3c>] process_one_work+0x12c/0x3e0 #1: ((&sub_info->work)){+.+.+.}, at: [<c104fd3c>] process_one_work+0x12c/0x3e0 I wrote two patches. Please review. ----- Patch A start ----- [PATCH v3] kmod: Avoid deadlock by recursive kmod call. The system deadlocks when call_usermodehelper(UMH_WAIT_EXEC) request triggered call_usermodehelper(UMH_WAIT_PROC) request. This is because "khelper thread is waiting at wait_for_completion() in do_fork() since the worker thread was created with CLONE_VFORK flag" and "the worker thread cannot call complete() because do_execve() is blocked at UMH_WAIT_PROC request (e.g. request_module() from search_binary_handler())" and "the khelper thread cannot start processing UMH_WAIT_PROC request because the khelper thread is waiting at wait_for_completion() in do_fork()". In order to avoid deadlock, do not try to call wait_for_completion() in call_usermodehelper_exec() if the worker thread was created by khelper thread with CLONE_VFORK flag. This bug exists at least since 2.6.10. Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> --- fs/exec.c | 1 + include/linux/sched.h | 3 +++ kernel/kmod.c | 13 ++++++++++++- 3 files changed, 16 insertions(+), 1 deletion(-) --- linux-3.2.orig/fs/exec.c +++ linux-3.2/fs/exec.c @@ -1406,6 +1406,7 @@ int search_binary_handler(struct linux_b fput(bprm->file); bprm->file = NULL; current->did_exec = 1; + current->kmod_thread = 0; proc_exec_connector(current); return retval; } --- linux-3.2.orig/include/linux/sched.h +++ linux-3.2/include/linux/sched.h @@ -1302,6 +1302,9 @@ struct task_struct { unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; + /* Prevent recursive kmod request. 
*/ + unsigned kmod_thread:1; + pid_t pid; pid_t tgid; --- linux-3.2.orig/kernel/kmod.c +++ linux-3.2/kernel/kmod.c @@ -189,6 +189,13 @@ fail: do_exit(0); } +static int call_helper(void *data) +{ + /* Do not trigger recursive kmod call. */ + current->kmod_thread = 1; + return ____call_usermodehelper(data); +} + void call_usermodehelper_freeinfo(struct subprocess_info *info) { if (info->cleanup) @@ -252,7 +259,7 @@ static void __call_usermodehelper(struct pid = kernel_thread(wait_for_helper, sub_info, CLONE_FS | CLONE_FILES | SIGCHLD); else - pid = kernel_thread(____call_usermodehelper, sub_info, + pid = kernel_thread(call_helper, sub_info, CLONE_VFORK | SIGCHLD); switch (wait) { @@ -428,6 +435,10 @@ int call_usermodehelper_exec(struct subp retval = -EBUSY; goto out; } + if (wait != UMH_NO_WAIT && current->kmod_thread) { + retval = -EBUSY; + goto out; + } sub_info->complete = &done; sub_info->wait = wait; ----- Patch A end ----- ----- Patch B start ----- [PATCH v3] kmod: Avoid deadlock by recursive kmod call. The system deadlocks when call_usermodehelper(UMH_WAIT_EXEC) request triggered call_usermodehelper(UMH_WAIT_PROC) request. This is because "khelper thread is waiting at wait_for_completion() in do_fork() since the worker thread was created with CLONE_VFORK flag" and "the worker thread cannot call complete() because do_execve() is blocked at UMH_WAIT_PROC request (e.g. request_module() from search_binary_handler())" and "the khelper thread cannot start processing UMH_WAIT_PROC request because the khelper thread is waiting at wait_for_completion() in do_fork()". In order to avoid deadlock, do not try to call wait_for_completion() in call_usermodehelper_exec() if the worker thread is blocking khelper thread. This bug exists at least since 2.6.10. 
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> --- kernel/kmod.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) --- linux-3.2.orig/kernel/kmod.c +++ linux-3.2/kernel/kmod.c @@ -43,6 +43,8 @@ extern int max_threads; static struct workqueue_struct *khelper_wq; +static const struct task_struct *khelper_task; +static const void *khelper_stack; #define CAP_BSET (void *)1 #define CAP_PI (void *)2 @@ -245,6 +247,8 @@ static void __call_usermodehelper(struct enum umh_wait wait = sub_info->wait; pid_t pid; + khelper_task = current; + khelper_stack = task_stack_page(khelper_task); /* CLONE_VFORK: wait until the usermode helper has execve'd * successfully We need the data structures to stay around * until that is done. */ @@ -402,6 +406,12 @@ void call_usermodehelper_setfns(struct s } EXPORT_SYMBOL(call_usermodehelper_setfns); +static inline bool object_is_on_khelper_stack(void *obj) +{ + return obj && obj >= khelper_stack && + obj < (khelper_stack + THREAD_SIZE); +} + /** * call_usermodehelper_exec - start a usermode application * @sub_info: information about the subprocessa @@ -428,6 +438,11 @@ int call_usermodehelper_exec(struct subp retval = -EBUSY; goto out; } + if (wait != UMH_NO_WAIT && current != khelper_task && + object_is_on_khelper_stack(current->vfork_done)) { + retval = -EBUSY; + goto out; + } sub_info->complete = &done; sub_info->wait = wait; ----- Patch B end -----
Created attachment 72068 [details] Patch A
Created attachment 72069 [details] Patch B
> (Excuse me, but I'm directly registering here because I'm currently having a > trouble with sending mails to / receiving mails from vger.kernel.org domain.) This problem seems to be fixed now. We can move the discussion to the mailing list on vger.kernel.org if that is preferred.
Closing as moved to lkml (and patches should always go there)