Bug 200405

Summary: Potential deadlock in btrfs_tree_lock() when perform writing to btrfs filesystem
Product: File System Reporter: Wen Xu (wen.xu)
Component: btrfsAssignee: BTRFS virtual assignee (fs_btrfs)
Status: RESOLVED CODE_FIX    
Severity: normal CC: dsterba, wen.xu
Priority: P1    
Hardware: All   
OS: Linux   
Kernel Version: 4.18 Subsystem:
Regression: No Bisected commit-id:
Attachments: The (compressed) crafted image which causes crash

Description Wen Xu 2018-07-04 17:26:32 UTC
Created attachment 277169 [details]
The (compressed) crafted image which causes crash

- Reproduce
# mkdir mnt
# mount -t btrfs 6.img mnt
# gcc -o poc poc.c
# ./poc ./mnt

- POC (poc.c)
    #define _GNU_SOURCE
    #include <sys/types.h>
    #include <sys/mount.h>
    #include <sys/mman.h>
    #include <sys/stat.h>
    #include <sys/xattr.h>

    #include <dirent.h>
    #include <errno.h>
    #include <error.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>

    #include <linux/falloc.h>
    #include <linux/loop.h>

    static void activity(char *mpoint) {

      char *foo_bar_baz;
      int err;

      static int buf[8192];
      memset(buf, 0, sizeof(buf));

      err = asprintf(&foo_bar_baz, "%s/foo/bar/baz", mpoint);

      int fd = open(foo_bar_baz, O_RDWR | O_TRUNC, 0777);
      if (fd >= 0) { 
        write(fd, (char *)buf, 517); 
        write(fd, (char *)buf, sizeof(buf)); 
        fdatasync(fd);
        fsync(fd);
        close(fd); 
      }  
      
    }

    int main(int argc, char *argv[]) {
      activity(argv[1]);
      return 0;
    }

- Kernel message
Kernel hangs with the following dmesg
[  266.413453] BTRFS: device fsid 3381d111-94a3-4ac7-8f39-611bbbdab7e6 devid 1 transid 8 /dev/loop0
[  266.421264] BTRFS info (device loop0): disk space caching is enabled
[  266.421276] BTRFS info (device loop0): has skinny extents
[  278.113360] WARNING: CPU: 1 PID: 41 at fs/btrfs/locking.c:230 btrfs_tree_lock+0x3e2/0x400
[  278.113368] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too qxl drm_kms_helper crct10dif_pclmul syscopyarea sysfillrect sysimgblt fb_sys_fops ttm crc32_pclmul aesni_intel drm aes_x86_64 crypto_simd cryptd glue_helper 8139cp mii pata_acpi floppy
[  278.113737] CPU: 1 PID: 41 Comm: kworker/u4:1 Not tainted 4.18.0-rc1+ #8
[  278.113745] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[  278.113753] Workqueue: btrfs-endio-write btrfs_endio_write_helper
[  278.113761] RIP: 0010:btrfs_tree_lock+0x3e2/0x400
[  278.113762] Code: 00 48 c7 40 08 00 00 00 00 48 8b 45 d0 65 48 33 04 25 28 00 00 00 75 20 48 81 c4 a0 00 00 00 5b 41 5c 41 5d 41 5e 41 5f 5d c3 <0f> 0b e9 d4 fc ff ff 0f 0b e9 61 ff ff ff e8 ab f4 87 ff 90 66 2e
[  278.113818] RSP: 0018:ffff8801f407f488 EFLAGS: 00010246
[  278.113828] RAX: 0000000000000000 RBX: ffff8801ef9cc6b8 RCX: ffffffffa584d87f
[  278.113831] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: ffff8801f505d720
[  278.113834] RBP: ffff8801f407f550 R08: ffffed003de4cc0b R09: ffffed003de4cc0b
[  278.113837] R10: 0000000000000001 R11: ffffed003de4cc0a R12: ffff8801f505d280
[  278.113844] R13: 0000000000000029 R14: ffff8801ef266000 R15: ffff8801e0364200
[  278.113848] FS:  0000000000000000(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000
[  278.113851] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  278.113854] CR2: 00000000006fcdf4 CR3: 00000001f0820000 CR4: 00000000000006e0
[  278.113865] Call Trace:
[  278.113876]  ? btrfs_tree_read_unlock_blocking+0xf0/0xf0
[  278.113900]  ? alloc_extent_buffer+0x85/0x4e0
[  278.113921]  ? btrfs_reserve_extent+0x1a9/0x280
[  278.113936]  btrfs_alloc_tree_block+0x39f/0x770
[  278.113942]  ? btrfs_reserve_extent+0x280/0x280
[  278.113972]  ? memcpy+0x45/0x50
[  278.113988]  __btrfs_cow_block+0x285/0x9e0
[  278.113995]  ? update_ref_for_cow+0x5e0/0x5e0
[  278.114008]  ? kasan_check_write+0x14/0x20
[  278.114014]  ? btrfs_tree_lock+0x39e/0x400
[  278.114019]  ? btrfs_tree_read_unlock_blocking+0x20/0xf0
[  278.114024]  ? kasan_check_write+0x14/0x20
[  278.114029]  btrfs_cow_block+0x191/0x2e0
[  278.114035]  btrfs_search_slot+0x492/0x1160
[  278.114040]  ? normal_work_helper+0xf6/0x500
[  278.114044]  ? btrfs_endio_write_helper+0x12/0x20
[  278.114076]  ? kthread+0x180/0x1d0
[  278.114114]  ? ret_from_fork+0x35/0x40
[  278.114120]  ? split_leaf+0xbf0/0xbf0
[  278.114124]  ? ret_from_fork+0x35/0x40
[  278.114137]  ? insert_delayed_ref+0xeb/0x450
[  278.114146]  btrfs_lookup_csum+0xec/0x280
[  278.114151]  ? truncate_one_csum+0x170/0x170
[  278.114154]  ? kasan_kmalloc+0x30/0xe0
[  278.114167]  ? memcg_kmem_put_cache+0x1b/0xa0
[  278.114173]  ? kmem_cache_alloc+0x17c/0x1e0
[  278.114177]  ? btrfs_alloc_path+0x26/0x30
[  278.114182]  btrfs_csum_file_blocks+0x2be/0xa60
[  278.114187]  ? kmem_cache_free+0x89/0x1e0
[  278.114192]  ? btrfs_del_csums+0x5c0/0x5c0
[  278.114199]  ? dio_read_error+0x700/0x700
[  278.114204]  ? join_transaction+0x16e/0x6a0
[  278.114208]  ? btrfs_record_root_in_trans+0x99/0xb0
[  278.114226]  ? rb_next+0x24/0x80
[  278.114232]  add_pending_csums+0xaf/0xf0
[  278.114238]  btrfs_finish_ordered_io+0x74b/0xc90
[  278.114244]  ? btrfs_unlink_subvol+0x890/0x890
[  278.114249]  ? kasan_check_write+0x14/0x20
[  278.114262]  ? finish_task_switch+0xec/0x340
[  278.114275]  ? put_prev_task_fair+0x3a/0x50
[  278.114281]  finish_ordered_fn+0x15/0x20
[  278.114285]  normal_work_helper+0xf6/0x500
[  278.114295]  ? read_word_at_a_time+0x12/0x20
[  278.114300]  ? strscpy+0xbd/0x1e0
[  278.114305]  btrfs_endio_write_helper+0x12/0x20
[  278.114310]  process_one_work+0x302/0x770
[  278.114315]  worker_thread+0x81/0x6d0
[  278.114321]  kthread+0x180/0x1d0
[  278.114325]  ? rescuer_thread+0x710/0x710
[  278.114330]  ? kthread_associate_blkcg+0x150/0x150
[  278.114334]  ret_from_fork+0x35/0x40
[  278.114339] ---[ end trace 2e85051acb5f6dc1 ]---

- Location
https://elixir.bootlin.com/linux/v4.18-rc3/source/fs/btrfs/locking.c#L230
	WARN_ON(eb->lock_owner == current->pid); <--
again:
	wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
	wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);

Found by Wen Xu and Po-Ning Tseng from SSLab at Gatech.
Comment 1 David Sterba 2019-05-21 12:25:59 UTC
Thanks for the report. Fixed by b72c3aba09a53fc7c1 "btrfs: locking: Add extra check in btrfs_init_new_buffer() to avoid deadlock", in 4.20.