Bug 206161 - Reshape RAID 5 to RAID 6 fails with active journal
Summary: Reshape RAID 5 to RAID 6 fails with active journal
Status: NEW
Alias: None
Product: IO/Storage
Classification: Unclassified
Component: MD
Hardware: x86-64 Linux
Importance: P1 normal
Assignee: io_md
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2020-01-11 13:49 UTC by Philipp
Modified: 2020-02-27 20:20 UTC
CC List: 2 users

See Also:
Kernel Version: 5.4.10-arch1-1
Subsystem:
Regression: No
Bisected commit-id:


Attachments
dmesg output (4.73 KB, text/plain)
2020-01-11 13:49 UTC, Philipp

Description Philipp 2020-01-11 13:49:21 UTC
Created attachment 286751 [details]
dmesg output

I am not able to reshape a RAID 5 with a journal to a RAID 6 with a journal.

I added the extra hard disk with:
mdadm --add /dev/md0 /dev/sdc1

mdadm --detail gives the following:
root@nas ~# mdadm --detail /dev/md0
/dev/md0:
           Version : 1.2
     Creation Time : Thu May 23 17:36:39 2019
        Raid Level : raid5
        Array Size : 7813871616 (7451.89 GiB 8001.40 GB)
     Used Dev Size : 3906935808 (3725.94 GiB 4000.70 GB)
      Raid Devices : 3
     Total Devices : 5
       Persistence : Superblock is persistent

       Update Time : Sat Jan 11 13:50:29 2020
             State : clean 
    Active Devices : 3
   Working Devices : 5
    Failed Devices : 0
     Spare Devices : 1

            Layout : left-symmetric
        Chunk Size : 256K

Consistency Policy : journal

              Name : any:dataraid
              UUID : 96fe77fb:b761a770:ad6c1452:2129f2b5
            Events : 11001

    Number   Major   Minor   RaidDevice State
       1       8       17        0      active sync   /dev/sdb1
       4       8       49        1      active sync   /dev/sdd1
       3       8       65        2      active sync   /dev/sde1

       0       8        4        -      journal   /dev/sda4
       5       8       33        -      spare   /dev/sdc1


I then tried the reshape and got the following:
root@nas ~# mdadm --grow /dev/md0 --level 6 --raid-devices 4 --verbose
mdadm: level of /dev/md0 changed to raid6
mdadm: Cannot set device shape for /dev/md0
mdadm: aborting level change
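
For anyone narrowing this down, the failure can be pinned to the kernel by tracing what mdadm does (a diagnostic sketch; mdadm drives md through both ioctls and sysfs writes, so trace both):

# Show which ioctl/sysfs write comes back with EINVAL during the level change
strace -f -e trace=ioctl,openat,write mdadm --grow /dev/md0 --level 6 --raid-devices 4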

The dmesg output in the attachment suggests that something is wrong in the md driver.
I tried this on two different machines to rule out a hardware failure;
the same dmesg output appeared on both machines.

Steps to reproduce:

dd if=/dev/zero of=sda.img count=100 bs=1M
dd if=/dev/zero of=sdb.img count=100 bs=1M
dd if=/dev/zero of=sdc.img count=100 bs=1M
dd if=/dev/zero of=sdd.img count=100 bs=1M
dd if=/dev/zero of=sde.img count=100 bs=1M

losetup /dev/loop0 sda.img
losetup /dev/loop1 sdb.img
losetup /dev/loop2 sdc.img
losetup /dev/loop3 sdd.img
losetup /dev/loop4 sde.img

mdadm --create --verbose --level=5 --raid-devices=3 /dev/md0 /dev/loop1 /dev/loop2 /dev/loop3 --write-journal=/dev/loop0
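
The active journal can be confirmed before reshaping; it shows up as the Consistency Policy line, as in the --detail output above:

mdadm --detail /dev/md0 | grep 'Consistency Policy'
# Consistency Policy : journal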

Then convert the RAID 5 to RAID 6 by adding a disk and reshaping the array:
mdadm --add /dev/md0 /dev/loop4

mdadm --grow /dev/md0 --level=6 --raid-devices=4

Without the "--write-journal=/dev/loop0" I am able to reshape the array to RAID 6 successfully.
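
For comparison, a sketch of the journal-less control case on the same loop devices (tear the test array down first; --zero-superblock wipes the old metadata):

# Stop the array and wipe the old superblocks
mdadm --stop /dev/md0
mdadm --zero-superblock /dev/loop1 /dev/loop2 /dev/loop3 /dev/loop4

# Re-create without a journal, then grow; this reshape succeeds
mdadm --create --verbose --level=5 --raid-devices=3 /dev/md0 /dev/loop1 /dev/loop2 /dev/loop3
mdadm --add /dev/md0 /dev/loop4
mdadm --grow /dev/md0 --level=6 --raid-devices=4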

I tried this on two machines:
processor	: 0
vendor_id	: GenuineIntel
cpu family	: 6
model		: 122
model name	: Intel(R) Celeron(R) J4105 CPU @ 1.50GHz
stepping	: 1
microcode	: 0x32
cpu MHz		: 798.772
cache size	: 4096 KB
physical id	: 0
siblings	: 4
core id		: 0
cpu cores	: 4
apicid		: 0
initial apicid	: 0
fpu		: yes
fpu_exception	: yes
cpuid level	: 24
wp		: yes
flags		: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx est tm2 ssse3 sdbg cx16 xtpr pdcm sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave rdrand lahf_lm 3dnowprefetch cpuid_fault cat_l2 pti cdp_l2 ssbd ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust smep erms mpx rdt_a rdseed smap clflushopt intel_pt sha_ni xsaveopt xsavec xgetbv1 xsaves dtherm ida arat pln pts umip rdpid md_clear arch_capabilities
bugs		: cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass
bogomips	: 2996.00
clflush size	: 64
cache_alignment	: 64
address sizes	: 39 bits physical, 48 bits virtual
power management:

With this kernel:
Linux nas 5.4.10-arch1-1 #1 SMP PREEMPT Thu, 09 Jan 2020 10:14:29 +0000 x86_64 GNU/Linux

And on Debian:
processor	: 0
vendor_id	: AuthenticAMD
cpu family	: 23
model		: 8
model name	: AMD Ryzen 7 2700 Eight-Core Processor
stepping	: 2
microcode	: 0x6000626
cpu MHz		: 3199.994
cache size	: 512 KB
physical id	: 0
siblings	: 1
core id		: 0
cpu cores	: 1
apicid		: 0
initial apicid	: 0
fpu		: yes
fpu_exception	: yes
cpuid level	: 13
wp		: yes
flags		: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid tsc_known_freq pni pclmulqdq monitor ssse3 cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx rdrand hypervisor lahf_lm cr8_legacy abm sse4a misalignsse 3dnowprefetch ssbd vmmcall fsgsbase avx2 rdseed clflushopt arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif
bugs		: fxsave_leak sysret_ss_attrs null_seg spectre_v1 spectre_v2 spec_store_bypass
bogomips	: 6399.98
TLB size	: 2560 4K pages
clflush size	: 64
cache_alignment	: 64
address sizes	: 48 bits physical, 48 bits virtual
power management:

Linux nas2 4.19.0-6-amd64 #1 SMP Debian 4.19.67-2+deb10u2 (2019-11-11) x86_64 GNU/Linux
Comment 1 Guoqing Jiang 2020-02-09 05:01:44 UTC
I think reshape is not supported by raid5 with a journal device; check_reshape() in drivers/md/raid5.c rejects it up front:

static int check_reshape(struct mddev *mddev)
{
        struct r5conf *conf = mddev->private;

        if (raid5_has_log(conf) || raid5_has_ppl(conf))
                return -EINVAL;


And for the call trace, the changes below could resolve it: level_store() already quiesces the array via mddev_suspend(), so mddev_detach() is taught to skip the redundant quiesce in that path.

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 3dfa3f4aa647..b1f150450fd8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -90,7 +90,7 @@ static struct workqueue_struct *md_misc_wq;
 
 static int remove_and_add_spares(struct mddev *mddev,
                                 struct md_rdev *this);
-static void mddev_detach(struct mddev *mddev);
+static void mddev_detach(struct mddev *mddev, bool quiesced);
 
 /*
  * Default number of read corrections we'll attempt on an rdev
@@ -3983,7 +3983,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 
        /* Looks like we have a winner */
        mddev_suspend(mddev);
-       mddev_detach(mddev);
+       mddev_detach(mddev, true);
 
        spin_lock(&mddev->lock);
        oldpers = mddev->pers;
@@ -5988,7 +5988,7 @@ int md_run(struct mddev *mddev)
        return 0;
 
 bitmap_abort:
-       mddev_detach(mddev);
+       mddev_detach(mddev, false);
        if (mddev->private)
                pers->free(mddev, mddev->private);
        mddev->private = NULL;
@@ -6181,11 +6181,12 @@ void md_stop_writes(struct mddev *mddev)
 }
 EXPORT_SYMBOL_GPL(md_stop_writes);
 
-static void mddev_detach(struct mddev *mddev)
+static void mddev_detach(struct mddev *mddev, bool quiesced)
 {
        md_bitmap_wait_behind_writes(mddev);
        if (mddev->pers && mddev->pers->quiesce) {
-               mddev->pers->quiesce(mddev, 1);
+               if (!quiesced)
+                       mddev->pers->quiesce(mddev, 1);
                mddev->pers->quiesce(mddev, 0);
        }
        md_unregister_thread(&mddev->thread);
@@ -6197,7 +6198,7 @@ static void __md_stop(struct mddev *mddev)
 {
        struct md_personality *pers = mddev->pers;
        md_bitmap_destroy(mddev);
-       mddev_detach(mddev);
+       mddev_detach(mddev, false);
        /* Ensure ->event_work is done */
        flush_workqueue(md_misc_wq);
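
If this is right, the level change should then be refused cleanly (check_reshape() returning -EINVAL) rather than triggering the trace. A quick way to confirm after applying the change:

mdadm --grow /dev/md0 --level=6 --raid-devices=4
# expected: mdadm still aborts the level change
dmesg | tail
# expected: no WARN/call trace in the log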
Comment 2 Philipp 2020-02-27 20:20:47 UTC
Thank you so much. This works for me and didn't cause a trace.

I still cannot convert my RAID 5 to RAID 6 directly, but I found a workaround.
The problem seems to be the journal device.

If I remove the journal device first, then convert the array to RAID 6 and add the journal disk back in, the conversion succeeds:

# Remove journal from array
mdadm --manage /dev/md0 --remove /dev/loop0

# Add disk
mdadm --add /dev/md0 /dev/loop4

# Set new RAID level
mdadm --grow /dev/md0 --level=6 --raid-devices=4

# Add journaling disk back in
mdadm --manage /dev/md0 --add-journal /dev/loop0

After that it is a degraded RAID 6 and the array begins to rebuild.
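
The rebuild progress can be watched through the standard md status interface:

# Watch the degraded RAID 6 resync
watch -n 5 cat /proc/mdstat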
