Bug 205709 - Kernel panic due to a bug in md_ioctl
Summary: Kernel panic due to a bug in md_ioctl
Status: NEW
Alias: None
Product: Drivers
Classification: Unclassified
Component: Other (show other bugs)
Hardware: All Linux
: P1 normal
Assignee: drivers_other
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2019-11-29 17:10 UTC by Tristan Madani
Modified: 2019-11-29 17:10 UTC (History)
0 users

See Also:
Kernel Version: 5.3.10
Subsystem:
Regression: No
Bisected commit-id:


Attachments

Description Tristan Madani 2019-11-29 17:10:56 UTC
LINUX KERNEL 5.3.10 - Kernel panic due to a bug in md_ioctl

0x01 - Introduction
===

# Product: Linux Kernel 
# Version: 5.3.10 (and probably other versions)
# Bug: UAF (Read)
# Tested on: GNU/Linux Debian 9 x86_64


0x02 - Crash report
===

md: md0 stopped.
md: md0 stopped.
md: md0 stopped.
md: md0 stopped.
md: md0 stopped.
WARNING: CPU: 1 PID: 3598 at drivers/md/md.c:7279 md_ioctl+0x3829/0x61e0 drivers/md/md.c:7279
Kernel panic - not syncing: panic_on_warn set ...
CPU: 1 PID: 3598 Comm: syz-executor.0 Not tainted 5.3.10 #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0xca/0x13e lib/dump_stack.c:113
 panic+0x290/0x6c9 kernel/panic.c:220
 __warn.cold.12+0x20/0x45 kernel/panic.c:577
 report_bug+0x246/0x2d0 lib/bug.c:186
 fixup_bug arch/x86/kernel/traps.c:179 [inline]
 fixup_bug arch/x86/kernel/traps.c:174 [inline]
 do_error_trap+0x123/0x1e0 arch/x86/kernel/traps.c:272
 do_invalid_op+0x31/0x40 arch/x86/kernel/traps.c:291
 invalid_op+0x23/0x30 arch/x86/entry/entry_64.S:1028
RIP: 0010:md_ioctl+0x3829/0x61e0 drivers/md/md.c:7279
Code: 00 00 48 8b 85 70 fd ff ff 31 ff 44 8b 60 40 44 89 e6 e8 da c5 46 fe 45 85 e4 0f 85 2a f0 ff ff e9 45 fa ff ff e8 b7 c4 46 fe <0f> 0b e9 8e d3 ff ff e8 ab c4 46 fe 44 8b ad 40 fd ff ff 31 ff 44
RSP: 0018:ffff88800df778b8 EFLAGS: 00010212
RAX: 0000000000040000 RBX: 1ffff11001beef21 RCX: ffffc90000975000
RDX: 0000000000000099 RSI: ffffffff82f73bc9 RDI: 0000000000000007
RBP: ffff88800df77b90 R08: ffff88800e183000 R09: ffffed100561f666
R10: ffffed100561f665 R11: ffff88802b0fb32f R12: ffff88802e0105c0
R13: 0000000000000932 R14: 0000000000000932 R15: ffff88802b0fb4c8
 __blkdev_driver_ioctl block/ioctl.c:304 [inline]
 blkdev_ioctl+0x92b/0x1c50 block/ioctl.c:606
 block_ioctl+0xe9/0x130 fs/block_dev.c:1954
 vfs_ioctl fs/ioctl.c:46 [inline]
 file_ioctl fs/ioctl.c:509 [inline]
 do_vfs_ioctl+0x1c5/0x1310 fs/ioctl.c:696
 ksys_ioctl+0x9b/0xc0 fs/ioctl.c:713
 __do_sys_ioctl fs/ioctl.c:720 [inline]
 __se_sys_ioctl fs/ioctl.c:718 [inline]
 __x64_sys_ioctl+0x6f/0xb0 fs/ioctl.c:718
 do_syscall_64+0xbc/0x560 arch/x86/entry/common.c:296
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x465b89
Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f8f3431dc68 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
RAX: ffffffffffffffda RBX: 000000000051bf00 RCX: 0000000000465b89
RDX: 0000000000000000 RSI: 0000000000000932 RDI: 000000000000002d
RBP: 00000000ffffffff R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00000000004a6427
R13: 00000000004e6570 R14: 00000000004a7dd1 R15: 00007f8f3431e6bc
Dumping ftrace buffer:
   (ftrace buffer empty)
Kernel Offset: disabled
Rebooting in 1 seconds..


0x03 - Syzkaller reproducer
===


# {Threaded:true Collide:false Repeat:true RepeatTimes:0 Procs:4 Sandbox: Fault:false FaultCall:-1 FaultNth:0 Leak:false EnableTun:false EnableNetDev:false EnableNetReset:false EnableCgroups:false EnableBinfmtMisc:false EnableCloseFds:false EnableKCSAN:false EnableDevlinkPCI:true UseTmpDir:false HandleSegv:false Repro:false Trace:false}
r0 = openat$md(0xffffffffffffff9c, &(0x7f0000000000)='/dev/md0\x00', 0x0, 0x0)
ioctl$BLKROSET(r0, 0x932, 0x0)
clone3(&(0x7f00000003c0)={0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, 0x40)

0x04 - C reproducer
===

#define _GNU_SOURCE 

#include <arpa/inet.h>
#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <net/if.h>
#include <netinet/in.h>
#include <pthread.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/prctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

#include <linux/futex.h>
#include <linux/genetlink.h>
#include <linux/if_addr.h>
#include <linux/if_link.h>
#include <linux/in6.h>
#include <linux/neighbour.h>
#include <linux/net.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/veth.h>

unsigned long long procid;

static void sleep_ms(uint64_t ms)
{
	usleep(ms * 1000);
}

static uint64_t current_time_ms(void)
{
	struct timespec ts;
	if (clock_gettime(CLOCK_MONOTONIC, &ts))
	exit(1);
	return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
}

static void thread_start(void* (*fn)(void*), void* arg)
{
	pthread_t th;
	pthread_attr_t attr;
	pthread_attr_init(&attr);
	pthread_attr_setstacksize(&attr, 128 << 10);
	int i;
	for (i = 0; i < 100; i++) {
		if (pthread_create(&th, &attr, fn, arg) == 0) {
			pthread_attr_destroy(&attr);
			return;
		}
		if (errno == EAGAIN) {
			usleep(50);
			continue;
		}
		break;
	}
	exit(1);
}

typedef struct {
	int state;
} event_t;

static void event_init(event_t* ev)
{
	ev->state = 0;
}

static void event_reset(event_t* ev)
{
	ev->state = 0;
}

static void event_set(event_t* ev)
{
	if (ev->state)
	exit(1);
	__atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE);
	syscall(SYS_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG);
}

static void event_wait(event_t* ev)
{
	while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
		syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0);
}

static int event_isset(event_t* ev)
{
	return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);
}

static int event_timedwait(event_t* ev, uint64_t timeout)
{
	uint64_t start = current_time_ms();
	uint64_t now = start;
	for (;;) {
		uint64_t remain = timeout - (now - start);
		struct timespec ts;
		ts.tv_sec = remain / 1000;
		ts.tv_nsec = (remain % 1000) * 1000 * 1000;
		syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts);
		if (__atomic_load_n(&ev->state, __ATOMIC_RELAXED))
			return 1;
		now = current_time_ms();
		if (now - start > timeout)
			return 0;
	}
}

static bool write_file(const char* file, const char* what, ...)
{
	char buf[1024];
	va_list args;
	va_start(args, what);
	vsnprintf(buf, sizeof(buf), what, args);
	va_end(args);
	buf[sizeof(buf) - 1] = 0;
	int len = strlen(buf);
	int fd = open(file, O_WRONLY | O_CLOEXEC);
	if (fd == -1)
		return false;
	if (write(fd, buf, len) != len) {
		int err = errno;
		close(fd);
		errno = err;
		return false;
	}
	close(fd);
	return true;
}

static struct {
	char* pos;
	int nesting;
	struct nlattr* nested[8];
	char buf[1024];
} nlmsg;

static void netlink_init(int typ, int flags, const void* data, int size)
{
	memset(&nlmsg, 0, sizeof(nlmsg));
	struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg.buf;
	hdr->nlmsg_type = typ;
	hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
	memcpy(hdr + 1, data, size);
	nlmsg.pos = (char*)(hdr + 1) + NLMSG_ALIGN(size);
}

static void netlink_attr(int typ, const void* data, int size)
{
	struct nlattr* attr = (struct nlattr*)nlmsg.pos;
	attr->nla_len = sizeof(*attr) + size;
	attr->nla_type = typ;
	memcpy(attr + 1, data, size);
	nlmsg.pos += NLMSG_ALIGN(attr->nla_len);
}

static int netlink_send_ext(int sock, uint16_t reply_type, int* reply_len)
{
	if (nlmsg.pos > nlmsg.buf + sizeof(nlmsg.buf) || nlmsg.nesting)
	exit(1);
	struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg.buf;
	hdr->nlmsg_len = nlmsg.pos - nlmsg.buf;
	struct sockaddr_nl addr;
	memset(&addr, 0, sizeof(addr));
	addr.nl_family = AF_NETLINK;
	unsigned n = sendto(sock, nlmsg.buf, hdr->nlmsg_len, 0, (struct sockaddr*)&addr, sizeof(addr));
	if (n != hdr->nlmsg_len)
	exit(1);
	n = recv(sock, nlmsg.buf, sizeof(nlmsg.buf), 0);
	if (n < sizeof(struct nlmsghdr))
	exit(1);
	if (reply_len && hdr->nlmsg_type == reply_type) {
		*reply_len = n;
		return 0;
	}
	if (n < sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr))
	exit(1);
	if (hdr->nlmsg_type != NLMSG_ERROR)
	exit(1);
	return -((struct nlmsgerr*)(hdr + 1))->error;
}

static int netlink_send(int sock)
{
	return netlink_send_ext(sock, 0, NULL);
}

const int kInitNetNsFd = 239;

#define DEVLINK_FAMILY_NAME "devlink"

#define DEVLINK_CMD_RELOAD 37
#define DEVLINK_ATTR_BUS_NAME 1
#define DEVLINK_ATTR_DEV_NAME 2
#define DEVLINK_ATTR_NETNS_FD 137

static void netlink_devlink_netns_move(const char* bus_name, const char* dev_name, int netns_fd)
{
	struct genlmsghdr genlhdr;
	struct nlattr* attr;
	int sock, err, n;
	uint16_t id = 0;
	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
	if (sock == -1)
	exit(1);
	memset(&genlhdr, 0, sizeof(genlhdr));
	genlhdr.cmd = CTRL_CMD_GETFAMILY;
	netlink_init(GENL_ID_CTRL, 0, &genlhdr, sizeof(genlhdr));
	netlink_attr(CTRL_ATTR_FAMILY_NAME, DEVLINK_FAMILY_NAME, strlen(DEVLINK_FAMILY_NAME) + 1);
	err = netlink_send_ext(sock, GENL_ID_CTRL, &n);
	if (err) {
		goto error;
	}
	attr = (struct nlattr*)(nlmsg.buf + NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(genlhdr)));
	for (; (char*)attr < nlmsg.buf + n; attr = (struct nlattr*)((char*)attr + NLMSG_ALIGN(attr->nla_len))) {
		if (attr->nla_type == CTRL_ATTR_FAMILY_ID) {
			id = *(uint16_t*)(attr + 1);
			break;
		}
	}
	if (!id) {
		goto error;
	}
	recv(sock, nlmsg.buf, sizeof(nlmsg.buf), 0); /* recv ack */
	memset(&genlhdr, 0, sizeof(genlhdr));
	genlhdr.cmd = DEVLINK_CMD_RELOAD;
	netlink_init(id, 0, &genlhdr, sizeof(genlhdr));
	netlink_attr(DEVLINK_ATTR_BUS_NAME, bus_name, strlen(bus_name) + 1);
	netlink_attr(DEVLINK_ATTR_DEV_NAME, dev_name, strlen(dev_name) + 1);
	netlink_attr(DEVLINK_ATTR_NETNS_FD, &netns_fd, sizeof(netns_fd));
	netlink_send(sock);
error:
	close(sock);
}

static void initialize_devlink_pci(void)
{
	int netns = open("/proc/self/ns/net", O_RDONLY);
	if (netns == -1)
	exit(1);
	int ret = setns(kInitNetNsFd, 0);
	if (ret == -1)
	exit(1);
	netlink_devlink_netns_move("pci", "0000:00:10.0", netns);
	ret = setns(netns, 0);
	if (ret == -1)
	exit(1);
	close(netns);
}

static void kill_and_wait(int pid, int* status)
{
	kill(-pid, SIGKILL);
	kill(pid, SIGKILL);
	int i;
	for (i = 0; i < 100; i++) {
		if (waitpid(-1, status, WNOHANG | __WALL) == pid)
			return;
		usleep(1000);
	}
	DIR* dir = opendir("/sys/fs/fuse/connections");
	if (dir) {
		for (;;) {
			struct dirent* ent = readdir(dir);
			if (!ent)
				break;
			if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
				continue;
			char abort[300];
			snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort", ent->d_name);
			int fd = open(abort, O_WRONLY);
			if (fd == -1) {
				continue;
			}
			if (write(fd, abort, 1) < 0) {
			}
			close(fd);
		}
		closedir(dir);
	} else {
	}
	while (waitpid(-1, status, __WALL) != pid) {
	}
}

static void setup_test()
{
	prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
	setpgrp();
	write_file("/proc/self/oom_score_adj", "1000");
}

struct thread_t {
	int created, call;
	event_t ready, done;
};

static struct thread_t threads[16];
static void execute_call(int call);
static int running;

static void* thr(void* arg)
{
	struct thread_t* th = (struct thread_t*)arg;
	for (;;) {
		event_wait(&th->ready);
		event_reset(&th->ready);
		execute_call(th->call);
		__atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED);
		event_set(&th->done);
	}
	return 0;
}

static void execute_one(void)
{
	int i, call, thread;
	for (call = 0; call < 3; call++) {
		for (thread = 0; thread < (int)(sizeof(threads) / sizeof(threads[0])); thread++) {
			struct thread_t* th = &threads[thread];
			if (!th->created) {
				th->created = 1;
				event_init(&th->ready);
				event_init(&th->done);
				event_set(&th->done);
				thread_start(thr, th);
			}
			if (!event_isset(&th->done))
				continue;
			event_reset(&th->done);
			th->call = call;
			__atomic_fetch_add(&running, 1, __ATOMIC_RELAXED);
			event_set(&th->ready);
			event_timedwait(&th->done, 45);
			break;
		}
	}
	for (i = 0; i < 100 && __atomic_load_n(&running, __ATOMIC_RELAXED); i++)
		sleep_ms(1);
}

static void execute_one(void);

#define WAIT_FLAGS __WALL

static void loop(void)
{
	int iter;
	for (iter = 0;; iter++) {
		int pid = fork();
		if (pid < 0)
	exit(1);
		if (pid == 0) {
			setup_test();
			execute_one();
			exit(0);
		}
		int status = 0;
		uint64_t start = current_time_ms();
		for (;;) {
			if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid)
				break;
			sleep_ms(1);
			if (current_time_ms() - start < 5 * 1000)
				continue;
			kill_and_wait(pid, &status);
			break;
		}
	}
}

#ifndef __NR_clone3
#define __NR_clone3 435
#endif

uint64_t r[1] = {0xffffffffffffffff};

void execute_call(int call)
{
		intptr_t res;	switch (call) {
	case 0:
memcpy((void*)0x20000000, "/dev/md0\000", 9);
		res = syscall(__NR_openat, 0xffffffffffffff9cul, 0x20000000ul, 0ul, 0ul);
		if (res != -1)
				r[0] = res;
		break;
	case 1:
		syscall(__NR_ioctl, r[0], 0x932ul, 0ul);
		break;
	case 2:
*(uint64_t*)0x200003c0 = 0;
*(uint64_t*)0x200003c8 = 0;
*(uint64_t*)0x200003d0 = 0;
*(uint64_t*)0x200003d8 = 0;
*(uint32_t*)0x200003e0 = 0;
*(uint32_t*)0x200003e4 = 0;
*(uint64_t*)0x200003e8 = 0;
*(uint64_t*)0x200003f0 = 0;
*(uint64_t*)0x200003f8 = 0;
		syscall(__NR_clone3, 0x200003c0ul, 0x40ul);
		break;
	}

}
int main(void)
{
		syscall(__NR_mmap, 0x20000000ul, 0x1000000ul, 3ul, 0x32ul, -1, 0);
	for (procid = 0; procid < 4; procid++) {
		if (fork() == 0) {
			loop();
		}
	}
	sleep(1000000);
	return 0;
}

Note You need to log in before you can comment on or make changes to this bug.