View | Details | Raw Unified | Return to bug 5563 | Differences between
and this patch

Collapse All | Expand All

(-)linux-2.6.12-rc2-gentoo/drivers/md/dm-bbr.c (+1003 lines)
Line 0 Link Here
1
/*
2
 *   (C) Copyright IBM Corp. 2002, 2004
3
 *
4
 *   This program is free software;  you can redistribute it and/or modify
5
 *   it under the terms of the GNU General Public License as published by
6
 *   the Free Software Foundation; either version 2 of the License, or
7
 *   (at your option) any later version.
8
 *
9
 *   This program is distributed in the hope that it will be useful,
10
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
11
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
12
 *   the GNU General Public License for more details.
13
 *
14
 *   You should have received a copy of the GNU General Public License
15
 *   along with this program;  if not, write to the Free Software
16
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
 *
18
 * linux/drivers/md/dm-bbr.c
19
 *
20
 * Bad-block-relocation (BBR) target for device-mapper.
21
 *
22
 * The BBR target is designed to remap I/O write failures to another safe
23
 * location on disk. Note that most disk drives have BBR built into them,
24
 * this means that our software BBR will be only activated when all hardware
25
 * BBR replacement sectors have been used.
26
 */
27
28
#include <linux/module.h>
29
#include <linux/init.h>
30
#include <linux/bio.h>
31
#include <linux/spinlock.h>
32
#include <linux/slab.h>
33
#include <linux/mempool.h>
34
#include <linux/workqueue.h>
35
#include <linux/vmalloc.h>
36
37
#include "dm.h"
38
#include "dm-bio-list.h"
39
#include "dm-bio-record.h"
40
#include "dm-bbr.h"
41
#include "dm-io.h"
42
43
#define SECTOR_SIZE (1 << SECTOR_SHIFT)
44
45
static struct workqueue_struct *dm_bbr_wq = NULL;
46
static void bbr_remap_handler(void *data);
47
static kmem_cache_t *bbr_remap_cache;
48
static kmem_cache_t *bbr_io_cache;
49
static mempool_t *bbr_io_pool;
50
51
/**
52
 * bbr_binary_tree_destroy
53
 *
54
 * Destroy the binary tree.
55
 **/
56
static void bbr_binary_tree_destroy(struct bbr_runtime_remap *root)
{
	struct bbr_runtime_remap **link = NULL;
	struct bbr_runtime_remap *node = root;

	/* Iterative (non-recursive) destroy: repeatedly walk from the root
	 * down to a leaf, free the leaf, and clear the parent's link to it.
	 * O(n * depth), but avoids unbounded kernel-stack recursion.
	 * NOTE(review): callers appear to hold remap_root_lock (see
	 * bbr_free_remap()) — confirm no unlocked caller exists.
	 */
	while (node) {
		if (node->left) {
			link = &(node->left);
			node = node->left;
			continue;
		}
		if (node->right) {
			link = &(node->right);
			node = node->right;
			continue;
		}

		/* node is now a leaf: free it. */
		kmem_cache_free(bbr_remap_cache, node);
		if (node == root) {
			/* If root is deleted, we're done. */
			break;
		}

		/* Back to root; *link still points at the freed leaf's slot,
		 * so clear it before the next descent. */
		node = root;
		*link = NULL;
	}
}
84
85
static void bbr_free_remap(struct bbr_private *bbr_id)
{
	/* Tear down the remap tree under the lock so concurrent lookups
	 * (bbr_search_remap_entry) never observe a half-freed tree.
	 */
	spin_lock_irq(&bbr_id->remap_root_lock);
	bbr_binary_tree_destroy(bbr_id->remap_root);
	bbr_id->remap_root = NULL;
	spin_unlock_irq(&bbr_id->remap_root_lock);
}
92
93
static struct bbr_private *bbr_alloc_private(void)
94
{
95
	struct bbr_private *bbr_id;
96
97
	bbr_id = kmalloc(sizeof(*bbr_id), GFP_KERNEL);
98
	if (bbr_id) {
99
		memset(bbr_id, 0, sizeof(*bbr_id));
100
		INIT_WORK(&bbr_id->remap_work, bbr_remap_handler, bbr_id);
101
		bbr_id->remap_root_lock = SPIN_LOCK_UNLOCKED;
102
		bbr_id->remap_ios_lock = SPIN_LOCK_UNLOCKED;
103
		bbr_id->in_use_replacement_blks = (atomic_t)ATOMIC_INIT(0);
104
	}
105
106
	return bbr_id;
107
}
108
109
static void bbr_free_private(struct bbr_private *bbr_id)
110
{
111
	if (bbr_id->bbr_table) {
112
		vfree(bbr_id->bbr_table);
113
	}
114
	bbr_free_remap(bbr_id);
115
	kfree(bbr_id);
116
}
117
118
/* Lookup table for the standard (reflected) CRC-32, built lazily on
 * first use by build_crc_table().
 * NOTE(review): the lazy build has no locking — two concurrent first
 * calls to calculate_crc() could race on crc_table_built. Confirm all
 * first uses are serialized, or build the table at module init.
 */
static u32 crc_table[256];
static u32 crc_table_built = 0;
120
121
static void build_crc_table(void)
122
{
123
	u32 i, j, crc;
124
125
	for (i = 0; i <= 255; i++) {
126
		crc = i;
127
		for (j = 8; j > 0; j--) {
128
			if (crc & 1)
129
				crc = (crc >> 1) ^ CRC_POLYNOMIAL;
130
			else
131
				crc >>= 1;
132
		}
133
		crc_table[i] = crc;
134
	}
135
	crc_table_built = 1;
136
}
137
138
/**
 * calculate_crc
 *
 * Fold @buffersize bytes of @buffer into the running CRC-32 @crc using
 * the byte-at-a-time table method. Builds the lookup table on first use.
 * Returns the updated CRC.
 */
static u32 calculate_crc(u32 crc, void *buffer, u32 buffersize)
{
	const unsigned char *bytes = buffer;
	u32 i;

	/* Make sure the crc table is available. */
	if (!crc_table_built)
		build_crc_table();

	for (i = 0; i < buffersize; i++) {
		u32 shifted = (crc >> 8) & 0x00FFFFFF;
		u32 looked_up = crc_table[(crc ^ (u32) bytes[i]) & (u32) 0xff];

		crc = shifted ^ looked_up;
	}
	return crc;
}
157
158
/**
159
 * le_bbr_table_sector_to_cpu
160
 *
161
 * Convert bbr meta data from on-disk (LE) format
162
 * to the native cpu endian format.
163
 **/
164
static void le_bbr_table_sector_to_cpu(struct bbr_table *p)
165
{
166
	int i;
167
	p->signature		= le32_to_cpup(&p->signature);
168
	p->crc			= le32_to_cpup(&p->crc);
169
	p->sequence_number	= le32_to_cpup(&p->sequence_number);
170
	p->in_use_cnt		= le32_to_cpup(&p->in_use_cnt);
171
	for (i = 0; i < BBR_ENTRIES_PER_SECT; i++) {
172
		p->entries[i].bad_sect =
173
			le64_to_cpup(&p->entries[i].bad_sect);
174
		p->entries[i].replacement_sect =
175
			le64_to_cpup(&p->entries[i].replacement_sect);
176
	}
177
}
178
179
/**
180
 * cpu_bbr_table_sector_to_le
181
 *
182
 * Convert bbr meta data from cpu endian format to on-disk (LE) format
183
 **/
184
static void cpu_bbr_table_sector_to_le(struct bbr_table *p,
185
				       struct bbr_table *le)
186
{
187
	int i;
188
	le->signature		= cpu_to_le32p(&p->signature);
189
	le->crc			= cpu_to_le32p(&p->crc);
190
	le->sequence_number	= cpu_to_le32p(&p->sequence_number);
191
	le->in_use_cnt		= cpu_to_le32p(&p->in_use_cnt);
192
	for (i = 0; i < BBR_ENTRIES_PER_SECT; i++) {
193
		le->entries[i].bad_sect =
194
			cpu_to_le64p(&p->entries[i].bad_sect);
195
		le->entries[i].replacement_sect =
196
			cpu_to_le64p(&p->entries[i].replacement_sect);
197
	}
198
}
199
200
/**
201
 * validate_bbr_table_sector
202
 *
203
 * Check the specified BBR table sector for a valid signature and CRC. If it's
204
 * valid, endian-convert the table sector.
205
 **/
206
static int validate_bbr_table_sector(struct bbr_table *p)
207
{
208
	int rc = 0;
209
	int org_crc, final_crc;
210
211
	if (le32_to_cpup(&p->signature) != BBR_TABLE_SIGNATURE) {
212
		DMERR("dm-bbr: BBR table signature doesn't match!");
213
		DMERR("dm-bbr: Found 0x%x. Expecting 0x%x",
214
		      le32_to_cpup(&p->signature), BBR_TABLE_SIGNATURE);
215
		rc = -EINVAL;
216
		goto out;
217
	}
218
219
	if (!p->crc) {
220
		DMERR("dm-bbr: BBR table sector has no CRC!");
221
		rc = -EINVAL;
222
		goto out;
223
	}
224
225
	org_crc = le32_to_cpup(&p->crc);
226
	p->crc = 0;
227
	final_crc = calculate_crc(INITIAL_CRC, (void *)p, sizeof(*p));
228
	if (final_crc != org_crc) {
229
		DMERR("dm-bbr: CRC failed!");
230
		DMERR("dm-bbr: Found 0x%x. Expecting 0x%x",
231
		      org_crc, final_crc);
232
		rc = -EINVAL;
233
		goto out;
234
	}
235
236
	p->crc = cpu_to_le32p(&org_crc);
237
	le_bbr_table_sector_to_cpu(p);
238
239
out:
240
	return rc;
241
}
242
243
/**
244
 * bbr_binary_tree_insert
245
 *
246
 * Insert a node into the binary tree.
247
 **/
248
static void bbr_binary_tree_insert(struct bbr_runtime_remap **root,
249
				   struct bbr_runtime_remap *newnode)
250
{
251
	struct bbr_runtime_remap **node = root;
252
	while (node && *node) {
253
		if (newnode->remap.bad_sect > (*node)->remap.bad_sect) {
254
			node = &((*node)->right);
255
		} else {
256
			node = &((*node)->left);
257
		}
258
	}
259
260
	newnode->left = newnode->right = NULL;
261
	*node = newnode;
262
}
263
264
/**
265
 * bbr_binary_search
266
 *
267
 * Search for a node that contains bad_sect == lsn.
268
 **/
269
static struct bbr_runtime_remap *bbr_binary_search(
270
	struct bbr_runtime_remap *root,
271
	u64 lsn)
272
{
273
	struct bbr_runtime_remap *node = root;
274
	while (node) {
275
		if (node->remap.bad_sect == lsn) {
276
			break;
277
		}
278
		if (lsn > node->remap.bad_sect) {
279
			node = node->right;
280
		} else {
281
			node = node->left;
282
		}
283
	}
284
	return node;
285
}
286
287
/**
288
 * bbr_insert_remap_entry
289
 *
290
 * Create a new remap entry and add it to the binary tree for this node.
291
 **/
292
static int bbr_insert_remap_entry(struct bbr_private *bbr_id,
293
				  struct bbr_table_entry *new_bbr_entry)
294
{
295
	struct bbr_runtime_remap *newnode;
296
297
	newnode = kmem_cache_alloc(bbr_remap_cache, GFP_NOIO);
298
	if (!newnode) {
299
		DMERR("dm-bbr: Could not allocate from remap cache!");
300
		return -ENOMEM;
301
	}
302
	newnode->remap.bad_sect  = new_bbr_entry->bad_sect;
303
	newnode->remap.replacement_sect = new_bbr_entry->replacement_sect;
304
	spin_lock_irq(&bbr_id->remap_root_lock);
305
	bbr_binary_tree_insert(&bbr_id->remap_root, newnode);
306
	spin_unlock_irq(&bbr_id->remap_root_lock);
307
	return 0;
308
}
309
310
/**
311
 * bbr_table_to_remap_list
312
 *
313
 * The on-disk bbr table is sorted by the replacement sector LBA. In order to
314
 * improve run time performance, the in memory remap list must be sorted by
315
 * the bad sector LBA. This function is called at discovery time to initialize
316
 * the remap list. This function assumes that at least one copy of meta data
317
 * is valid.
318
 **/
319
static u32 bbr_table_to_remap_list(struct bbr_private *bbr_id)
{
	u32 in_use_blks = 0;
	int i, j;
	struct bbr_table *p;

	for (i = 0, p = bbr_id->bbr_table;
	     i < bbr_id->nr_sects_bbr_table;
	     i++, p++) {
		/* Table sectors are filled in order, so the first sector
		 * with no in-use entries marks the end of the valid data.
		 */
		if (!p->in_use_cnt) {
			break;
		}
		in_use_blks += p->in_use_cnt;
		for (j = 0; j < p->in_use_cnt; j++) {
			/* NOTE(review): return value ignored — an -ENOMEM
			 * here silently leaves the remap tree incomplete.
			 */
			bbr_insert_remap_entry(bbr_id, &p->entries[j]);
		}
	}
	if (in_use_blks) {
		char b[32];
		DMWARN("dm-bbr: There are %u BBR entries for device %s",
		       in_use_blks, format_dev_t(b, bbr_id->dev->bdev->bd_dev));
	}

	/* Returned count seeds in_use_replacement_blks in bbr_setup(). */
	return in_use_blks;
}
344
345
/**
346
 * bbr_search_remap_entry
347
 *
348
 * Search remap entry for the specified sector. If found, return a pointer to
349
 * the table entry. Otherwise, return NULL.
350
 **/
351
static struct bbr_table_entry *bbr_search_remap_entry(
352
	struct bbr_private *bbr_id,
353
	u64 lsn)
354
{
355
	struct bbr_runtime_remap *p;
356
357
	spin_lock_irq(&bbr_id->remap_root_lock);
358
	p = bbr_binary_search(bbr_id->remap_root, lsn);
359
	spin_unlock_irq(&bbr_id->remap_root_lock);
360
	if (p) {
361
		return (&p->remap);
362
	} else {
363
		return NULL;
364
	}
365
}
366
367
/**
368
 * bbr_remap
369
 *
370
 * If *lsn is in the remap table, return TRUE and modify *lsn,
371
 * else, return FALSE.
372
 **/
373
static inline int bbr_remap(struct bbr_private *bbr_id,
374
			    u64 *lsn)
375
{
376
	struct bbr_table_entry *e;
377
378
	if (atomic_read(&bbr_id->in_use_replacement_blks)) {
379
		e = bbr_search_remap_entry(bbr_id, *lsn);
380
		if (e) {
381
			*lsn = e->replacement_sect;
382
			return 1;
383
		}
384
	}
385
	return 0;
386
}
387
388
/**
389
 * bbr_remap_probe
390
 *
391
 * If any of the sectors in the range [lsn, lsn+nr_sects] are in the remap
392
 * table return TRUE, Else, return FALSE.
393
 **/
394
static inline int bbr_remap_probe(struct bbr_private *bbr_id,
				  u64 lsn, u64 nr_sects)
{
	u64 tmp, cnt;

	if (atomic_read(&bbr_id->in_use_replacement_blks)) {
		/* Probe the first sector of each block-sized chunk in the
		 * range. Assumes remaps are tracked at blksize_in_sects
		 * granularity and that lsn is block-aligned, so checking
		 * one sector per block suffices — TODO confirm against
		 * how entries are created in bbr_io_remap_error().
		 */
		for (cnt = 0, tmp = lsn;
		     cnt < nr_sects;
		     cnt += bbr_id->blksize_in_sects, tmp = lsn + cnt) {
			if (bbr_remap(bbr_id,&tmp)) {
				return 1;
			}
		}
	}
	return 0;
}
410
411
/**
412
 * bbr_setup
413
 *
414
 * Read the remap tables from disk and set up the initial remap tree.
415
 **/
416
static int bbr_setup(struct bbr_private *bbr_id)
{
	struct bbr_table *table = bbr_id->bbr_table;
	struct io_region job;
	unsigned long error;
	int i, rc = 0;

	job.bdev = bbr_id->dev->bdev;
	job.count = 1;		/* one sector per synchronous read */

	/* Read and verify each BBR table sector individually. */
	for (i = 0; i < bbr_id->nr_sects_bbr_table; i++, table++) {
		job.sector = bbr_id->lba_table1 + i;
		rc = dm_io_sync_vm(1, &job, READ, table, &error);
		/* Primary copy unreadable: fall back to the secondary
		 * table, if one was configured.
		 */
		if (rc && bbr_id->lba_table2) {
			job.sector = bbr_id->lba_table2 + i;
			rc = dm_io_sync_vm(1, &job, READ, table, &error);
		}
		if (rc) {
			goto out;
		}

		/* Checks signature + CRC and endian-converts in place. */
		rc = validate_bbr_table_sector(table);
		if (rc) {
			goto out;
		}
	}
	/* Build the in-memory tree and record how many replacement
	 * blocks are already in use.
	 */
	atomic_set(&bbr_id->in_use_replacement_blks,
		   bbr_table_to_remap_list(bbr_id));

out:
	if (rc) {
		DMERR("dm-bbr: error during device setup: %d", rc);
	}
	return rc;
}
452
453
/**
454
 * bbr_io_remap_error
455
 * @bbr_id:		Private data for the BBR node.
456
 * @rw:			READ or WRITE.
457
 * @starting_lsn:	Starting sector of request to remap.
458
 * @count:		Number of sectors in the request.
459
 * @page:		Page containing the data for the request.
460
 * @offset:		Byte-offset of the data within the page.
461
 *
462
 * For the requested range, try to write each sector individually. For each
463
 * sector that fails, find the next available remap location and write the
464
 * data to that new location. Then update the table and write both copies
465
 * of the table to disk. Finally, update the in-memory mapping and do any
466
 * other necessary bookkeeping.
467
 **/
468
static int bbr_io_remap_error(struct bbr_private *bbr_id,
			      int rw,
			      u64 starting_lsn,
			      u64 count,
			      struct page *page,
			      unsigned int offset)
{
	struct bbr_table *bbr_table;
	struct io_region job;
	struct page_list pl;
	unsigned long table_sector_index;
	unsigned long table_sector_offset;
	unsigned long index;
	unsigned long error;
	u64 lsn, new_lsn;
	char b[32];
	int rc;

	job.bdev = bbr_id->dev->bdev;
	job.count = 1;		/* sector-at-a-time I/O throughout */
	pl.page = page;
	pl.next = NULL;

	/* For each sector in the request. */
	for (lsn = 0; lsn < count; lsn++, offset += SECTOR_SIZE) {
		job.sector = starting_lsn + lsn;
		rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
		/* Loop until this sector either succeeds at some location
		 * or we run out of replacement sectors.
		 */
		while (rc) {
			/* Find the next available relocation sector.
			 * in_use_replacement_blks doubles as the index of
			 * the next free replacement block.
			 */
			new_lsn = atomic_read(&bbr_id->in_use_replacement_blks);
			if (new_lsn >= bbr_id->nr_replacement_blks) {
				/* No more replacement sectors available. */
				return -EIO;
			}
			new_lsn += bbr_id->start_replacement_sect;

			/* Write the data to its new location. */
			DMWARN("dm-bbr: device %s: Trying to remap bad sector "PFU64" to sector "PFU64,
			       format_dev_t(b, bbr_id->dev->bdev->bd_dev),
			       starting_lsn + lsn, new_lsn);
			job.sector = new_lsn;
			rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
			if (rc) {
				/* This replacement sector is bad.
				 * Try the next one.
				 */
				DMERR("dm-bbr: device %s: replacement sector "PFU64" is bad. Skipping.",
				      format_dev_t(b, bbr_id->dev->bdev->bd_dev), new_lsn);
				/* Burn the bad replacement block so it is
				 * never handed out again.
				 */
				atomic_inc(&bbr_id->in_use_replacement_blks);
				continue;
			}

			/* Add this new entry to the on-disk table. */
			table_sector_index = new_lsn -
					     bbr_id->start_replacement_sect;
			table_sector_offset = table_sector_index /
					      BBR_ENTRIES_PER_SECT;
			index = table_sector_index % BBR_ENTRIES_PER_SECT;

			bbr_table = &bbr_id->bbr_table[table_sector_offset];
			bbr_table->entries[index].bad_sect = starting_lsn + lsn;
			bbr_table->entries[index].replacement_sect = new_lsn;
			bbr_table->in_use_cnt++;
			bbr_table->sequence_number++;
			/* CRC is computed with the crc field zeroed (must
			 * match validate_bbr_table_sector()).
			 */
			bbr_table->crc = 0;
			bbr_table->crc = calculate_crc(INITIAL_CRC,
						       bbr_table,
						       sizeof(struct bbr_table));

			/* Write the table to disk: convert to LE in place,
			 * write both copies, then convert back.
			 */
			cpu_bbr_table_sector_to_le(bbr_table, bbr_table);
			if (bbr_id->lba_table1) {
				job.sector = bbr_id->lba_table1 + table_sector_offset;
				rc = dm_io_sync_vm(1, &job, WRITE, bbr_table, &error);
			}
			if (bbr_id->lba_table2) {
				job.sector = bbr_id->lba_table2 + table_sector_offset;
				rc |= dm_io_sync_vm(1, &job, WRITE, bbr_table, &error);
			}
			le_bbr_table_sector_to_cpu(bbr_table);

			if (rc) {
				/* Error writing one of the tables to disk. */
				DMERR("dm-bbr: device %s: error updating BBR tables on disk.",
				      format_dev_t(b, bbr_id->dev->bdev->bd_dev));
				return rc;
			}

			/* Insert a new entry in the remapping binary-tree. */
			rc = bbr_insert_remap_entry(bbr_id,
						    &bbr_table->entries[index]);
			if (rc) {
				DMERR("dm-bbr: device %s: error adding new entry to remap tree.",
				      format_dev_t(b, bbr_id->dev->bdev->bd_dev));
				return rc;
			}

			/* Success: claim the replacement block. rc == 0 here,
			 * so the while loop exits and we move to the next
			 * sector of the request.
			 */
			atomic_inc(&bbr_id->in_use_replacement_blks);
		}
	}

	return 0;
}
571
572
/**
573
 * bbr_io_process_request
574
 *
575
 * For each sector in this request, check if the sector has already
576
 * been remapped. If so, process all previous sectors in the request,
577
 * followed by the remapped sector. Then reset the starting lsn and
578
 * count, and keep going with the rest of the request as if it were
579
 * a whole new request. If any of the sync_io's return an error,
580
 * call the remapper to relocate the bad sector(s).
581
 *
582
 * 2.5 Note: When switching over to bio's for the I/O path, we have made
583
 * the assumption that the I/O request described by the bio is one
584
 * virtually contiguous piece of memory (even though the bio vector
585
 * describes it using a series of physical page addresses).
586
 **/
587
static int bbr_io_process_request(struct bbr_private *bbr_id,
				  struct bio *bio)
{
	struct io_region job;
	u64 starting_lsn = bio->bi_sector;
	u64 count, lsn, remapped_lsn;
	struct page_list pl;
	unsigned int offset;
	unsigned long error;
	int i, rw = bio_data_dir(bio);
	int rc = 0;

	job.bdev = bbr_id->dev->bdev;
	pl.next = NULL;

	/* Each bio can contain multiple vectors, each with a different page.
	 * Treat each vector as a separate request.
	 */
	/* KMC: Is this the right way to walk the bvec list? */
	/* NOTE(review): this advances bio->bi_idx, mutating the bio; the
	 * bio is completed by the caller afterwards, but confirm nothing
	 * else re-reads the iterator.
	 */
	for (i = 0;
	     i < bio->bi_vcnt;
	     i++, bio->bi_idx++, starting_lsn += count) {

		/* Bvec info: number of sectors, page,
		 * and byte-offset within page.
		 */
		count = bio_iovec(bio)->bv_len >> SECTOR_SHIFT;
		pl.page = bio_iovec(bio)->bv_page;
		offset = bio_iovec(bio)->bv_offset;

		/* For each sector in this bvec, check if the sector has
		 * already been remapped. If so, process all previous sectors
		 * in this request, followed by the remapped sector. Then reset
		 * the starting lsn and count and keep going with the rest of
		 * the request as if it were a whole new request.
		 */
		for (lsn = 0; lsn < count; lsn++) {
			remapped_lsn = starting_lsn + lsn;
			rc = bbr_remap(bbr_id, &remapped_lsn);
			if (!rc) {
				/* This sector is fine. */
				continue;
			}

			/* Process all sectors in the request up to this one. */
			if (lsn > 0) {
				job.sector = starting_lsn;
				job.count = lsn;
				rc = dm_io_sync(1, &job, rw, &pl,
						offset, &error);
				if (rc) {
					/* If this I/O failed, then one of the
					 * sectors in this request needs to be
					 * relocated.
					 */
					rc = bbr_io_remap_error(bbr_id, rw,
								starting_lsn,
								lsn, pl.page,
								offset);
					if (rc) {
						/* KMC: Return? Or continue to next bvec? */
						return rc;
					}
				}
				/* Skip past the sectors just processed. */
				offset += (lsn << SECTOR_SHIFT);
			}

			/* Process the remapped sector. */
			job.sector = remapped_lsn;
			job.count = 1;
			rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
			if (rc) {
				/* BUGBUG - Need more processing if this caused
				 * an error. If this I/O failed, then the
				 * existing remap is now bad, and we need to
				 * find a new remap. Can't use
				 * bbr_io_remap_error(), because the existing
				 * map entry needs to be changed, not added
				 * again, and the original table entry also
				 * needs to be changed.
				 */
				return rc;
			}

			/* Restart the scan just past the remapped sector:
			 * lsn is set to -1 so the loop increment brings it
			 * back to 0 for the shrunken range.
			 */
			starting_lsn	+= (lsn + 1);
			count		-= (lsn + 1);
			lsn		= -1;
			offset		+= SECTOR_SIZE;
		}

		/* Check for any remaining sectors after the last split. This
		 * could potentially be the whole request, but that should be a
		 * rare case because requests should only be processed by the
		 * thread if we know an error occurred or they contained one or
		 * more remapped sectors.
		 */
		if (count) {
			job.sector = starting_lsn;
			job.count = count;
			rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
			if (rc) {
				/* If this I/O failed, then one of the sectors
				 * in this request needs to be relocated.
				 */
				rc = bbr_io_remap_error(bbr_id, rw, starting_lsn,
							count, pl.page, offset);
				if (rc) {
					/* KMC: Return? Or continue to next bvec? */
					return rc;
				}
			}
		}
	}

	return 0;
}
703
704
static void bbr_io_process_requests(struct bbr_private *bbr_id,
705
				    struct bio *bio)
706
{
707
	struct bio *next;
708
	int rc;
709
710
	while (bio) {
711
		next = bio->bi_next;
712
		bio->bi_next = NULL;
713
714
		rc = bbr_io_process_request(bbr_id, bio);
715
716
		bio_endio(bio, bio->bi_size, rc);
717
718
		bio = next;
719
	}
720
}
721
722
/**
723
 * bbr_remap_handler
724
 *
725
 * This is the handler for the bbr work-queue.
726
 *
727
 * I/O requests should only be sent to this handler if we know that:
728
 * a) the request contains at least one remapped sector.
729
 *   or
730
 * b) the request caused an error on the normal I/O path.
731
 *
732
 * This function uses synchronous I/O, so sending a request to this
733
 * thread that doesn't need special processing will cause severe
734
 * performance degredation.
735
 **/
736
static void bbr_remap_handler(void *data)
{
	struct bbr_private *bbr_id = data;
	struct bio *bio;
	unsigned long flags;

	/* Atomically take the entire pending list, then process it outside
	 * the lock — the processing below does slow synchronous I/O.
	 */
	spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
	bio = bio_list_get(&bbr_id->remap_ios);
	spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);

	bbr_io_process_requests(bbr_id, bio);
}
748
749
/**
750
 * bbr_endio
751
 *
752
 * This is the callback for normal write requests. Check for an error
753
 * during the I/O, and send to the thread for processing if necessary.
754
 **/
755
static int bbr_endio(struct dm_target *ti, struct bio *bio,
		     int error, union map_info *map_context)
{
	struct bbr_private *bbr_id = ti->private;
	struct dm_bio_details *bbr_io = map_context->ptr;

	/* bbr_io is only non-NULL for bios that took the fast path in
	 * bbr_map(); worker-queued bios carry NULL and are passed through.
	 */
	if (error && bbr_io) {
		unsigned long flags;
		char b[32];

		/* Rewind the bio to the state recorded in bbr_map() so the
		 * worker can replay it from the beginning.
		 */
		dm_bio_restore(bbr_io, bio);
		map_context->ptr = NULL;

		DMERR("dm-bbr: device %s: I/O failure on sector %lu. "
		      "Scheduling for retry.",
		      format_dev_t(b, bbr_id->dev->bdev->bd_dev),
		      (unsigned long)bio->bi_sector);

		spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
		bio_list_add(&bbr_id->remap_ios, bio);
		spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);

		queue_work(dm_bbr_wq, &bbr_id->remap_work);

		/* NOTE(review): returning 1 presumably tells the dm core the
		 * bio is not yet complete (worker will finish it) — confirm
		 * against the dm end_io contract for this kernel version.
		 */
		error = 1;
	}

	/* Safe even when the bio was queued above: the recorded details
	 * were already restored into the bio. */
	if (bbr_io)
		mempool_free(bbr_io, bbr_io_pool);

	return error;
}
787
788
/**
789
 * Construct a bbr mapping
790
 **/
791
static int bbr_ctr(struct dm_target *ti, unsigned int argc, char **argv)
792
{
793
	struct bbr_private *bbr_id;
794
	unsigned long block_size;
795
	char *end;
796
	int rc = -EINVAL;
797
798
	if (argc != 8) {
799
		ti->error = "dm-bbr requires exactly 8 arguments: "
800
			    "device offset table1_lsn table2_lsn table_size start_replacement nr_replacement_blks block_size";
801
		goto out1;
802
	}
803
804
	bbr_id = bbr_alloc_private();
805
	if (!bbr_id) {
806
		ti->error = "dm-bbr: Error allocating bbr private data.";
807
		goto out1;
808
	}
809
810
	bbr_id->offset = simple_strtoull(argv[1], &end, 10);
811
	bbr_id->lba_table1 = simple_strtoull(argv[2], &end, 10);
812
	bbr_id->lba_table2 = simple_strtoull(argv[3], &end, 10);
813
	bbr_id->nr_sects_bbr_table = simple_strtoull(argv[4], &end, 10);
814
	bbr_id->start_replacement_sect = simple_strtoull(argv[5], &end, 10);
815
	bbr_id->nr_replacement_blks = simple_strtoull(argv[6], &end, 10);
816
	block_size = simple_strtoul(argv[7], &end, 10);
817
	bbr_id->blksize_in_sects = (block_size >> SECTOR_SHIFT);
818
819
	bbr_id->bbr_table = vmalloc(bbr_id->nr_sects_bbr_table << SECTOR_SHIFT);
820
	if (!bbr_id->bbr_table) {
821
		ti->error = "dm-bbr: Error allocating bbr table.";
822
		goto out2;
823
	}
824
825
	if (dm_get_device(ti, argv[0], 0, ti->len,
826
			  dm_table_get_mode(ti->table), &bbr_id->dev)) {
827
		ti->error = "dm-bbr: Device lookup failed";
828
		goto out2;
829
	}
830
831
	rc = bbr_setup(bbr_id);
832
	if (rc) {
833
		ti->error = "dm-bbr: Device setup failed";
834
		goto out3;
835
	}
836
837
	ti->private = bbr_id;
838
	return 0;
839
840
out3:
841
	dm_put_device(ti, bbr_id->dev);
842
out2:
843
	bbr_free_private(bbr_id);
844
out1:
845
	return rc;
846
}
847
848
static void bbr_dtr(struct dm_target *ti)
{
	struct bbr_private *bbr_id = ti->private;

	/* Release the underlying device first; bbr_id (which holds the
	 * dev pointer) must outlive the dm_put_device() call. */
	dm_put_device(ti, bbr_id->dev);
	bbr_free_private(bbr_id);
}
855
856
static int bbr_map(struct dm_target *ti, struct bio *bio,
		   union map_info *map_context)
{
	struct bbr_private *bbr_id = ti->private;
	struct dm_bio_details *bbr_io;
	unsigned long flags;
	int rc = 1;	/* 1: dm core submits the (remapped) bio itself */

	/* Translate from target-relative to device-absolute sector. */
	bio->bi_sector += bbr_id->offset;

	if (atomic_read(&bbr_id->in_use_replacement_blks) == 0 ||
	    !bbr_remap_probe(bbr_id, bio->bi_sector, bio_sectors(bio))) {
		/* No existing remaps or this request doesn't
		 * contain any remapped sectors.
		 */
		bio->bi_bdev = bbr_id->dev->bdev;

		/* Snapshot the bio so bbr_endio() can rewind and retry it
		 * through the worker if the normal path fails.
		 * (GFP_NOIO mempool_alloc will not fail; it waits.)
		 */
		bbr_io = mempool_alloc(bbr_io_pool, GFP_NOIO);
		dm_bio_record(bbr_io, bio);
		map_context->ptr = bbr_io;
	} else {
		/* This request has at least one remapped sector.
		 * Give it to the work-queue for processing.
		 */
		map_context->ptr = NULL;
		spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
		bio_list_add(&bbr_id->remap_ios, bio);
		spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);

		queue_work(dm_bbr_wq, &bbr_id->remap_work);
		rc = 0;	/* 0: bio taken over; dm must not submit it */
	}

	return rc;
}
891
892
static int bbr_status(struct dm_target *ti, status_type_t type,
		      char *result, unsigned int maxlen)
{
	struct bbr_private *bbr_id = ti->private;
	char b[BDEVNAME_SIZE];

	switch (type) {
	case STATUSTYPE_INFO:
		/* No runtime statistics reported. */
		result[0] = '\0';
		break;

	case STATUSTYPE_TABLE:
		/* Echo back the constructor arguments (block size is
		 * converted back from sectors to bytes). */
		snprintf(result, maxlen, "%s "PFU64" "PFU64" "PFU64" "PFU64" "PFU64" "PFU64" %u",
			 format_dev_t(b, bbr_id->dev->bdev->bd_dev),
			 bbr_id->offset, bbr_id->lba_table1, bbr_id->lba_table2,
			 bbr_id->nr_sects_bbr_table,
			 bbr_id->start_replacement_sect,
			 bbr_id->nr_replacement_blks,
			 bbr_id->blksize_in_sects << SECTOR_SHIFT);
		 break;
	}
	return 0;
}
915
916
/* Device-mapper target registration record for "bbr". */
static struct target_type bbr_target = {
	.name	= "bbr",
	.version= {1, 0, 1},
	.module	= THIS_MODULE,
	.ctr	= bbr_ctr,
	.dtr	= bbr_dtr,
	.map	= bbr_map,
	.end_io	= bbr_endio,
	.status	= bbr_status,
};
926
927
/**
 * dm_bbr_init
 *
 * Module init: register the target, create the slab caches, mempool and
 * work-queue, and grab a reference on the dm-io service. On any failure
 * everything already set up is torn down in reverse order.
 *
 * Fix: the out-of-memory paths set rc = ENOMEM (positive), so module
 * load would "fail" with a bogus positive return; error codes must be
 * negative errno values (-ENOMEM).
 */
int __init dm_bbr_init(void)
{
	int rc;

	rc = dm_register_target(&bbr_target);
	if (rc) {
		DMERR("dm-bbr: error registering target.");
		goto err1;
	}

	bbr_remap_cache = kmem_cache_create("bbr-remap",
					    sizeof(struct bbr_runtime_remap),
					    0, SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!bbr_remap_cache) {
		DMERR("dm-bbr: error creating remap cache.");
		rc = -ENOMEM;
		goto err2;
	}

	bbr_io_cache = kmem_cache_create("bbr-io", sizeof(struct dm_bio_details),
					 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!bbr_io_cache) {
		DMERR("dm-bbr: error creating io cache.");
		rc = -ENOMEM;
		goto err3;
	}

	bbr_io_pool = mempool_create(256, mempool_alloc_slab,
				     mempool_free_slab, bbr_io_cache);
	if (!bbr_io_pool) {
		DMERR("dm-bbr: error creating io mempool.");
		rc = -ENOMEM;
		goto err4;
	}

	dm_bbr_wq = create_workqueue("dm-bbr");
	if (!dm_bbr_wq) {
		DMERR("dm-bbr: error creating work-queue.");
		rc = -ENOMEM;
		goto err5;
	}

	rc = dm_io_get(1);
	if (rc) {
		DMERR("dm-bbr: error initializing I/O service.");
		goto err6;
	}

	return 0;

err6:
	destroy_workqueue(dm_bbr_wq);
err5:
	mempool_destroy(bbr_io_pool);
err4:
	kmem_cache_destroy(bbr_io_cache);
err3:
	kmem_cache_destroy(bbr_remap_cache);
err2:
	dm_unregister_target(&bbr_target);
err1:
	return rc;
}
990
991
void __exit dm_bbr_exit(void)
{
	/* Tear down in exact reverse order of dm_bbr_init(). */
	dm_io_put(1);
	destroy_workqueue(dm_bbr_wq);
	mempool_destroy(bbr_io_pool);
	kmem_cache_destroy(bbr_io_cache);
	kmem_cache_destroy(bbr_remap_cache);
	dm_unregister_target(&bbr_target);
}
1000
1001
/* Standard module entry/exit hooks and license declaration. */
module_init(dm_bbr_init);
module_exit(dm_bbr_exit);
MODULE_LICENSE("GPL");
(-)linux-2.6.12-rc2-gentoo/drivers/md/dm-bbr.h (+125 lines)
Line 0 Link Here
1
/*
2
 *   (C) Copyright IBM Corp. 2002, 2004
3
 *
4
 *   This program is free software;  you can redistribute it and/or modify
5
 *   it under the terms of the GNU General Public License as published by
6
 *   the Free Software Foundation; either version 2 of the License, or
7
 *   (at your option) any later version.
8
 *
9
 *   This program is distributed in the hope that it will be useful,
10
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
11
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
12
 *   the GNU General Public License for more details.
13
 *
14
 *   You should have received a copy of the GNU General Public License
15
 *   along with this program;  if not, write to the Free Software
16
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
 *
18
 * linux/drivers/md/dm-bbr.h
19
 *
20
 * Bad-block-relocation (BBR) target for device-mapper.
21
 *
22
 * The BBR target is designed to remap I/O write failures to another safe
23
 * location on disk. Note that most disk drives have BBR built into them,
24
 * this means that our software BBR will be only activated when all hardware
25
 * BBR replacement sectors have been used.
26
 */
27
28
#define BBR_TABLE_SIGNATURE		0x42627254 /* BbrT */
#define BBR_ENTRIES_PER_SECT		31	/* remap entries per table sector */
#define INITIAL_CRC			0xFFFFFFFF /* CRC-32 seed value */
#define CRC_POLYNOMIAL			0xEDB88320L /* reflected CRC-32 polynomial */
32
33
/**
34
 * Macros to cleanly print 64-bit numbers on both 32-bit and 64-bit machines.
35
 * Use these in place of %Ld, %Lu, and %Lx.
36
 **/
37
#if BITS_PER_LONG > 32
/* NOTE(review): assumes u64 is 'unsigned long' on 64-bit arches so that
 * %lu matches — confirm for all supported architectures. */
#define PFU64 "%lu"
#else
#define PFU64 "%Lu"
#endif
42
43
/**
44
 * struct bbr_table_entry
45
 * @bad_sect:		LBA of bad location.
46
 * @replacement_sect:	LBA of new location.
47
 *
48
 * Structure to describe one BBR remap.
49
 **/
50
struct bbr_table_entry {
	u64 bad_sect;		/* LBA of the bad sector */
	u64 replacement_sect;	/* LBA it has been remapped to */
};
54
55
/**
56
 * struct bbr_table
57
 * @signature:		Signature on each BBR table sector.
58
 * @crc:		CRC for this table sector.
59
 * @sequence_number:	Used to resolve conflicts when primary and secondary
60
 *			tables do not match.
61
 * @in_use_cnt:		Number of in-use table entries.
62
 * @entries:		Actual table of remaps.
63
 *
64
 * Structure to describe each sector of the metadata table. Each sector in this
65
 * table can describe 31 remapped sectors.
66
 **/
67
struct bbr_table {
	u32			signature;	/* BBR_TABLE_SIGNATURE */
	u32			crc;		/* CRC-32 over this sector, computed with this field zeroed */
	u32			sequence_number;	/* bumped on every update */
	u32			in_use_cnt;	/* number of valid entries[] */
	struct bbr_table_entry	entries[BBR_ENTRIES_PER_SECT];
};
74
75
/**
76
 * struct bbr_runtime_remap
77
 *
78
 * Node in the binary tree used to keep track of remaps.
79
 **/
80
/* Tree is keyed on remap.bad_sect (see bbr_binary_tree_insert). */
struct bbr_runtime_remap {
	struct bbr_table_entry		remap;
	struct bbr_runtime_remap	*left;
	struct bbr_runtime_remap	*right;
};
85
86
/**
87
 * struct bbr_private
88
 * @dev:			Info about underlying device.
89
 * @bbr_table:			Copy of metadata table.
90
 * @remap_root:			Binary tree containing all remaps.
91
 * @remap_root_lock:		Lock for the binary tree.
92
 * @remap_work:			For adding work items to the work-queue.
93
 * @remap_ios:			List of I/Os for the work-queue to handle.
94
 * @remap_ios_lock:		Lock for the remap_ios list.
95
 * @offset:			LBA of data area.
96
 * @lba_table1:			LBA of primary BBR table.
97
 * @lba_table2:			LBA of secondary BBR table.
98
 * @nr_sects_bbr_table:		Size of each BBR table.
99
 * @nr_replacement_blks:	Number of replacement blocks.
100
 * @start_replacement_sect:	LBA of start of replacement blocks.
101
 * @blksize_in_sects:		Size of each block.
102
 * @in_use_replacement_blks:	Current number of remapped blocks.
103
 *
104
 * Private data for each BBR target.
105
 **/
106
struct bbr_private {
	struct dm_dev			*dev;
	struct bbr_table		*bbr_table;	/* vmalloc'ed, nr_sects_bbr_table sectors */
	struct bbr_runtime_remap	*remap_root;	/* protected by remap_root_lock */
	spinlock_t			remap_root_lock;

	struct work_struct		remap_work;
	struct bio_list			remap_ios;	/* protected by remap_ios_lock */
	spinlock_t			remap_ios_lock;

	u64				offset;
	u64				lba_table1;
	u64				lba_table2;
	u64				nr_sects_bbr_table;
	u64				start_replacement_sect;
	u64				nr_replacement_blks;
	u32				blksize_in_sects;
	/* Count of remapped blocks; also the index of the next free
	 * replacement block (see bbr_io_remap_error). */
	atomic_t			in_use_replacement_blks;
};
125
(-)linux-2.6.12-rc2-gentoo/drivers/md/Kconfig (+11 lines)
Lines 236-240 config DM_MULTIPATH_EMC Link Here
236
	---help---
236
	---help---
237
	  Multipath support for EMC CX/AX series hardware.
237
	  Multipath support for EMC CX/AX series hardware.
238
238
239
config BLK_DEV_DM_BBR
240
	tristate "Bad Block Relocation Device Target (EXPERIMENTAL)"
241
	depends on BLK_DEV_DM && EXPERIMENTAL
242
	---help---
243
	  Support for devices with software-based bad-block-relocation.
244
245
	  To compile this as a module, choose M here: the module will be
246
	  called dm-bbr.
247
248
	  If unsure, say N.
249
239
endmenu
250
endmenu
240
251
(-)linux-2.6.12-rc2-gentoo/drivers/md/Makefile (+1 lines)
Lines 36-41 obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc Link Here
36
obj-$(CONFIG_DM_SNAPSHOT)	+= dm-snapshot.o
36
obj-$(CONFIG_DM_SNAPSHOT)	+= dm-snapshot.o
37
obj-$(CONFIG_DM_MIRROR)		+= dm-mirror.o
37
obj-$(CONFIG_DM_MIRROR)		+= dm-mirror.o
38
obj-$(CONFIG_DM_ZERO)		+= dm-zero.o
38
obj-$(CONFIG_DM_ZERO)		+= dm-zero.o
39
obj-$(CONFIG_BLK_DEV_DM_BBR)	+= dm-bbr.o
39
40
40
quiet_cmd_unroll = UNROLL  $@
41
quiet_cmd_unroll = UNROLL  $@
41
      cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \
42
      cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \

Return to bug 5563