/*
 * (C) Copyright IBM Corp. 2002, 2004
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
 * the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * linux/drivers/md/dm-bbr.c
 *
 * Bad-block-relocation (BBR) target for device-mapper.
 *
 * The BBR target is designed to remap I/O write failures to another safe
 * location on disk. Note that most disk drives have BBR built into them;
 * this means that our software BBR will only be activated when all of the
 * hardware BBR replacement sectors have been used.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/mempool.h>
#include <linux/workqueue.h>
#include <linux/vmalloc.h>

#include "dm.h"
#include "dm-bio-list.h"
#include "dm-bio-record.h"
#include "dm-bbr.h"
#include "dm-io.h"

#define SECTOR_SIZE (1 << SECTOR_SHIFT)

static struct workqueue_struct *dm_bbr_wq = NULL;
static void bbr_remap_handler(void *data);
static kmem_cache_t *bbr_remap_cache;
static kmem_cache_t *bbr_io_cache;
static mempool_t *bbr_io_pool;

/**
 * bbr_binary_tree_destroy
 *
 * Destroy the binary tree.
 **/
static void bbr_binary_tree_destroy(struct bbr_runtime_remap *root)
{
	struct bbr_runtime_remap **link = NULL;
	struct bbr_runtime_remap *node = root;

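	/*
	 * Tear the tree down iteratively rather than recursively: walk to
	 * any leaf, free it, clear the parent's link to it, and restart
	 * from the root. This needs no recursion or extra stack, at the
	 * cost of re-walking from the root after each free.
	 */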
	while (node) {
		if (node->left) {
			link = &(node->left);
			node = node->left;
			continue;
		}
		if (node->right) {
			link = &(node->right);
			node = node->right;
			continue;
		}

		kmem_cache_free(bbr_remap_cache, node);
		if (node == root) {
			/* If root is deleted, we're done. */
			break;
		}

		/* Back to root. */
		node = root;
		*link = NULL;
	}
}

static void bbr_free_remap(struct bbr_private *bbr_id)
{
	spin_lock_irq(&bbr_id->remap_root_lock);
	bbr_binary_tree_destroy(bbr_id->remap_root);
	bbr_id->remap_root = NULL;
	spin_unlock_irq(&bbr_id->remap_root_lock);
}

static struct bbr_private *bbr_alloc_private(void)
{
	struct bbr_private *bbr_id;

	bbr_id = kmalloc(sizeof(*bbr_id), GFP_KERNEL);
	if (bbr_id) {
		memset(bbr_id, 0, sizeof(*bbr_id));
		INIT_WORK(&bbr_id->remap_work, bbr_remap_handler, bbr_id);
		bbr_id->remap_root_lock = SPIN_LOCK_UNLOCKED;
		bbr_id->remap_ios_lock = SPIN_LOCK_UNLOCKED;
		bbr_id->in_use_replacement_blks = (atomic_t)ATOMIC_INIT(0);
	}

	return bbr_id;
}

static void bbr_free_private(struct bbr_private *bbr_id)
{
	if (bbr_id->bbr_table) {
		vfree(bbr_id->bbr_table);
	}
	bbr_free_remap(bbr_id);
	kfree(bbr_id);
}

static u32 crc_table[256];
static u32 crc_table_built = 0;
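
/*
 * Table-driven CRC, processed least-significant-bit first ("reflected")
 * with CRC_POLYNOMIAL from dm-bbr.h. Each table entry is one byte value
 * run through eight shift/xor steps, so calculate_crc() can consume a
 * buffer one byte at a time:
 *
 *	crc = (crc >> 8) ^ crc_table[(crc ^ next_byte) & 0xff];
 */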
static void build_crc_table(void)
{
	u32 i, j, crc;

	for (i = 0; i <= 255; i++) {
		crc = i;
		for (j = 8; j > 0; j--) {
			if (crc & 1)
				crc = (crc >> 1) ^ CRC_POLYNOMIAL;
			else
				crc >>= 1;
		}
		crc_table[i] = crc;
	}
	crc_table_built = 1;
}

static u32 calculate_crc(u32 crc, void *buffer, u32 buffersize)
{
	unsigned char *current_byte;
	u32 temp1, temp2, i;

	current_byte = (unsigned char *) buffer;
	/* Make sure the crc table is available */
	if (!crc_table_built)
		build_crc_table();
	/* Process each byte in the buffer. */
	for (i = 0; i < buffersize; i++) {
		temp1 = (crc >> 8) & 0x00FFFFFF;
		temp2 = crc_table[(crc ^ (u32) *current_byte) & (u32) 0xff];
		current_byte++;
		crc = temp1 ^ temp2;
	}
	return crc;
}

/**
 * le_bbr_table_sector_to_cpu
 *
 * Convert bbr meta data from on-disk (LE) format
 * to the native cpu endian format.
 **/
static void le_bbr_table_sector_to_cpu(struct bbr_table *p)
{
	int i;
	p->signature = le32_to_cpup(&p->signature);
	p->crc = le32_to_cpup(&p->crc);
	p->sequence_number = le32_to_cpup(&p->sequence_number);
	p->in_use_cnt = le32_to_cpup(&p->in_use_cnt);
	for (i = 0; i < BBR_ENTRIES_PER_SECT; i++) {
		p->entries[i].bad_sect =
			le64_to_cpup(&p->entries[i].bad_sect);
		p->entries[i].replacement_sect =
			le64_to_cpup(&p->entries[i].replacement_sect);
	}
}

/**
 * cpu_bbr_table_sector_to_le
 *
 * Convert bbr meta data from cpu endian format to on-disk (LE) format
 **/
static void cpu_bbr_table_sector_to_le(struct bbr_table *p,
				       struct bbr_table *le)
{
	int i;
	le->signature = cpu_to_le32p(&p->signature);
	le->crc = cpu_to_le32p(&p->crc);
	le->sequence_number = cpu_to_le32p(&p->sequence_number);
	le->in_use_cnt = cpu_to_le32p(&p->in_use_cnt);
	for (i = 0; i < BBR_ENTRIES_PER_SECT; i++) {
		le->entries[i].bad_sect =
			cpu_to_le64p(&p->entries[i].bad_sect);
		le->entries[i].replacement_sect =
			cpu_to_le64p(&p->entries[i].replacement_sect);
	}
}

/**
 * validate_bbr_table_sector
 *
 * Check the specified BBR table sector for a valid signature and CRC. If it's
 * valid, endian-convert the table sector.
 **/
static int validate_bbr_table_sector(struct bbr_table *p)
{
	int rc = 0;
	int org_crc, final_crc;

	if (le32_to_cpup(&p->signature) != BBR_TABLE_SIGNATURE) {
		DMERR("dm-bbr: BBR table signature doesn't match!");
		DMERR("dm-bbr: Found 0x%x. Expecting 0x%x",
		      le32_to_cpup(&p->signature), BBR_TABLE_SIGNATURE);
		rc = -EINVAL;
		goto out;
	}

	if (!p->crc) {
		DMERR("dm-bbr: BBR table sector has no CRC!");
		rc = -EINVAL;
		goto out;
	}

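	/* The stored CRC is computed with the crc field itself zeroed
	 * (see bbr_io_remap_error()), so save the on-disk value, zero the
	 * field, and recompute over the whole sector before comparing.
	 */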
	org_crc = le32_to_cpup(&p->crc);
	p->crc = 0;
	final_crc = calculate_crc(INITIAL_CRC, (void *)p, sizeof(*p));
	if (final_crc != org_crc) {
		DMERR("dm-bbr: CRC failed!");
		DMERR("dm-bbr: Found 0x%x. Expecting 0x%x",
		      org_crc, final_crc);
		rc = -EINVAL;
		goto out;
	}

	p->crc = cpu_to_le32p(&org_crc);
	le_bbr_table_sector_to_cpu(p);

out:
	return rc;
}

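/*
 * The runtime remap tree is a plain, unbalanced binary search tree keyed
 * on remap.bad_sect. Entries are only ever added while the target is live
 * (the whole tree is freed at teardown by bbr_binary_tree_destroy()), and
 * all access is serialized by remap_root_lock, so no rebalancing is done.
 */
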
/**
 * bbr_binary_tree_insert
 *
 * Insert a node into the binary tree.
 **/
static void bbr_binary_tree_insert(struct bbr_runtime_remap **root,
				   struct bbr_runtime_remap *newnode)
{
	struct bbr_runtime_remap **node = root;
	while (node && *node) {
		if (newnode->remap.bad_sect > (*node)->remap.bad_sect) {
			node = &((*node)->right);
		} else {
			node = &((*node)->left);
		}
	}

	newnode->left = newnode->right = NULL;
	*node = newnode;
}

/**
 * bbr_binary_search
 *
 * Search for a node that contains bad_sect == lsn.
 **/
static struct bbr_runtime_remap *bbr_binary_search(
	struct bbr_runtime_remap *root,
	u64 lsn)
{
	struct bbr_runtime_remap *node = root;
	while (node) {
		if (node->remap.bad_sect == lsn) {
			break;
		}
		if (lsn > node->remap.bad_sect) {
			node = node->right;
		} else {
			node = node->left;
		}
	}
	return node;
}

/**
 * bbr_insert_remap_entry
 *
 * Create a new remap entry and add it to the binary tree for this node.
 **/
static int bbr_insert_remap_entry(struct bbr_private *bbr_id,
				  struct bbr_table_entry *new_bbr_entry)
{
	struct bbr_runtime_remap *newnode;

	newnode = kmem_cache_alloc(bbr_remap_cache, GFP_NOIO);
	if (!newnode) {
		DMERR("dm-bbr: Could not allocate from remap cache!");
		return -ENOMEM;
	}
	newnode->remap.bad_sect = new_bbr_entry->bad_sect;
	newnode->remap.replacement_sect = new_bbr_entry->replacement_sect;
	spin_lock_irq(&bbr_id->remap_root_lock);
	bbr_binary_tree_insert(&bbr_id->remap_root, newnode);
	spin_unlock_irq(&bbr_id->remap_root_lock);
	return 0;
}

/**
 * bbr_table_to_remap_list
 *
 * The on-disk bbr table is sorted by the replacement sector LBA. In order to
 * improve run-time performance, the in-memory remap list must be sorted by
 * the bad sector LBA. This function is called at discovery time to initialize
 * the remap list. This function assumes that at least one copy of meta data
 * is valid.
 **/
static u32 bbr_table_to_remap_list(struct bbr_private *bbr_id)
{
	u32 in_use_blks = 0;
	int i, j;
	struct bbr_table *p;

	for (i = 0, p = bbr_id->bbr_table;
	     i < bbr_id->nr_sects_bbr_table;
	     i++, p++) {
		if (!p->in_use_cnt) {
			break;
		}
		in_use_blks += p->in_use_cnt;
		for (j = 0; j < p->in_use_cnt; j++) {
			bbr_insert_remap_entry(bbr_id, &p->entries[j]);
		}
	}
	if (in_use_blks) {
		char b[32];
		DMWARN("dm-bbr: There are %u BBR entries for device %s",
		       in_use_blks, format_dev_t(b, bbr_id->dev->bdev->bd_dev));
	}

	return in_use_blks;
}
344 |
|
345 |
/** |
346 |
* bbr_search_remap_entry |
347 |
* |
348 |
* Search remap entry for the specified sector. If found, return a pointer to |
349 |
* the table entry. Otherwise, return NULL. |
350 |
**/ |
351 |
static struct bbr_table_entry *bbr_search_remap_entry( |
352 |
struct bbr_private *bbr_id, |
353 |
u64 lsn) |
354 |
{ |
355 |
struct bbr_runtime_remap *p; |
356 |
|
357 |
spin_lock_irq(&bbr_id->remap_root_lock); |
358 |
p = bbr_binary_search(bbr_id->remap_root, lsn); |
359 |
spin_unlock_irq(&bbr_id->remap_root_lock); |
360 |
if (p) { |
361 |
return (&p->remap); |
362 |
} else { |
363 |
return NULL; |
364 |
} |
365 |
} |
366 |
|
367 |
/** |
368 |
* bbr_remap |
369 |
* |
370 |
* If *lsn is in the remap table, return TRUE and modify *lsn, |
371 |
* else, return FALSE. |
372 |
**/ |
373 |
static inline int bbr_remap(struct bbr_private *bbr_id, |
374 |
u64 *lsn) |
375 |
{ |
376 |
struct bbr_table_entry *e; |
377 |
|
378 |
if (atomic_read(&bbr_id->in_use_replacement_blks)) { |
379 |
e = bbr_search_remap_entry(bbr_id, *lsn); |
380 |
if (e) { |
381 |
*lsn = e->replacement_sect; |
382 |
return 1; |
383 |
} |
384 |
} |
385 |
return 0; |
386 |
} |

/**
 * bbr_remap_probe
 *
 * If any of the sectors in the range [lsn, lsn+nr_sects] are in the remap
 * table, return TRUE; else, return FALSE.
 **/
static inline int bbr_remap_probe(struct bbr_private *bbr_id,
				  u64 lsn, u64 nr_sects)
{
	u64 tmp, cnt;

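	/* The range is walked one block (blksize_in_sects) at a time;
	 * remapping is done per block, so probing one sector of each
	 * block in the range is sufficient.
	 */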
	if (atomic_read(&bbr_id->in_use_replacement_blks)) {
		for (cnt = 0, tmp = lsn;
		     cnt < nr_sects;
		     cnt += bbr_id->blksize_in_sects, tmp = lsn + cnt) {
			if (bbr_remap(bbr_id, &tmp)) {
				return 1;
			}
		}
	}
	return 0;
}

/**
 * bbr_setup
 *
 * Read the remap tables from disk and set up the initial remap tree.
 **/
static int bbr_setup(struct bbr_private *bbr_id)
{
	struct bbr_table *table = bbr_id->bbr_table;
	struct io_region job;
	unsigned long error;
	int i, rc = 0;

	job.bdev = bbr_id->dev->bdev;
	job.count = 1;

	/* Read and verify each BBR table sector individually. */
	for (i = 0; i < bbr_id->nr_sects_bbr_table; i++, table++) {
		job.sector = bbr_id->lba_table1 + i;
		rc = dm_io_sync_vm(1, &job, READ, table, &error);
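		/* Primary table copy unreadable; fall back to the second
		 * copy, if the target was configured with one.
		 */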
		if (rc && bbr_id->lba_table2) {
			job.sector = bbr_id->lba_table2 + i;
			rc = dm_io_sync_vm(1, &job, READ, table, &error);
		}
		if (rc) {
			goto out;
		}

		rc = validate_bbr_table_sector(table);
		if (rc) {
			goto out;
		}
	}
	atomic_set(&bbr_id->in_use_replacement_blks,
		   bbr_table_to_remap_list(bbr_id));

out:
	if (rc) {
		DMERR("dm-bbr: error during device setup: %d", rc);
	}
	return rc;
}

/**
 * bbr_io_remap_error
 * @bbr_id: Private data for the BBR node.
 * @rw: READ or WRITE.
 * @starting_lsn: Starting sector of request to remap.
 * @count: Number of sectors in the request.
 * @page: Page containing the data for the request.
 * @offset: Byte-offset of the data within the page.
 *
 * For the requested range, try to write each sector individually. For each
 * sector that fails, find the next available remap location and write the
 * data to that new location. Then update the table and write both copies
 * of the table to disk. Finally, update the in-memory mapping and do any
 * other necessary bookkeeping.
 **/
static int bbr_io_remap_error(struct bbr_private *bbr_id,
			      int rw,
			      u64 starting_lsn,
			      u64 count,
			      struct page *page,
			      unsigned int offset)
{
	struct bbr_table *bbr_table;
	struct io_region job;
	struct page_list pl;
	unsigned long table_sector_index;
	unsigned long table_sector_offset;
	unsigned long index;
	unsigned long error;
	u64 lsn, new_lsn;
	char b[32];
	int rc;

	job.bdev = bbr_id->dev->bdev;
	job.count = 1;
	pl.page = page;
	pl.next = NULL;

	/* For each sector in the request. */
	for (lsn = 0; lsn < count; lsn++, offset += SECTOR_SIZE) {
		job.sector = starting_lsn + lsn;
		rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
		while (rc) {
			/* Find the next available relocation sector. */
			new_lsn = atomic_read(&bbr_id->in_use_replacement_blks);
			if (new_lsn >= bbr_id->nr_replacement_blks) {
				/* No more replacement sectors available. */
				return -EIO;
			}
			new_lsn += bbr_id->start_replacement_sect;

			/* Write the data to its new location. */
			DMWARN("dm-bbr: device %s: Trying to remap bad sector "PFU64" to sector "PFU64,
			       format_dev_t(b, bbr_id->dev->bdev->bd_dev),
			       starting_lsn + lsn, new_lsn);
			job.sector = new_lsn;
			rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
			if (rc) {
				/* This replacement sector is bad.
				 * Try the next one.
				 */
				DMERR("dm-bbr: device %s: replacement sector "PFU64" is bad. Skipping.",
				      format_dev_t(b, bbr_id->dev->bdev->bd_dev), new_lsn);
				atomic_inc(&bbr_id->in_use_replacement_blks);
				continue;
			}

			/* Add this new entry to the on-disk table. */
			table_sector_index = new_lsn -
					     bbr_id->start_replacement_sect;
			table_sector_offset = table_sector_index /
					      BBR_ENTRIES_PER_SECT;
			index = table_sector_index % BBR_ENTRIES_PER_SECT;

			bbr_table = &bbr_id->bbr_table[table_sector_offset];
			bbr_table->entries[index].bad_sect = starting_lsn + lsn;
			bbr_table->entries[index].replacement_sect = new_lsn;
			bbr_table->in_use_cnt++;
			bbr_table->sequence_number++;
			bbr_table->crc = 0;
			bbr_table->crc = calculate_crc(INITIAL_CRC,
						       bbr_table,
						       sizeof(struct bbr_table));

			/* Write the table to disk. */
			cpu_bbr_table_sector_to_le(bbr_table, bbr_table);
			if (bbr_id->lba_table1) {
				job.sector = bbr_id->lba_table1 + table_sector_offset;
				rc = dm_io_sync_vm(1, &job, WRITE, bbr_table, &error);
			}
			if (bbr_id->lba_table2) {
				job.sector = bbr_id->lba_table2 + table_sector_offset;
				rc |= dm_io_sync_vm(1, &job, WRITE, bbr_table, &error);
			}
			le_bbr_table_sector_to_cpu(bbr_table);

			if (rc) {
				/* Error writing one of the tables to disk. */
				DMERR("dm-bbr: device %s: error updating BBR tables on disk.",
				      format_dev_t(b, bbr_id->dev->bdev->bd_dev));
				return rc;
			}

			/* Insert a new entry in the remapping binary-tree. */
			rc = bbr_insert_remap_entry(bbr_id,
						    &bbr_table->entries[index]);
			if (rc) {
				DMERR("dm-bbr: device %s: error adding new entry to remap tree.",
				      format_dev_t(b, bbr_id->dev->bdev->bd_dev));
				return rc;
			}

			atomic_inc(&bbr_id->in_use_replacement_blks);
		}
	}

	return 0;
}

/**
 * bbr_io_process_request
 *
 * For each sector in this request, check if the sector has already
 * been remapped. If so, process all previous sectors in the request,
 * followed by the remapped sector. Then reset the starting lsn and
 * count, and keep going with the rest of the request as if it were
 * a whole new request. If any of the sync_io's return an error,
 * call the remapper to relocate the bad sector(s).
 *
 * 2.5 Note: When switching over to bio's for the I/O path, we have made
 * the assumption that the I/O request described by the bio is one
 * virtually contiguous piece of memory (even though the bio vector
 * describes it using a series of physical page addresses).
 **/
static int bbr_io_process_request(struct bbr_private *bbr_id,
				  struct bio *bio)
{
	struct io_region job;
	u64 starting_lsn = bio->bi_sector;
	u64 count, lsn, remapped_lsn;
	struct page_list pl;
	unsigned int offset;
	unsigned long error;
	int i, rw = bio_data_dir(bio);
	int rc = 0;

	job.bdev = bbr_id->dev->bdev;
	pl.next = NULL;

	/* Each bio can contain multiple vectors, each with a different page.
	 * Treat each vector as a separate request.
	 */
	/* KMC: Is this the right way to walk the bvec list? */
	for (i = 0;
	     i < bio->bi_vcnt;
	     i++, bio->bi_idx++, starting_lsn += count) {

		/* Bvec info: number of sectors, page,
		 * and byte-offset within page.
		 */
		count = bio_iovec(bio)->bv_len >> SECTOR_SHIFT;
		pl.page = bio_iovec(bio)->bv_page;
		offset = bio_iovec(bio)->bv_offset;

		/* For each sector in this bvec, check if the sector has
		 * already been remapped. If so, process all previous sectors
		 * in this request, followed by the remapped sector. Then reset
		 * the starting lsn and count and keep going with the rest of
		 * the request as if it were a whole new request.
		 */
		for (lsn = 0; lsn < count; lsn++) {
			remapped_lsn = starting_lsn + lsn;
			rc = bbr_remap(bbr_id, &remapped_lsn);
			if (!rc) {
				/* This sector is fine. */
				continue;
			}

			/* Process all sectors in the request up to this one. */
			if (lsn > 0) {
				job.sector = starting_lsn;
				job.count = lsn;
				rc = dm_io_sync(1, &job, rw, &pl,
						offset, &error);
				if (rc) {
					/* If this I/O failed, then one of the
					 * sectors in this request needs to be
					 * relocated.
					 */
					rc = bbr_io_remap_error(bbr_id, rw,
								starting_lsn,
								lsn, pl.page,
								offset);
					if (rc) {
						/* KMC: Return? Or continue to next bvec? */
						return rc;
					}
				}
				offset += (lsn << SECTOR_SHIFT);
			}

			/* Process the remapped sector. */
			job.sector = remapped_lsn;
			job.count = 1;
			rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
			if (rc) {
				/* BUGBUG - Need more processing if this caused
				 * an error. If this I/O failed, then the
				 * existing remap is now bad, and we need to
				 * find a new remap. Can't use
				 * bbr_io_remap_error(), because the existing
				 * map entry needs to be changed, not added
				 * again, and the original table entry also
				 * needs to be changed.
				 */
				return rc;
			}

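			/* Restart the scan just past the remapped sector:
			 * lsn is reset to -1 so that the loop increment
			 * brings it back to 0 for the new, shorter range.
			 */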
			starting_lsn += (lsn + 1);
			count -= (lsn + 1);
			lsn = -1;
			offset += SECTOR_SIZE;
		}

		/* Check for any remaining sectors after the last split. This
		 * could potentially be the whole request, but that should be a
		 * rare case because requests should only be processed by the
		 * thread if we know an error occurred or they contained one or
		 * more remapped sectors.
		 */
		if (count) {
			job.sector = starting_lsn;
			job.count = count;
			rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
			if (rc) {
				/* If this I/O failed, then one of the sectors
				 * in this request needs to be relocated.
				 */
				rc = bbr_io_remap_error(bbr_id, rw, starting_lsn,
							count, pl.page, offset);
				if (rc) {
					/* KMC: Return? Or continue to next bvec? */
					return rc;
				}
			}
		}
	}

	return 0;
}

static void bbr_io_process_requests(struct bbr_private *bbr_id,
				    struct bio *bio)
{
	struct bio *next;
	int rc;

	while (bio) {
		next = bio->bi_next;
		bio->bi_next = NULL;

		rc = bbr_io_process_request(bbr_id, bio);

		bio_endio(bio, bio->bi_size, rc);

		bio = next;
	}
}

/**
 * bbr_remap_handler
 *
 * This is the handler for the bbr work-queue.
 *
 * I/O requests should only be sent to this handler if we know that:
 * a) the request contains at least one remapped sector.
 * or
 * b) the request caused an error on the normal I/O path.
 *
 * This function uses synchronous I/O, so sending a request to this
 * thread that doesn't need special processing will cause severe
 * performance degradation.
 **/
static void bbr_remap_handler(void *data)
{
	struct bbr_private *bbr_id = data;
	struct bio *bio;
	unsigned long flags;

	spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
	bio = bio_list_get(&bbr_id->remap_ios);
	spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);

	bbr_io_process_requests(bbr_id, bio);
}

/**
 * bbr_endio
 *
 * This is the callback for normal write requests. Check for an error
 * during the I/O, and send to the thread for processing if necessary.
 **/
static int bbr_endio(struct dm_target *ti, struct bio *bio,
		     int error, union map_info *map_context)
{
	struct bbr_private *bbr_id = ti->private;
	struct dm_bio_details *bbr_io = map_context->ptr;

	if (error && bbr_io) {
		unsigned long flags;
		char b[32];

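		/* Rewind the bio to the state recorded by bbr_map() so the
		 * whole request can be retried from scratch by the work
		 * queue.
		 */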
		dm_bio_restore(bbr_io, bio);
		map_context->ptr = NULL;

		DMERR("dm-bbr: device %s: I/O failure on sector %lu. "
		      "Scheduling for retry.",
		      format_dev_t(b, bbr_id->dev->bdev->bd_dev),
		      (unsigned long)bio->bi_sector);

		spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
		bio_list_add(&bbr_id->remap_ios, bio);
		spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);

		queue_work(dm_bbr_wq, &bbr_id->remap_work);

		error = 1;
	}

	if (bbr_io)
		mempool_free(bbr_io, bbr_io_pool);

	return error;
}

/**
 * Construct a bbr mapping
 **/
static int bbr_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct bbr_private *bbr_id;
	unsigned long block_size;
	char *end;
	int rc = -EINVAL;

	if (argc != 8) {
		ti->error = "dm-bbr requires exactly 8 arguments: "
			    "device offset table1_lsn table2_lsn table_size start_replacement nr_replacement_blks block_size";
		goto out1;
	}

	bbr_id = bbr_alloc_private();
	if (!bbr_id) {
		ti->error = "dm-bbr: Error allocating bbr private data.";
		goto out1;
	}

	bbr_id->offset = simple_strtoull(argv[1], &end, 10);
	bbr_id->lba_table1 = simple_strtoull(argv[2], &end, 10);
	bbr_id->lba_table2 = simple_strtoull(argv[3], &end, 10);
	bbr_id->nr_sects_bbr_table = simple_strtoull(argv[4], &end, 10);
	bbr_id->start_replacement_sect = simple_strtoull(argv[5], &end, 10);
	bbr_id->nr_replacement_blks = simple_strtoull(argv[6], &end, 10);
	block_size = simple_strtoul(argv[7], &end, 10);
	bbr_id->blksize_in_sects = (block_size >> SECTOR_SHIFT);

	bbr_id->bbr_table = vmalloc(bbr_id->nr_sects_bbr_table << SECTOR_SHIFT);
	if (!bbr_id->bbr_table) {
		ti->error = "dm-bbr: Error allocating bbr table.";
		goto out2;
	}

	if (dm_get_device(ti, argv[0], 0, ti->len,
			  dm_table_get_mode(ti->table), &bbr_id->dev)) {
		ti->error = "dm-bbr: Device lookup failed";
		goto out2;
	}

	rc = bbr_setup(bbr_id);
	if (rc) {
		ti->error = "dm-bbr: Device setup failed";
		goto out3;
	}

	ti->private = bbr_id;
	return 0;

out3:
	dm_put_device(ti, bbr_id->dev);
out2:
	bbr_free_private(bbr_id);
out1:
	return rc;
}

static void bbr_dtr(struct dm_target *ti)
{
	struct bbr_private *bbr_id = ti->private;

	dm_put_device(ti, bbr_id->dev);
	bbr_free_private(bbr_id);
}

static int bbr_map(struct dm_target *ti, struct bio *bio,
		   union map_info *map_context)
{
	struct bbr_private *bbr_id = ti->private;
	struct dm_bio_details *bbr_io;
	unsigned long flags;
	int rc = 1;

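	/* Returning 1 tells device-mapper to submit the (possibly
	 * re-targeted) bio itself; returning 0 means this target has taken
	 * ownership of the bio and will queue it.
	 */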
	bio->bi_sector += bbr_id->offset;

	if (atomic_read(&bbr_id->in_use_replacement_blks) == 0 ||
	    !bbr_remap_probe(bbr_id, bio->bi_sector, bio_sectors(bio))) {
		/* No existing remaps or this request doesn't
		 * contain any remapped sectors.
		 */
		bio->bi_bdev = bbr_id->dev->bdev;

		bbr_io = mempool_alloc(bbr_io_pool, GFP_NOIO);
		dm_bio_record(bbr_io, bio);
		map_context->ptr = bbr_io;
	} else {
		/* This request has at least one remapped sector.
		 * Give it to the work-queue for processing.
		 */
		map_context->ptr = NULL;
		spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
		bio_list_add(&bbr_id->remap_ios, bio);
		spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);

		queue_work(dm_bbr_wq, &bbr_id->remap_work);
		rc = 0;
	}

	return rc;
}

static int bbr_status(struct dm_target *ti, status_type_t type,
		      char *result, unsigned int maxlen)
{
	struct bbr_private *bbr_id = ti->private;
	char b[BDEVNAME_SIZE];

	switch (type) {
	case STATUSTYPE_INFO:
		result[0] = '\0';
		break;

	case STATUSTYPE_TABLE:
		snprintf(result, maxlen, "%s "PFU64" "PFU64" "PFU64" "PFU64" "PFU64" "PFU64" %u",
			 format_dev_t(b, bbr_id->dev->bdev->bd_dev),
			 bbr_id->offset, bbr_id->lba_table1, bbr_id->lba_table2,
			 bbr_id->nr_sects_bbr_table,
			 bbr_id->start_replacement_sect,
			 bbr_id->nr_replacement_blks,
			 bbr_id->blksize_in_sects << SECTOR_SHIFT);
		break;
	}
	return 0;
}

static struct target_type bbr_target = {
	.name = "bbr",
	.version = {1, 0, 1},
	.module = THIS_MODULE,
	.ctr = bbr_ctr,
	.dtr = bbr_dtr,
	.map = bbr_map,
	.end_io = bbr_endio,
	.status = bbr_status,
};

int __init dm_bbr_init(void)
{
	int rc;

	rc = dm_register_target(&bbr_target);
	if (rc) {
		DMERR("dm-bbr: error registering target.");
		goto err1;
	}

	bbr_remap_cache = kmem_cache_create("bbr-remap",
					    sizeof(struct bbr_runtime_remap),
					    0, SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!bbr_remap_cache) {
		DMERR("dm-bbr: error creating remap cache.");
		rc = -ENOMEM;
		goto err2;
	}

	bbr_io_cache = kmem_cache_create("bbr-io", sizeof(struct dm_bio_details),
					 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!bbr_io_cache) {
		DMERR("dm-bbr: error creating io cache.");
		rc = -ENOMEM;
		goto err3;
	}

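	/* Preallocate 256 dm_bio_details so per-bio state can be recorded
	 * in the I/O path without risking allocation failure.
	 */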
	bbr_io_pool = mempool_create(256, mempool_alloc_slab,
				     mempool_free_slab, bbr_io_cache);
	if (!bbr_io_pool) {
		DMERR("dm-bbr: error creating io mempool.");
		rc = -ENOMEM;
		goto err4;
	}

	dm_bbr_wq = create_workqueue("dm-bbr");
	if (!dm_bbr_wq) {
		DMERR("dm-bbr: error creating work-queue.");
		rc = -ENOMEM;
		goto err5;
	}

	rc = dm_io_get(1);
	if (rc) {
		DMERR("dm-bbr: error initializing I/O service.");
		goto err6;
	}

	return 0;

err6:
	destroy_workqueue(dm_bbr_wq);
err5:
	mempool_destroy(bbr_io_pool);
err4:
	kmem_cache_destroy(bbr_io_cache);
err3:
	kmem_cache_destroy(bbr_remap_cache);
err2:
	dm_unregister_target(&bbr_target);
err1:
	return rc;
}

void __exit dm_bbr_exit(void)
{
	dm_io_put(1);
	destroy_workqueue(dm_bbr_wq);
	mempool_destroy(bbr_io_pool);
	kmem_cache_destroy(bbr_io_cache);
	kmem_cache_destroy(bbr_remap_cache);
	dm_unregister_target(&bbr_target);
}

module_init(dm_bbr_init);
module_exit(dm_bbr_exit);
MODULE_LICENSE("GPL");