Most recent kernel where this bug did not occur: Distribution: SUSE 10.1 (2.6.18) and Debian (2.6.22) Hardware Environment: Software Environment: Problem Description: Steps to reproduce: Compile the following module and try to mount any device of size > 2TiB. and you will see the infinite loop "__find_get_block_slow() failed" /////////////////////////////////////////////////////////// // This example shows __bread error if block >= 2^32 // // /////////////////////////////////////////////////////////// #include <linux/version.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/blkdev.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/buffer_head.h> #include <linux/vermagic.h> #define USE_BREAD "Activate this macros to get the __bread error" #ifndef USE_BREAD /////////////////////////////////////////////////////////// // end_bio_io_page // // /////////////////////////////////////////////////////////// static int end_bio_io_page( struct bio *bio, unsigned int bytes_done, int error ) { struct page *page = bio->bi_private; if (bio->bi_size) return 1; if (!error) SetPageUptodate(page); else printk(KERN_WARNING "t2tb: error %d reading page\n", error); unlock_page(page); bio_put(bio); return 0; } /////////////////////////////////////////////////////////// // t2tb_read_page // // /////////////////////////////////////////////////////////// struct page* t2tb_read_page( struct super_block *sb, sector_t sector ) { struct page *page; struct bio *bio; page = alloc_page(GFP_KERNEL); if ( unlikely(!page) ) return NULL; ClearPageUptodate(page); ClearPageDirty(page); lock_page(page); bio = bio_alloc(GFP_KERNEL, 1); if (unlikely(!bio)) { __free_page(page); return NULL; } bio->bi_sector = sector; bio->bi_bdev = sb->s_bdev; bio_add_page(bio, page, PAGE_SIZE, 0); bio->bi_end_io = end_bio_io_page; bio->bi_private = page; bio_get(bio); submit_bio( READ_SYNC, bio); wait_on_page_locked(page); bio_put(bio); if (!PageUptodate(page)) { __free_page(page); return NULL; } return page; } /////////////////////////////////////////////////////////// // t2tb_read // // /////////////////////////////////////////////////////////// static int t2tb_read( struct super_block *sb, sector_t sector ) { struct page* page; printk(KERN_NOTICE ": read(0x%llx)\n", sector ); page = t2tb_read_page( sb, sector ); if ( NULL == page ) return -1; __free_page( page ); return 0; } #else /////////////////////////////////////////////////////////// // t2tb_read // // /////////////////////////////////////////////////////////// static int t2tb_read( struct super_block *sb, sector_t sector ) { struct buffer_head* bh; printk(KERN_NOTICE ": read(0x%llx)\n", sector ); bh = __bread( sb->s_bdev, sector, 512 ); if ( NULL == bh ) return -1; __brelse(bh); return 0; } #endif /////////////////////////////////////////////////////////// // t2tb_read_super // // /////////////////////////////////////////////////////////// static int t2tb_read_super( struct super_block * sb, void * data, int silent ) { set_blocksize( sb->s_bdev, 512 ); sb->s_blocksize = 512; sb->s_blocksize_bits = 9; printk(KERN_NOTICE "t2tb_read_super: enter\n" ); t2tb_read( sb, 0 ); t2tb_read( sb, 0x100000000ull-1 ); // // If 't2tb_read' uses __bread then the next function will never return. // Infinite loop at "__find_get_block_slow() failed" occurs // t2tb_read( sb, 0x100000000ull ); printk(KERN_NOTICE "t2tb_read_super: exit\n" ); return -EINVAL; } #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18) static int t2tb_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount* mnt) { return get_sb_bdev(fs_type, flags, dev_name, data, t2tb_read_super, mnt ); } #else static struct super_block * t2tb_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { return get_sb_bdev(fs_type, flags, dev_name, data, t2tb_read_super ); } #endif static struct file_system_type t2tb_fs_type = { .owner = THIS_MODULE, .name = "t2tb", .get_sb = t2tb_get_sb, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; MODULE_INFO(vermagic, VERMAGIC_STRING); struct module __this_module __attribute__((section(".gnu.linkonce.this_module"))) = { .name = "t2tb", .init = init_module, .exit = cleanup_module, }; /////////////////////////////////////////////////////////// // t2tb_init // // module init function /////////////////////////////////////////////////////////// static int __init t2tb_init(void) { int ret = register_filesystem(&t2tb_fs_type); if ( 0 != ret ) return ret; printk(KERN_NOTICE ": t2tb driver loaded\n"); return 0; } /////////////////////////////////////////////////////////// // t2tb_exit // // module exit function /////////////////////////////////////////////////////////// static void __exit t2tb_exit(void) { unregister_filesystem(&t2tb_fs_type); printk(KERN_NOTICE ": t2tb driver unloaded\n"); } module_init(t2tb_init) module_exit(t2tb_exit)
Reply-To: akpm@linux-foundation.org well yes. I suppose this: From: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> --- fs/buffer.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff -puN fs/buffer.c~a fs/buffer.c --- a/fs/buffer.c~a +++ a/fs/buffer.c @@ -1113,6 +1113,20 @@ __getblk_slow(struct block_device *bdev, return NULL; } +#if (BITS_PER_LONG == 32) && defined(CONFIG_LBD) + if ((block >> (PAGE_CACHE_SHIFT - bdev->bd_inode->i_blkbits)) & + 0xffffffff00000000UL) { + /* + * We'll fail because the block is outside the range + * which a 32-bit pagecache index can address + */ + printk(KERN_ERR "getblk(): sector number too large for 32-bit" + "machines\n"); + dump_stack(); + return NULL; + } +#endif + for (;;) { struct buffer_head * bh; int ret; _ will stop it hanging, but I'm not sure that it justifies the additional overhead?
Lets look my case: block = 0x100000000 PAGE_CACHE_SHIFT = 12 bdev->bd_inode->i_blkbits = 9 ( 'cause set_blocksize( sb->s_bdev, 512 ); ) This means that: (block >> (PAGE_CACHE_SHIFT - bdev->bd_inode->i_blkbits)) = 0x20000000 It fits into 32 bit!
Is this still an issue with a recent kernel?
I don't have enough knowledge on this but I have one question out of curiosity: Why are we doing this check in the __getblk_slow? I think we move do this check and also the check for block size which we are doing currently in __getblk_slow to __getblk i.e we can do this check before even looking for this block in CPU's lru list and page cache.