Bug 9171

Summary: __bread and 2TiB problem
Product: IO/Storage Reporter: Alexander Mamaev (shura_mam)
Component: Block LayerAssignee: Jens Axboe (axboe)
Status: CLOSED OBSOLETE    
Severity: normal CC: akpm, alan, erik.andren, pmkernelbugs
Priority: P1    
Hardware: All   
OS: Linux   
Kernel Version: 2.6.22 Subsystem:
Regression: No Bisected commit-id:

Description Alexander Mamaev 2007-10-16 08:47:02 UTC
Most recent kernel where this bug did not occur:
Distribution: SUSE 10.1 (2.6.18) and Debian (2.6.22)
Hardware Environment:
Software Environment: 
Problem Description:

Steps to reproduce:

Compile the following module and try to mount any device of size > 2TiB.
and you will see the infinite loop
"__find_get_block_slow() failed"

///////////////////////////////////////////////////////////
// This example shows __bread error if block >= 2^32
//
//
///////////////////////////////////////////////////////////

#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/vermagic.h>

#define USE_BREAD "Activate this macros to get the __bread error"

#ifndef USE_BREAD

///////////////////////////////////////////////////////////
// end_bio_io_page
//
//
///////////////////////////////////////////////////////////
static int
end_bio_io_page(
    struct bio *bio,
    unsigned int bytes_done,
    int error
    )
{
  struct page *page = bio->bi_private;
  if (bio->bi_size)
    return 1;

  if (!error)
    SetPageUptodate(page);
  else
    printk(KERN_WARNING "t2tb: error %d reading page\n", error);
  unlock_page(page);
  bio_put(bio);
  return 0;
}


///////////////////////////////////////////////////////////
// t2tb_read_page
//
//
///////////////////////////////////////////////////////////
struct page*
t2tb_read_page(
    struct super_block *sb,
    sector_t sector
    )
{
  struct page *page;
  struct bio *bio;

  page = alloc_page(GFP_KERNEL);
  if ( unlikely(!page) )
    return NULL;

  ClearPageUptodate(page);
  ClearPageDirty(page);
  lock_page(page);

  bio = bio_alloc(GFP_KERNEL, 1);
  if (unlikely(!bio)) {
    __free_page(page);
    return NULL;
  }

  bio->bi_sector  = sector;
  bio->bi_bdev    = sb->s_bdev;
  bio_add_page(bio, page, PAGE_SIZE, 0);

  bio->bi_end_io  = end_bio_io_page;
  bio->bi_private = page;
  bio_get(bio);
  submit_bio( READ_SYNC, bio);
  wait_on_page_locked(page);
  bio_put(bio);
  if (!PageUptodate(page)) {
    __free_page(page);
    return NULL;
  }

  return page;
}


///////////////////////////////////////////////////////////
// t2tb_read
//
//
///////////////////////////////////////////////////////////
static int
t2tb_read(
    struct super_block *sb,
    sector_t sector
    )
{
  struct page* page;
  printk(KERN_NOTICE ": read(0x%llx)\n", sector );

  page = t2tb_read_page( sb, sector );

  if ( NULL == page )
    return -1;

  __free_page( page );
  return 0;
}

#else

///////////////////////////////////////////////////////////
// t2tb_read
//
//
///////////////////////////////////////////////////////////
static int
t2tb_read(
    struct super_block *sb,
    sector_t sector
    )
{
  struct buffer_head* bh;
  printk(KERN_NOTICE ": read(0x%llx)\n", sector );

  bh = __bread( sb->s_bdev, sector, 512 );
  if ( NULL == bh )
    return -1;

  __brelse(bh);
  return 0;
}

#endif



///////////////////////////////////////////////////////////
// t2tb_read_super
//
//
///////////////////////////////////////////////////////////
static int
t2tb_read_super(
    struct super_block * sb,
    void * data,
    int silent
    )
{
  set_blocksize( sb->s_bdev, 512 );
  sb->s_blocksize      = 512;
  sb->s_blocksize_bits = 9;

  printk(KERN_NOTICE "t2tb_read_super: enter\n" );

  t2tb_read( sb, 0 );
  t2tb_read( sb, 0x100000000ull-1 );
  //
  // If 't2tb_read' uses __bread then the next function will never return.
  // Infinite loop at "__find_get_block_slow() failed" occurs
  //
  t2tb_read( sb, 0x100000000ull );

  printk(KERN_NOTICE "t2tb_read_super: exit\n" );

  return -EINVAL;
}


#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18)
static int
t2tb_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name,
    void *data, struct vfsmount* mnt)
{
  return get_sb_bdev(fs_type, flags, dev_name, data, t2tb_read_super, mnt );
}
#else
static struct super_block *
t2tb_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name,
    void *data)
{
  return get_sb_bdev(fs_type, flags, dev_name, data, t2tb_read_super );
}
#endif

static struct file_system_type t2tb_fs_type = {
    .owner      = THIS_MODULE,
    .name       = "t2tb",
    .get_sb     = t2tb_get_sb,
    .kill_sb    = kill_block_super,
    .fs_flags   = FS_REQUIRES_DEV,
};


MODULE_INFO(vermagic, VERMAGIC_STRING);
struct module __this_module
__attribute__((section(".gnu.linkonce.this_module"))) = {
    .name = "t2tb",
    .init = init_module,
    .exit = cleanup_module,
};



///////////////////////////////////////////////////////////
// t2tb_init
//
// module init function
///////////////////////////////////////////////////////////
static int
__init t2tb_init(void)
{
  int ret = register_filesystem(&t2tb_fs_type);
  if ( 0 != ret )
    return ret;

  printk(KERN_NOTICE ": t2tb driver loaded\n");
  return 0;
}


///////////////////////////////////////////////////////////
// t2tb_exit
//
// module exit function
///////////////////////////////////////////////////////////
static void
__exit t2tb_exit(void)
{
  unregister_filesystem(&t2tb_fs_type);
  printk(KERN_NOTICE ": t2tb driver unloaded\n");
}

module_init(t2tb_init)
module_exit(t2tb_exit)
Comment 1 Anonymous Emailer 2007-10-16 09:30:49 UTC
Reply-To: akpm@linux-foundation.org

well yes.  I suppose this:

From: Andrew Morton <akpm@linux-foundation.org>

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 fs/buffer.c |   14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff -puN fs/buffer.c~a fs/buffer.c
--- a/fs/buffer.c~a
+++ a/fs/buffer.c
@@ -1113,6 +1113,20 @@ __getblk_slow(struct block_device *bdev,
 		return NULL;
 	}
 
+#if (BITS_PER_LONG == 32) && defined(CONFIG_LBD)
+	if ((block >> (PAGE_CACHE_SHIFT - bdev->bd_inode->i_blkbits)) &
+			0xffffffff00000000UL) {
+		/*
+		 * We'll fail because the block is outside the range
+		 * which a 32-bit pagecache index can address
+		 */
+		printk(KERN_ERR "getblk(): sector number too large for 32-bit"
+				"machines\n");
+		dump_stack();
+		return NULL;
+	}
+#endif
+
 	for (;;) {
 		struct buffer_head * bh;
 		int ret;
_

will stop it hanging, but I'm not sure that it justifies the additional
overhead?
Comment 2 Alexander Mamaev 2007-10-16 10:06:51 UTC
Lets look my case:
block                     = 0x100000000
PAGE_CACHE_SHIFT          = 12
bdev->bd_inode->i_blkbits = 9    ( 'cause set_blocksize( sb->s_bdev, 512 ); )

This means that:
(block >> (PAGE_CACHE_SHIFT - bdev->bd_inode->i_blkbits)) = 0x20000000

It fits into 32 bit!
Comment 3 Erik Andr 2009-12-30 12:55:04 UTC
Is this still an issue with a recent kernel?
Comment 4 Piyush 2011-06-24 11:35:28 UTC
I don't have enough knowledge on this but I have one question out of curiosity:

Why are we doing this check in the __getblk_slow? I think we move do this check and also the check for block size which we are doing currently in __getblk_slow to 
__getblk i.e we can do this check before even looking for this block in CPU's lru list and page cache.