Bug 9171 - __bread and 2TiB problem
Summary: __bread and 2TiB problem
Status: CLOSED OBSOLETE
Alias: None
Product: IO/Storage
Classification: Unclassified
Component: Block Layer (show other bugs)
Hardware: All Linux
: P1 normal
Assignee: Jens Axboe
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2007-10-16 08:47 UTC by Alexander Mamaev
Modified: 2012-05-17 14:49 UTC (History)
4 users (show)

See Also:
Kernel Version: 2.6.22
Subsystem:
Regression: No
Bisected commit-id:


Attachments

Description Alexander Mamaev 2007-10-16 08:47:02 UTC
Most recent kernel where this bug did not occur:
Distribution: SUSE 10.1 (2.6.18) and Debian (2.6.22)
Hardware Environment:
Software Environment: 
Problem Description:

Steps to reproduce:

Compile the following module and try to mount any device of size > 2TiB.
and you will see the infinite loop
"__find_get_block_slow() failed"

///////////////////////////////////////////////////////////
// This example shows __bread error if block >= 2^32
//
//
///////////////////////////////////////////////////////////

#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/vermagic.h>

#define USE_BREAD "Activate this macros to get the __bread error"

#ifndef USE_BREAD

///////////////////////////////////////////////////////////
// end_bio_io_page
//
//
///////////////////////////////////////////////////////////
static int
end_bio_io_page(
    struct bio *bio,
    unsigned int bytes_done,
    int error
    )
{
  struct page *page = bio->bi_private;
  if (bio->bi_size)
    return 1;

  if (!error)
    SetPageUptodate(page);
  else
    printk(KERN_WARNING "t2tb: error %d reading page\n", error);
  unlock_page(page);
  bio_put(bio);
  return 0;
}


///////////////////////////////////////////////////////////
// t2tb_read_page
//
//
///////////////////////////////////////////////////////////
struct page*
t2tb_read_page(
    struct super_block *sb,
    sector_t sector
    )
{
  struct page *page;
  struct bio *bio;

  page = alloc_page(GFP_KERNEL);
  if ( unlikely(!page) )
    return NULL;

  ClearPageUptodate(page);
  ClearPageDirty(page);
  lock_page(page);

  bio = bio_alloc(GFP_KERNEL, 1);
  if (unlikely(!bio)) {
    __free_page(page);
    return NULL;
  }

  bio->bi_sector  = sector;
  bio->bi_bdev    = sb->s_bdev;
  bio_add_page(bio, page, PAGE_SIZE, 0);

  bio->bi_end_io  = end_bio_io_page;
  bio->bi_private = page;
  bio_get(bio);
  submit_bio( READ_SYNC, bio);
  wait_on_page_locked(page);
  bio_put(bio);
  if (!PageUptodate(page)) {
    __free_page(page);
    return NULL;
  }

  return page;
}


///////////////////////////////////////////////////////////
// t2tb_read
//
//
///////////////////////////////////////////////////////////
static int
t2tb_read(
    struct super_block *sb,
    sector_t sector
    )
{
  struct page* page;
  printk(KERN_NOTICE ": read(0x%llx)\n", sector );

  page = t2tb_read_page( sb, sector );

  if ( NULL == page )
    return -1;

  __free_page( page );
  return 0;
}

#else

///////////////////////////////////////////////////////////
// t2tb_read
//
//
///////////////////////////////////////////////////////////
static int
t2tb_read(
    struct super_block *sb,
    sector_t sector
    )
{
  struct buffer_head* bh;
  printk(KERN_NOTICE ": read(0x%llx)\n", sector );

  bh = __bread( sb->s_bdev, sector, 512 );
  if ( NULL == bh )
    return -1;

  __brelse(bh);
  return 0;
}

#endif



///////////////////////////////////////////////////////////
// t2tb_read_super
//
//
///////////////////////////////////////////////////////////
static int
t2tb_read_super(
    struct super_block * sb,
    void * data,
    int silent
    )
{
  set_blocksize( sb->s_bdev, 512 );
  sb->s_blocksize      = 512;
  sb->s_blocksize_bits = 9;

  printk(KERN_NOTICE "t2tb_read_super: enter\n" );

  t2tb_read( sb, 0 );
  t2tb_read( sb, 0x100000000ull-1 );
  //
  // If 't2tb_read' uses __bread then the next function will never return.
  // Infinite loop at "__find_get_block_slow() failed" occurs
  //
  t2tb_read( sb, 0x100000000ull );

  printk(KERN_NOTICE "t2tb_read_super: exit\n" );

  return -EINVAL;
}


#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18)
static int
t2tb_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name,
    void *data, struct vfsmount* mnt)
{
  return get_sb_bdev(fs_type, flags, dev_name, data, t2tb_read_super, mnt );
}
#else
static struct super_block *
t2tb_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name,
    void *data)
{
  return get_sb_bdev(fs_type, flags, dev_name, data, t2tb_read_super );
}
#endif

static struct file_system_type t2tb_fs_type = {
    .owner      = THIS_MODULE,
    .name       = "t2tb",
    .get_sb     = t2tb_get_sb,
    .kill_sb    = kill_block_super,
    .fs_flags   = FS_REQUIRES_DEV,
};


MODULE_INFO(vermagic, VERMAGIC_STRING);
struct module __this_module
__attribute__((section(".gnu.linkonce.this_module"))) = {
    .name = "t2tb",
    .init = init_module,
    .exit = cleanup_module,
};



///////////////////////////////////////////////////////////
// t2tb_init
//
// module init function
///////////////////////////////////////////////////////////
static int
__init t2tb_init(void)
{
  int ret = register_filesystem(&t2tb_fs_type);
  if ( 0 != ret )
    return ret;

  printk(KERN_NOTICE ": t2tb driver loaded\n");
  return 0;
}


///////////////////////////////////////////////////////////
// t2tb_exit
//
// module exit function
///////////////////////////////////////////////////////////
static void
__exit t2tb_exit(void)
{
  unregister_filesystem(&t2tb_fs_type);
  printk(KERN_NOTICE ": t2tb driver unloaded\n");
}

module_init(t2tb_init)
module_exit(t2tb_exit)
Comment 1 Anonymous Emailer 2007-10-16 09:30:49 UTC
Reply-To: akpm@linux-foundation.org

well yes.  I suppose this:

From: Andrew Morton <akpm@linux-foundation.org>

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 fs/buffer.c |   14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff -puN fs/buffer.c~a fs/buffer.c
--- a/fs/buffer.c~a
+++ a/fs/buffer.c
@@ -1113,6 +1113,20 @@ __getblk_slow(struct block_device *bdev,
 		return NULL;
 	}
 
+#if (BITS_PER_LONG == 32) && defined(CONFIG_LBD)
+	if ((block >> (PAGE_CACHE_SHIFT - bdev->bd_inode->i_blkbits)) &
+			0xffffffff00000000UL) {
+		/*
+		 * We'll fail because the block is outside the range
+		 * which a 32-bit pagecache index can address
+		 */
+		printk(KERN_ERR "getblk(): sector number too large for 32-bit"
+				"machines\n");
+		dump_stack();
+		return NULL;
+	}
+#endif
+
 	for (;;) {
 		struct buffer_head * bh;
 		int ret;
_

will stop it hanging, but I'm not sure that it justifies the additional
overhead?
Comment 2 Alexander Mamaev 2007-10-16 10:06:51 UTC
Lets look my case:
block                     = 0x100000000
PAGE_CACHE_SHIFT          = 12
bdev->bd_inode->i_blkbits = 9    ( 'cause set_blocksize( sb->s_bdev, 512 ); )

This means that:
(block >> (PAGE_CACHE_SHIFT - bdev->bd_inode->i_blkbits)) = 0x20000000

It fits into 32 bit!
Comment 3 Erik Andr 2009-12-30 12:55:04 UTC
Is this still an issue with a recent kernel?
Comment 4 Piyush 2011-06-24 11:35:28 UTC
I don't have enough knowledge on this but I have one question out of curiosity:

Why are we doing this check in the __getblk_slow? I think we move do this check and also the check for block size which we are doing currently in __getblk_slow to 
__getblk i.e we can do this check before even looking for this block in CPU's lru list and page cache.

Note You need to log in before you can comment on or make changes to this bug.