--- linux-2.6.17-ming/fs/ext2/super.c | 25 linux-2.6.17-ming/fs/ext3/Makefile | 2 linux-2.6.17-ming/fs/ext3/balloc.c | 302 +-- linux-2.6.17-ming/fs/ext3/dir.c | 3 linux-2.6.17-ming/fs/ext3/extents.c | 2103 ++++++++++++++++++++++ linux-2.6.17-ming/fs/ext3/ialloc.c | 38 linux-2.6.17-ming/fs/ext3/inode.c | 91 linux-2.6.17-ming/fs/ext3/ioctl.c | 3 linux-2.6.17-ming/fs/ext3/resize.c | 114 - linux-2.6.17-ming/fs/ext3/super.c | 145 - linux-2.6.17-ming/fs/ext3/xattr.c | 27 linux-2.6.17-ming/fs/file_table.c | 2 linux-2.6.17-ming/fs/jbd/commit.c | 19 linux-2.6.17-ming/fs/jbd/journal.c | 27 linux-2.6.17-ming/fs/jbd/recovery.c | 51 linux-2.6.17-ming/fs/jbd/revoke.c | 38 linux-2.6.17-ming/include/asm-h8300/types.h | 1 linux-2.6.17-ming/include/asm-i386/types.h | 1 linux-2.6.17-ming/include/asm-mips/types.h | 5 linux-2.6.17-ming/include/asm-powerpc/types.h | 5 linux-2.6.17-ming/include/asm-s390/types.h | 5 linux-2.6.17-ming/include/asm-sh/types.h | 1 linux-2.6.17-ming/include/asm-x86_64/types.h | 1 linux-2.6.17-ming/include/linux/ext3_fs.h | 182 + linux-2.6.17-ming/include/linux/ext3_fs_extents.h | 196 ++ linux-2.6.17-ming/include/linux/ext3_fs_i.h | 29 linux-2.6.17-ming/include/linux/ext3_fs_sb.h | 10 linux-2.6.17-ming/include/linux/ext3_jbd.h | 19 linux-2.6.17-ming/include/linux/jbd.h | 31 linux-2.6.17-ming/include/linux/percpu_counter.h | 36 linux-2.6.17-ming/include/linux/types.h | 1 linux-2.6.17-ming/mm/swap.c | 12 32 files changed, 3089 insertions(+), 436 deletions(-) diff -puN fs/ext2/super.c~ext3-2.6.17 fs/ext2/super.c --- linux-2.6.17/fs/ext2/super.c~ext3-2.6.17 2006-06-27 12:57:54.816372167 -0700 +++ linux-2.6.17-ming/fs/ext2/super.c 2006-06-27 12:57:55.043344313 -0700 @@ -834,9 +834,6 @@ static int ext2_fill_super(struct super_ printk ("EXT2-fs: not enough memory\n"); goto failed_mount; } - percpu_counter_init(&sbi->s_freeblocks_counter); - percpu_counter_init(&sbi->s_freeinodes_counter); - percpu_counter_init(&sbi->s_dirs_counter); bgl_lock_init(&sbi->s_blockgroup_lock); sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts), GFP_KERNEL); @@ -863,6 +860,13 @@ static int ext2_fill_super(struct super_ sbi->s_gdb_count = db_count; get_random_bytes(&sbi->s_next_generation, sizeof(u32)); spin_lock_init(&sbi->s_next_gen_lock); + + percpu_counter_init(&sbi->s_freeblocks_counter, + ext2_count_free_blocks(sb)); + percpu_counter_init(&sbi->s_freeinodes_counter, + ext2_count_free_inodes(sb)); + percpu_counter_init(&sbi->s_dirs_counter, + ext2_count_dirs(sb)); /* * set up enough so that it can read an inode */ @@ -874,24 +878,18 @@ static int ext2_fill_super(struct super_ if (!sb->s_root) { iput(root); printk(KERN_ERR "EXT2-fs: get root inode failed\n"); - goto failed_mount2; + goto failed_mount3; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { dput(sb->s_root); sb->s_root = NULL; printk(KERN_ERR "EXT2-fs: corrupt root inode, run e2fsck\n"); - goto failed_mount2; + goto failed_mount3; } if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) ext2_warning(sb, __FUNCTION__, "mounting ext3 filesystem as ext2"); ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY); - percpu_counter_mod(&sbi->s_freeblocks_counter, - ext2_count_free_blocks(sb)); - percpu_counter_mod(&sbi->s_freeinodes_counter, - ext2_count_free_inodes(sb)); - percpu_counter_mod(&sbi->s_dirs_counter, - ext2_count_dirs(sb)); return 0; cantfind_ext2: @@ -899,7 +897,10 @@ cantfind_ext2: printk("VFS: Can't find an ext2 filesystem on dev %s.\n", sb->s_id); goto failed_mount; - +failed_mount3: + percpu_counter_destroy(&sbi->s_freeblocks_counter); + percpu_counter_destroy(&sbi->s_freeinodes_counter); + percpu_counter_destroy(&sbi->s_dirs_counter); failed_mount2: for (i = 0; i < db_count; i++) brelse(sbi->s_group_desc[i]); diff -puN fs/ext3/balloc.c~ext3-2.6.17 fs/ext3/balloc.c --- linux-2.6.17/fs/ext3/balloc.c~ext3-2.6.17 2006-06-27 12:57:54.819371798 -0700 +++ linux-2.6.17-ming/fs/ext3/balloc.c 2006-06-27 12:57:55.062341982 -0700 @@ -38,7 +38,6 @@ #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) - struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, unsigned int block_group, struct buffer_head ** bh) @@ -89,12 +88,16 @@ read_block_bitmap(struct super_block *sb desc = ext3_get_group_desc (sb, block_group, NULL); if (!desc) goto error_out; - bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); + bh = sb_bread(sb, + EXT3_BLOCK_BITMAP(desc, + ext3_group_first_block_no(sb, block_group))); if (!bh) ext3_error (sb, "read_block_bitmap", "Cannot read block bitmap - " - "block_group = %d, block_bitmap = %u", - block_group, le32_to_cpu(desc->bg_block_bitmap)); + "block_group = %d, block_bitmap = "E3FSBLK, + block_group, + EXT3_BLOCK_BITMAP(desc, + ext3_group_first_block_no(sb, block_group))); error_out: return bh; } @@ -163,20 +166,19 @@ restart: #endif static int -goal_in_my_reservation(struct ext3_reserve_window *rsv, int goal, +goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal, unsigned int group, struct super_block * sb) { - unsigned long group_first_block, group_last_block; + ext3_fsblk_t group_first_block, group_last_block; - group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + - group * EXT3_BLOCKS_PER_GROUP(sb); + group_first_block = ext3_group_first_block_no(sb, group); group_last_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1; if ((rsv->_rsv_start > group_last_block) || (rsv->_rsv_end < group_first_block)) return 0; - if ((goal >= 0) && ((goal + group_first_block < rsv->_rsv_start) - || (goal + group_first_block > rsv->_rsv_end))) + if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start) + || (grp_goal + group_first_block > rsv->_rsv_end))) return 0; return 1; } @@ -187,7 +189,7 @@ goal_in_my_reservation(struct ext3_reser * Returns NULL if there are no windows or if all windows start after the goal. */ static struct ext3_reserve_window_node * -search_reserve_window(struct rb_root *root, unsigned long goal) +search_reserve_window(struct rb_root *root, ext3_fsblk_t goal) { struct rb_node *n = root->rb_node; struct ext3_reserve_window_node *rsv; @@ -223,7 +225,7 @@ void ext3_rsv_window_add(struct super_bl { struct rb_root *root = &EXT3_SB(sb)->s_rsv_window_root; struct rb_node *node = &rsv->rsv_node; - unsigned int start = rsv->rsv_start; + ext3_fsblk_t start = rsv->rsv_start; struct rb_node ** p = &root->rb_node; struct rb_node * parent = NULL; @@ -310,30 +312,30 @@ void ext3_discard_reservation(struct ino /* Free given blocks, update quota and i_blocks field */ void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb, - unsigned long block, unsigned long count, - int *pdquot_freed_blocks) + ext3_fsblk_t block, unsigned long count, + unsigned long *pdquot_freed_blocks) { struct buffer_head *bitmap_bh = NULL; struct buffer_head *gd_bh; unsigned long block_group; - unsigned long bit; + ext3_grpblk_t bit; unsigned long i; unsigned long overflow; struct ext3_group_desc * desc; struct ext3_super_block * es; struct ext3_sb_info *sbi; int err = 0, ret; - unsigned group_freed; + ext3_grpblk_t group_freed; *pdquot_freed_blocks = 0; sbi = EXT3_SB(sb); es = sbi->s_es; if (block < le32_to_cpu(es->s_first_data_block) || block + count < block || - block + count > le32_to_cpu(es->s_blocks_count)) { + block + count > EXT3_BLOCKS_COUNT(es)) { ext3_error (sb, "ext3_free_blocks", "Freeing blocks not in datazone - " - "block = %lu, count = %lu", block, count); + "block = "E3FSBLK", count = %lu", block, count); goto error_return; } @@ -341,10 +343,7 @@ void ext3_free_blocks_sb(handle_t *handl do_more: overflow = 0; - block_group = (block - le32_to_cpu(es->s_first_data_block)) / - EXT3_BLOCKS_PER_GROUP(sb); - bit = (block - le32_to_cpu(es->s_first_data_block)) % - EXT3_BLOCKS_PER_GROUP(sb); + ext3_get_group_no_and_offset(sb, block, &block_group, &bit); /* * Check to see if we are freeing blocks across a group * boundary. @@ -361,15 +360,23 @@ do_more: if (!desc) goto error_return; - if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) || - in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) || - in_range (block, le32_to_cpu(desc->bg_inode_table), + if (in_range (EXT3_BLOCK_BITMAP(desc, + ext3_group_first_block_no(sb, block_group)), + block, count) || + in_range (EXT3_INODE_BITMAP(desc, + ext3_group_first_block_no(sb, block_group)), + block, count) || + in_range (block, + EXT3_INODE_TABLE(desc, + ext3_group_first_block_no(sb, block_group)), sbi->s_itb_per_group) || - in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table), + in_range (block + count - 1, + EXT3_INODE_TABLE(desc, + ext3_group_first_block_no(sb, block_group)), sbi->s_itb_per_group)) ext3_error (sb, "ext3_free_blocks", "Freeing blocks in system zones - " - "Block = %lu, count = %lu", + "Block = "E3FSBLK", count = %lu", block, count); /* @@ -453,7 +460,8 @@ do_more: bit + i, bitmap_bh->b_data)) { jbd_unlock_bh_state(bitmap_bh); ext3_error(sb, __FUNCTION__, - "bit already cleared for block %lu", block + i); + "bit already cleared for block "E3FSBLK, + block + i); jbd_lock_bh_state(bitmap_bh); BUFFER_TRACE(bitmap_bh, "bit already cleared"); } else { @@ -493,10 +501,10 @@ error_return: /* Free given blocks, update quota and i_blocks field */ void ext3_free_blocks(handle_t *handle, struct inode *inode, - unsigned long block, unsigned long count) + ext3_fsblk_t block, unsigned long count) { struct super_block * sb; - int dquot_freed_blocks; + unsigned long dquot_freed_blocks; sb = inode->i_sb; if (!sb) { @@ -525,7 +533,7 @@ void ext3_free_blocks(handle_t *handle, * data-writes at some point, and disable it for metadata allocations or * sync-data inodes. */ -static int ext3_test_allocatable(int nr, struct buffer_head *bh) +static int ext3_test_allocatable(ext3_grpblk_t nr, struct buffer_head *bh) { int ret; struct journal_head *jh = bh2jh(bh); @@ -542,11 +550,11 @@ static int ext3_test_allocatable(int nr, return ret; } -static int -bitmap_search_next_usable_block(int start, struct buffer_head *bh, - int maxblocks) +static ext3_grpblk_t +bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, + ext3_grpblk_t maxblocks) { - int next; + ext3_grpblk_t next; struct journal_head *jh = bh2jh(bh); /* @@ -576,10 +584,11 @@ bitmap_search_next_usable_block(int star * the initial goal; then for a free byte somewhere in the bitmap; then * for any free bit in the bitmap. */ -static int -find_next_usable_block(int start, struct buffer_head *bh, int maxblocks) +static ext3_grpblk_t +find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, + ext3_grpblk_t maxblocks) { - int here, next; + ext3_grpblk_t here, next; char *p, *r; if (start > 0) { @@ -591,7 +600,7 @@ find_next_usable_block(int start, struct * less than EXT3_BLOCKS_PER_GROUP. Aligning up to the * next 64-bit boundary is simple.. */ - int end_goal = (start + 63) & ~63; + ext3_grpblk_t end_goal = (start + 63) & ~63; if (end_goal > maxblocks) end_goal = maxblocks; here = ext3_find_next_zero_bit(bh->b_data, end_goal, start); @@ -628,7 +637,7 @@ find_next_usable_block(int start, struct * zero (failure). */ static inline int -claim_block(spinlock_t *lock, int block, struct buffer_head *bh) +claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh) { struct journal_head *jh = bh2jh(bh); int ret; @@ -651,19 +660,18 @@ claim_block(spinlock_t *lock, int block, * new bitmap. In that case we must release write access to the old one via * ext3_journal_release_buffer(), else we'll run out of credits. */ -static int +static ext3_grpblk_t ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group, - struct buffer_head *bitmap_bh, int goal, + struct buffer_head *bitmap_bh, ext3_grpblk_t grp_goal, unsigned long *count, struct ext3_reserve_window *my_rsv) { - int group_first_block, start, end; + ext3_fsblk_t group_first_block; + ext3_grpblk_t start, end; unsigned long num = 0; /* we do allocation within the reservation window if we have a window */ if (my_rsv) { - group_first_block = - le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + - group * EXT3_BLOCKS_PER_GROUP(sb); + group_first_block = ext3_group_first_block_no(sb, group); if (my_rsv->_rsv_start >= group_first_block) start = my_rsv->_rsv_start - group_first_block; else @@ -673,13 +681,13 @@ ext3_try_to_allocate(struct super_block if (end > EXT3_BLOCKS_PER_GROUP(sb)) /* reservation window crosses group boundary */ end = EXT3_BLOCKS_PER_GROUP(sb); - if ((start <= goal) && (goal < end)) - start = goal; + if ((start <= grp_goal) && (grp_goal < end)) + start = grp_goal; else - goal = -1; + grp_goal = -1; } else { - if (goal > 0) - start = goal; + if (grp_goal > 0) + start = grp_goal; else start = 0; end = EXT3_BLOCKS_PER_GROUP(sb); @@ -688,43 +696,43 @@ ext3_try_to_allocate(struct super_block BUG_ON(start > EXT3_BLOCKS_PER_GROUP(sb)); repeat: - if (goal < 0 || !ext3_test_allocatable(goal, bitmap_bh)) { - goal = find_next_usable_block(start, bitmap_bh, end); - if (goal < 0) + if (grp_goal < 0 || !ext3_test_allocatable(grp_goal, bitmap_bh)) { + grp_goal = find_next_usable_block(start, bitmap_bh, end); + if (grp_goal < 0) goto fail_access; if (!my_rsv) { int i; - for (i = 0; i < 7 && goal > start && - ext3_test_allocatable(goal - 1, + for (i = 0; i < 7 && grp_goal > start && + ext3_test_allocatable(grp_goal - 1, bitmap_bh); - i++, goal--) + i++, grp_goal--) ; } } - start = goal; + start = grp_goal; - if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group), goal, bitmap_bh)) { + if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group), grp_goal, bitmap_bh)) { /* * The block was allocated by another thread, or it was * allocated and then freed by another thread */ start++; - goal++; + grp_goal++; if (start >= end) goto fail_access; goto repeat; } num++; - goal++; - while (num < *count && goal < end - && ext3_test_allocatable(goal, bitmap_bh) - && claim_block(sb_bgl_lock(EXT3_SB(sb), group), goal, bitmap_bh)) { + grp_goal++; + while (num < *count && grp_goal < end + && ext3_test_allocatable(grp_goal, bitmap_bh) + && claim_block(sb_bgl_lock(EXT3_SB(sb), group), grp_goal, bitmap_bh)) { num++; - goal++; + grp_goal++; } *count = num; - return goal - num; + return grp_goal - num; fail_access: *count = num; return -1; @@ -766,12 +774,13 @@ fail_access: static int find_next_reservable_window( struct ext3_reserve_window_node *search_head, struct ext3_reserve_window_node *my_rsv, - struct super_block * sb, int start_block, - int last_block) + struct super_block * sb, + ext3_fsblk_t start_block, + ext3_fsblk_t last_block) { struct rb_node *next; struct ext3_reserve_window_node *rsv, *prev; - int cur; + ext3_fsblk_t cur; int size = my_rsv->rsv_goal_size; /* TODO: make the start of the reservation window byte-aligned */ @@ -873,10 +882,10 @@ static int find_next_reservable_window( * * @rsv: the reservation * - * @goal: The goal (group-relative). It is where the search for a + * @grp_goal: The goal (group-relative). It is where the search for a * free reservable space should start from. - * if we have a goal(goal >0 ), then start from there, - * no goal(goal = -1), we start from the first block + * if we have a grp_goal(grp_goal >0 ), then start from there, + * no grp_goal(grp_goal = -1), we start from the first block * of the group. * * @sb: the super block @@ -885,25 +894,24 @@ static int find_next_reservable_window( * */ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, - int goal, struct super_block *sb, + ext3_grpblk_t grp_goal, struct super_block *sb, unsigned int group, struct buffer_head *bitmap_bh) { struct ext3_reserve_window_node *search_head; - int group_first_block, group_end_block, start_block; - int first_free_block; + ext3_fsblk_t group_first_block, group_end_block, start_block; + ext3_grpblk_t first_free_block; struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root; unsigned long size; int ret; spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; - group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + - group * EXT3_BLOCKS_PER_GROUP(sb); + group_first_block = ext3_group_first_block_no(sb, group); group_end_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1; - if (goal < 0) + if (grp_goal < 0) start_block = group_first_block; else - start_block = goal + group_first_block; + start_block = grp_goal + group_first_block; size = my_rsv->rsv_goal_size; @@ -1057,14 +1065,15 @@ static void try_to_extend_reservation(st * sorted double linked list should be fast. * */ -static int +static ext3_grpblk_t ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, unsigned int group, struct buffer_head *bitmap_bh, - int goal, struct ext3_reserve_window_node * my_rsv, + ext3_grpblk_t grp_goal, + struct ext3_reserve_window_node * my_rsv, unsigned long *count, int *errp) { - unsigned long group_first_block; - int ret = 0; + ext3_fsblk_t group_first_block; + ext3_grpblk_t ret = 0; int fatal; unsigned long num = *count; @@ -1090,17 +1099,16 @@ ext3_try_to_allocate_with_rsv(struct sup */ if (my_rsv == NULL ) { ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, - goal, count, NULL); + grp_goal, count, NULL); goto out; } /* - * goal is a group relative block number (if there is a goal) - * 0 < goal < EXT3_BLOCKS_PER_GROUP(sb) + * grp_goal is a group relative block number (if there is a goal) + * 0 < grp_goal < EXT3_BLOCKS_PER_GROUP(sb) * first block is a filesystem wide block number * first block is the block number of the first block in this group */ - group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + - group * EXT3_BLOCKS_PER_GROUP(sb); + group_first_block = ext3_group_first_block_no(sb, group); /* * Basically we will allocate a new block from inode's reservation @@ -1119,24 +1127,24 @@ ext3_try_to_allocate_with_rsv(struct sup */ while (1) { if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || - !goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb)) { + !goal_in_my_reservation(&my_rsv->rsv_window, grp_goal, group, sb)) { if (my_rsv->rsv_goal_size < *count) my_rsv->rsv_goal_size = *count; - ret = alloc_new_reservation(my_rsv, goal, sb, + ret = alloc_new_reservation(my_rsv, grp_goal, sb, group, bitmap_bh); if (ret < 0) break; /* failed */ - if (!goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb)) - goal = -1; - } else if (goal > 0 && (my_rsv->rsv_end-goal+1) < *count) + if (!goal_in_my_reservation(&my_rsv->rsv_window, grp_goal, group, sb)) + grp_goal = -1; + } else if (grp_goal > 0 && (my_rsv->rsv_end-grp_goal+1) < *count) try_to_extend_reservation(my_rsv, sb, - *count-my_rsv->rsv_end + goal - 1); + *count-my_rsv->rsv_end + grp_goal - 1); if ((my_rsv->rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb)) || (my_rsv->rsv_end < group_first_block)) BUG(); - ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, + ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, grp_goal, &num, &my_rsv->rsv_window); if (ret >= 0) { my_rsv->rsv_alloc_hit += num; @@ -1164,10 +1172,10 @@ out: static int ext3_has_free_blocks(struct ext3_sb_info *sbi) { - int free_blocks, root_blocks; + ext3_fsblk_t free_blocks, root_blocks; free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); - root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); + root_blocks = EXT3_R_BLOCKS_COUNT(sbi->s_es); if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && sbi->s_resuid != current->fsuid && (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { @@ -1200,19 +1208,20 @@ int ext3_should_retry_alloc(struct super * bitmap, and then for any free bit if that fails. * This function also updates quota and i_blocks field. */ -int ext3_new_blocks(handle_t *handle, struct inode *inode, - unsigned long goal, unsigned long *count, int *errp) +ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, + ext3_fsblk_t goal, unsigned long *count, int *errp) { struct buffer_head *bitmap_bh = NULL; struct buffer_head *gdp_bh; - int group_no; + unsigned long group_no; int goal_group; - int ret_block; + ext3_grpblk_t grp_target_blk; /* blockgroup relative goal block */ + ext3_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/ + ext3_fsblk_t ret_block; /* filesyetem-wide allocated block */ int bgi; /* blockgroup iteration index */ - int target_block; int fatal = 0, err; int performed_allocation = 0; - int free_blocks; + ext3_grpblk_t free_blocks; /* number of free blocks in a group */ struct super_block *sb; struct ext3_group_desc *gdp; struct ext3_super_block *es; @@ -1265,10 +1274,9 @@ int ext3_new_blocks(handle_t *handle, st * First, test whether the goal block is free. */ if (goal < le32_to_cpu(es->s_first_data_block) || - goal >= le32_to_cpu(es->s_blocks_count)) + goal >= EXT3_BLOCKS_COUNT(es)) goal = le32_to_cpu(es->s_first_data_block); - group_no = (goal - le32_to_cpu(es->s_first_data_block)) / - EXT3_BLOCKS_PER_GROUP(sb); + ext3_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk); gdp = ext3_get_group_desc(sb, group_no, &gdp_bh); if (!gdp) goto io_error; @@ -1285,16 +1293,15 @@ retry: my_rsv = NULL; if (free_blocks > 0) { - ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) % - EXT3_BLOCKS_PER_GROUP(sb)); bitmap_bh = read_block_bitmap(sb, group_no); if (!bitmap_bh) goto io_error; - ret_block = ext3_try_to_allocate_with_rsv(sb, handle, group_no, - bitmap_bh, ret_block, my_rsv, &num, &fatal); + grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle, + group_no, bitmap_bh, grp_target_blk, + my_rsv, &num, &fatal); if (fatal) goto out; - if (ret_block >= 0) + if (grp_alloc_blk >= 0) goto allocated; } @@ -1327,11 +1334,15 @@ retry: bitmap_bh = read_block_bitmap(sb, group_no); if (!bitmap_bh) goto io_error; - ret_block = ext3_try_to_allocate_with_rsv(sb, handle, group_no, - bitmap_bh, -1, my_rsv, &num, &fatal); + /* + * try to allocate block(s) from this group, without a goal(-1). + */ + grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle, + group_no, bitmap_bh, -1, my_rsv, + &num, &fatal); if (fatal) goto out; - if (ret_block >= 0) + if (grp_alloc_blk >= 0) goto allocated; } /* @@ -1360,18 +1371,22 @@ allocated: if (fatal) goto out; - target_block = ret_block + group_no * EXT3_BLOCKS_PER_GROUP(sb) - + le32_to_cpu(es->s_first_data_block); + ret_block = grp_alloc_blk + ext3_group_first_block_no(sb, group_no); - if (in_range(le32_to_cpu(gdp->bg_block_bitmap), target_block, num) || - in_range(le32_to_cpu(gdp->bg_inode_bitmap), target_block, num) || - in_range(target_block, le32_to_cpu(gdp->bg_inode_table), + if (in_range(EXT3_BLOCK_BITMAP(gdp, ext3_group_first_block_no(sb, group_no)), + ret_block, num) || + in_range(EXT3_BLOCK_BITMAP(gdp, ext3_group_first_block_no(sb, group_no)), + ret_block, num) || + in_range(ret_block, EXT3_INODE_TABLE(gdp, + ext3_group_first_block_no(sb, group_no)), EXT3_SB(sb)->s_itb_per_group) || - in_range(target_block + num - 1, le32_to_cpu(gdp->bg_inode_table), + in_range(ret_block + num - 1, EXT3_INODE_TABLE(gdp, + ext3_group_first_block_no(sb, group_no)), EXT3_SB(sb)->s_itb_per_group)) ext3_error(sb, "ext3_new_block", "Allocating block in system zone - " - "blocks from %u, length %lu", target_block, num); + "blocks from "E3FSBLK", length %lu", + ret_block, num); performed_allocation = 1; @@ -1380,7 +1395,7 @@ allocated: struct buffer_head *debug_bh; /* Record bitmap buffer state in the newly allocated block */ - debug_bh = sb_find_get_block(sb, target_block); + debug_bh = sb_find_get_block(sb, ret_block); if (debug_bh) { BUFFER_TRACE(debug_bh, "state when allocated"); BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state"); @@ -1393,26 +1408,23 @@ allocated: int i; for (i = 0; i < num; i++) { - if (ext3_test_bit(ret_block, + if (ext3_test_bit(grp_alloc_blk+i, bh2jh(bitmap_bh)->b_committed_data)) { printk("%s: block was unexpectedly set in " "b_committed_data\n", __FUNCTION__); } } } - ext3_debug("found bit %d\n", ret_block); + ext3_debug("found bit %d\n", grp_alloc_blk); spin_unlock(sb_bgl_lock(sbi, group_no)); jbd_unlock_bh_state(bitmap_bh); #endif - /* ret_block was blockgroup-relative. Now it becomes fs-relative */ - ret_block = target_block; - - if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) { + if (ret_block + num - 1 >= EXT3_BLOCKS_COUNT(es)) { ext3_error(sb, "ext3_new_block", - "block(%d) >= blocks count(%d) - " - "block_group = %d, es == %p ", ret_block, - le32_to_cpu(es->s_blocks_count), group_no, es); + "block("E3FSBLK") >= blocks count("E3FSBLK") - " + "block_group = %lu, es == %p ", ret_block, + EXT3_BLOCKS_COUNT(es), group_no, es); goto out; } @@ -1421,7 +1433,7 @@ allocated: * list of some description. We don't know in advance whether * the caller wants to use it as metadata or data. */ - ext3_debug("allocating block %d. Goal hits %d of %d.\n", + ext3_debug("allocating block %lu. Goal hits %d of %d.\n", ret_block, goal_hits, goal_attempts); spin_lock(sb_bgl_lock(sbi, group_no)); @@ -1461,23 +1473,24 @@ out: return 0; } -int ext3_new_block(handle_t *handle, struct inode *inode, - unsigned long goal, int *errp) +ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode, + ext3_fsblk_t goal, int *errp) { unsigned long count = 1; return ext3_new_blocks(handle, inode, goal, &count, errp); } -unsigned long ext3_count_free_blocks(struct super_block *sb) +ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb) { - unsigned long desc_count; + ext3_fsblk_t desc_count; struct ext3_group_desc *gdp; int i; unsigned long ngroups = EXT3_SB(sb)->s_groups_count; #ifdef EXT3FS_DEBUG struct ext3_super_block *es; - unsigned long bitmap_count, x; + ext3_fsblk_t bitmap_count; + unsigned long x; struct buffer_head *bitmap_bh = NULL; es = EXT3_SB(sb)->s_es; @@ -1502,8 +1515,10 @@ unsigned long ext3_count_free_blocks(str bitmap_count += x; } brelse(bitmap_bh); - printk("ext3_count_free_blocks: stored = %u, computed = %lu, %lu\n", - le32_to_cpu(es->s_free_blocks_count), desc_count, bitmap_count); + printk("ext3_count_free_blocks: stored = "E3FSBLK + ", computed = "E3FSBLK", "E3FSBLK"\n", + EXT3_FREE_BLOCKS_COUNT(es), + desc_count, bitmap_count); return bitmap_count; #else desc_count = 0; @@ -1520,11 +1535,12 @@ unsigned long ext3_count_free_blocks(str } static inline int -block_in_use(unsigned long block, struct super_block *sb, unsigned char *map) +block_in_use(ext3_fsblk_t block, struct super_block *sb, unsigned char *map) { - return ext3_test_bit ((block - - le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) % - EXT3_BLOCKS_PER_GROUP(sb), map); + ext3_grpblk_t offset; + + ext3_get_group_no_and_offset(sb, block, NULL, &offset); + return ext3_test_bit (offset, map); } static inline int test_root(int a, int b) diff -puN fs/ext3/dir.c~ext3-2.6.17 fs/ext3/dir.c --- linux-2.6.17/fs/ext3/dir.c~ext3-2.6.17 2006-06-27 12:57:54.822371430 -0700 +++ linux-2.6.17-ming/fs/ext3/dir.c 2006-06-27 12:57:55.079339896 -0700 @@ -131,8 +131,7 @@ static int ext3_readdir(struct file * fi struct buffer_head *bh = NULL; map_bh.b_state = 0; - err = ext3_get_blocks_handle(NULL, inode, blk, 1, - &map_bh, 0, 0); + err = ext3_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, 0, 0); if (err > 0) { page_cache_readahead(sb->s_bdev->bd_inode->i_mapping, &filp->f_ra, diff -puN /dev/null fs/ext3/extents.c --- /dev/null 2006-06-27 02:20:44.981223576 -0700 +++ linux-2.6.17-ming/fs/ext3/extents.c 2006-06-27 12:57:55.087338914 -0700 @@ -0,0 +1,2103 @@ +/* + * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas + * + * Architecture independence: + * Copyright (c) 2005, Bull S.A. + * Written by Pierre Peiffer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ + +/* + * Extents support for EXT3 + * + * TODO: + * - ext3*_error() should be used in some situations + * - analyze all BUG()/BUG_ON(), use -EIO where appropriate + * - smart tree reduction + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* this macro combines low and hi parts of phys. blocknr into ext3_fsblk_t */ +static inline ext3_fsblk_t ext_pblock(struct ext3_extent *ex) +{ + ext3_fsblk_t block; + + block = le32_to_cpu(ex->ee_start); + if (sizeof(ext3_fsblk_t) > 4) + block |= ((ext3_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1; + return block; +} + +/* this macro combines low and hi parts of phys. blocknr into ext3_fsblk_t */ +static inline ext3_fsblk_t idx_pblock(struct ext3_extent_idx *ix) +{ + ext3_fsblk_t block; + + block = le32_to_cpu(ix->ei_leaf); + if (sizeof(ext3_fsblk_t) > 4) + block |= ((ext3_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1; + return block; +} + +/* the routine stores large phys. blocknr into extent breaking it into parts */ +static inline void ext3_ext_store_pblock(struct ext3_extent *ex, ext3_fsblk_t pb) +{ + ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff)); + if (sizeof(ext3_fsblk_t) > 4) + ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); +} + +/* the routine stores large phys. blocknr into index breaking it into parts */ +static inline void ext3_idx_store_pblock(struct ext3_extent_idx *ix, ext3_fsblk_t pb) +{ + ix->ei_leaf = cpu_to_le32((unsigned long) (pb & 0xffffffff)); + if (sizeof(ext3_fsblk_t) > 4) + ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); +} + +static int ext3_ext_check_header(const char *function, struct inode *inode, + struct ext3_extent_header *eh) +{ + const char *error_msg = NULL; + + if (unlikely(eh->eh_magic != EXT3_EXT_MAGIC)) { + error_msg = "invalid magic"; + goto corrupted; + } + if (unlikely(eh->eh_max == 0)) { + error_msg = "invalid eh_max"; + goto corrupted; + } + if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) { + error_msg = "invalid eh_entries"; + goto corrupted; + } + return 0; + +corrupted: + ext3_error(inode->i_sb, function, + "bad header in inode #%lu: %s - magic %x, " + "entries %u, max %u, depth %u", + inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic), + le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), + le16_to_cpu(eh->eh_depth)); + + return -EIO; +} + +static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed) +{ + int err; + + if (handle->h_buffer_credits > needed) + return handle; + if (!ext3_journal_extend(handle, needed)) + return handle; + err = ext3_journal_restart(handle, needed); + + return handle; +} + +/* + * could return: + * - EROFS + * - ENOMEM + */ +static int ext3_ext_get_access(handle_t *handle, struct inode *inode, + struct ext3_ext_path *path) +{ + if (path->p_bh) { + /* path points to block */ + return ext3_journal_get_write_access(handle, path->p_bh); + } + /* path points to leaf/index in inode body */ + /* we use in-core data, no need to protect them */ + return 0; +} + +/* + * could return: + * - EROFS + * - ENOMEM + * - EIO + */ +static int ext3_ext_dirty(handle_t *handle, struct inode *inode, + struct ext3_ext_path *path) +{ + int err; + if (path->p_bh) { + /* path points to block */ + err = ext3_journal_dirty_metadata(handle, path->p_bh); + } else { + /* path points to leaf/index in inode body */ + err = ext3_mark_inode_dirty(handle, inode); + } + return err; +} + +static ext3_fsblk_t ext3_ext_find_goal(struct inode *inode, + struct ext3_ext_path *path, + ext3_fsblk_t block) +{ + struct ext3_inode_info *ei = EXT3_I(inode); + ext3_fsblk_t bg_start; + ext3_grpblk_t colour; + int depth; + + if (path) { + struct ext3_extent *ex; + depth = path->p_depth; + + /* try to predict block placement */ + if ((ex = path[depth].p_ext)) + return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block)); + + /* it looks index is empty + * try to find starting from index itself */ + if (path[depth].p_bh) + return path[depth].p_bh->b_blocknr; + } + + /* OK. use inode's group */ + bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + + le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); + colour = (current->pid % 16) * + (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); + return bg_start + colour + block; +} + +static ext3_fsblk_t +ext3_ext_new_block(handle_t *handle, struct inode *inode, + struct ext3_ext_path *path, + struct ext3_extent *ex, int *err) +{ + ext3_fsblk_t goal, newblock; + + goal = ext3_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); + newblock = ext3_new_block(handle, inode, goal, err); + return newblock; +} + +static inline int ext3_ext_space_block(struct inode *inode) +{ + int size; + + size = (inode->i_sb->s_blocksize - sizeof(struct ext3_extent_header)) + / sizeof(struct ext3_extent); +#ifdef AGRESSIVE_TEST + if (size > 6) + size = 6; +#endif + return size; +} + +static inline int ext3_ext_space_block_idx(struct inode *inode) +{ + int size; + + size = (inode->i_sb->s_blocksize - sizeof(struct ext3_extent_header)) + / sizeof(struct ext3_extent_idx); +#ifdef AGRESSIVE_TEST + if (size > 5) + size = 5; +#endif + return size; +} + +static inline int ext3_ext_space_root(struct inode *inode) +{ + int size; + + size = sizeof(EXT3_I(inode)->i_data); + size -= sizeof(struct ext3_extent_header); + size /= sizeof(struct ext3_extent); +#ifdef AGRESSIVE_TEST + if (size > 3) + size = 3; +#endif + return size; +} + +static inline int ext3_ext_space_root_idx(struct inode *inode) +{ + int size; + + size = sizeof(EXT3_I(inode)->i_data); + size -= sizeof(struct ext3_extent_header); + size /= sizeof(struct ext3_extent_idx); +#ifdef AGRESSIVE_TEST + if (size > 4) + size = 4; +#endif + return size; +} + +#ifdef EXT_DEBUG +static void ext3_ext_show_path(struct inode *inode, struct ext3_ext_path *path) +{ + int k, l = path->p_depth; + + ext_debug("path:"); + for (k = 0; k <= l; k++, path++) { + if (path->p_idx) { + ext_debug(" %d->"E3FSBLK, le32_to_cpu(path->p_idx->ei_block), + idx_pblock(path->p_idx)); + } else if (path->p_ext) { + ext_debug(" %d:%d:"E3FSBLK" ", + le32_to_cpu(path->p_ext->ee_block), + le16_to_cpu(path->p_ext->ee_len), + ext_pblock(path->p_ext)); + } else + ext_debug(" []"); + } + ext_debug("\n"); +} + +static void ext3_ext_show_leaf(struct inode *inode, struct ext3_ext_path *path) +{ + int depth = ext_depth(inode); + struct ext3_extent_header *eh; + struct ext3_extent *ex; + int i; + + if (!path) + return; + + eh = path[depth].p_hdr; + ex = EXT_FIRST_EXTENT(eh); + + for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { + ext_debug("%d:%d:"E3FSBLK" ", le32_to_cpu(ex->ee_block), + le16_to_cpu(ex->ee_len), ext_pblock(ex)); + } + ext_debug("\n"); +} +#else +#define ext3_ext_show_path(inode,path) +#define ext3_ext_show_leaf(inode,path) +#endif + +static void ext3_ext_drop_refs(struct ext3_ext_path *path) +{ + int depth = path->p_depth; + int i; + + for (i = 0; i <= depth; i++, path++) + if (path->p_bh) { + brelse(path->p_bh); + path->p_bh = NULL; + } +} + +/* + * binary search for closest index by given block + */ +static void +ext3_ext_binsearch_idx(struct inode *inode, struct ext3_ext_path *path, int block) +{ + struct ext3_extent_header *eh = path->p_hdr; + struct ext3_extent_idx *r, *l, *m; + + BUG_ON(eh->eh_magic != EXT3_EXT_MAGIC); + BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max)); + BUG_ON(le16_to_cpu(eh->eh_entries) <= 0); + + ext_debug("binsearch for %d(idx): ", block); + + l = EXT_FIRST_INDEX(eh) + 1; + r = EXT_FIRST_INDEX(eh) + le16_to_cpu(eh->eh_entries) - 1; + while (l <= r) { + m = l + (r - l) / 2; + if (block < le32_to_cpu(m->ei_block)) + r = m - 1; + else + l = m + 1; + ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ei_block, + m, m->ei_block, r, r->ei_block); + } + + path->p_idx = l - 1; + ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), + idx_block(path->p_idx)); + +#ifdef CHECK_BINSEARCH + { + struct ext3_extent_idx *chix, *ix; + int k; + + chix = ix = EXT_FIRST_INDEX(eh); + for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { + if (k != 0 && + le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { + printk("k=%d, ix=0x%p, first=0x%p\n", k, + ix, EXT_FIRST_INDEX(eh)); + printk("%u <= %u\n", + le32_to_cpu(ix->ei_block), + le32_to_cpu(ix[-1].ei_block)); + } + BUG_ON(k && le32_to_cpu(ix->ei_block) + <= le32_to_cpu(ix[-1].ei_block)); + if (block < le32_to_cpu(ix->ei_block)) + break; + chix = ix; + } + BUG_ON(chix != path->p_idx); + } +#endif + +} + +/* + * binary search for closest extent by given block + */ +static void +ext3_ext_binsearch(struct inode *inode, struct ext3_ext_path *path, int block) +{ + struct ext3_extent_header *eh = path->p_hdr; + struct ext3_extent *r, *l, *m; + + BUG_ON(eh->eh_magic != EXT3_EXT_MAGIC); + BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max)); + + if (eh->eh_entries == 0) { + /* + * this leaf is empty yet: + * we get such a leaf in split/add case + */ + return; + } + + ext_debug("binsearch for %d: ", block); + + l = EXT_FIRST_EXTENT(eh) + 1; + r = EXT_FIRST_EXTENT(eh) + le16_to_cpu(eh->eh_entries) - 1; + + while (l <= r) { + m = l + (r - l) / 2; + if (block < le32_to_cpu(m->ee_block)) + r = m - 1; + else + l = m + 1; + ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ee_block, + m, m->ee_block, r, r->ee_block); + } + + path->p_ext = l - 1; + ext_debug(" -> %d:"E3FSBLK":%d ", + le32_to_cpu(path->p_ext->ee_block), + ext_pblock(path->p_ext), + le16_to_cpu(path->p_ext->ee_len)); + +#ifdef CHECK_BINSEARCH + { + struct ext3_extent *chex, *ex; + int k; + + chex = ex = EXT_FIRST_EXTENT(eh); + for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) { + BUG_ON(k && le32_to_cpu(ex->ee_block) + <= le32_to_cpu(ex[-1].ee_block)); + if (block < le32_to_cpu(ex->ee_block)) + break; + chex = ex; + } + BUG_ON(chex != path->p_ext); + } +#endif + +} + +int ext3_ext_tree_init(handle_t *handle, struct inode *inode) +{ + struct ext3_extent_header *eh; + + eh = ext_inode_hdr(inode); + eh->eh_depth = 0; + eh->eh_entries = 0; + eh->eh_magic = EXT3_EXT_MAGIC; + eh->eh_max = cpu_to_le16(ext3_ext_space_root(inode)); + ext3_mark_inode_dirty(handle, inode); + ext3_ext_invalidate_cache(inode); + return 0; +} + +struct ext3_ext_path * +ext3_ext_find_extent(struct inode *inode, int block, struct ext3_ext_path *path) +{ + struct ext3_extent_header *eh; + struct buffer_head *bh; + short int depth, i, ppos = 0, alloc = 0; + + eh = ext_inode_hdr(inode); + BUG_ON(eh == NULL); + if (ext3_ext_check_header(__FUNCTION__, inode, eh)) + return ERR_PTR(-EIO); + + i = depth = ext_depth(inode); + + /* account possible depth increase */ + if (!path) { + path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2), + GFP_NOFS); + if (!path) + return ERR_PTR(-ENOMEM); + alloc = 1; + } + memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); + path[0].p_hdr = eh; + + /* walk through the tree */ + while (i) { + ext_debug("depth %d: num %d, max %d\n", + ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); + ext3_ext_binsearch_idx(inode, path + ppos, block); + path[ppos].p_block = idx_pblock(path[ppos].p_idx); + path[ppos].p_depth = i; + path[ppos].p_ext = NULL; + + bh = sb_bread(inode->i_sb, path[ppos].p_block); + if (!bh) + goto err; + + eh = ext_block_hdr(bh); + ppos++; + BUG_ON(ppos > depth); + path[ppos].p_bh = bh; + path[ppos].p_hdr = eh; + i--; + + if (ext3_ext_check_header(__FUNCTION__, inode, eh)) + goto err; + } + + path[ppos].p_depth = i; + path[ppos].p_hdr = eh; + path[ppos].p_ext = NULL; + path[ppos].p_idx = NULL; + + if (ext3_ext_check_header(__FUNCTION__, inode, eh)) + goto err; + + /* find extent */ + ext3_ext_binsearch(inode, path + ppos, block); + + ext3_ext_show_path(inode, path); + + return path; + +err: + ext3_ext_drop_refs(path); + if (alloc) + kfree(path); + return ERR_PTR(-EIO); +} + +/* + * insert new index [logical;ptr] into the block at cupr + * it check where to insert: before curp or after curp + */ +static int ext3_ext_insert_index(handle_t *handle, struct inode *inode, + struct ext3_ext_path *curp, + int logical, ext3_fsblk_t ptr) +{ + struct ext3_extent_idx *ix; + int len, err; + + if ((err = ext3_ext_get_access(handle, inode, curp))) + return err; + + BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block)); + len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; + if (logical > le32_to_cpu(curp->p_idx->ei_block)) { + /* insert after */ + if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { + len = (len - 1) * sizeof(struct ext3_extent_idx); + len = len < 0 ? 0 : len; + ext_debug("insert new index %d after: %d. " + "move %d from 0x%p to 0x%p\n", + logical, ptr, len, + (curp->p_idx + 1), (curp->p_idx + 2)); + memmove(curp->p_idx + 2, curp->p_idx + 1, len); + } + ix = curp->p_idx + 1; + } else { + /* insert before */ + len = len * sizeof(struct ext3_extent_idx); + len = len < 0 ? 0 : len; + ext_debug("insert new index %d before: %d. " + "move %d from 0x%p to 0x%p\n", + logical, ptr, len, + curp->p_idx, (curp->p_idx + 1)); + memmove(curp->p_idx + 1, curp->p_idx, len); + ix = curp->p_idx; + } + + ix->ei_block = cpu_to_le32(logical); + ext3_idx_store_pblock(ix, ptr); + curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1); + + BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries) + > le16_to_cpu(curp->p_hdr->eh_max)); + BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr)); + + err = ext3_ext_dirty(handle, inode, curp); + ext3_std_error(inode->i_sb, err); + + return err; +} + +/* + * routine inserts new subtree into the path, using free index entry + * at depth 'at: + * - allocates all needed blocks (new leaf and all intermediate index blocks) + * - makes decision where to split + * - moves remaining extens and index entries (right to the split point) + * into the newly allocated blocks + * - initialize subtree + */ +static int ext3_ext_split(handle_t *handle, struct inode *inode, + struct ext3_ext_path *path, + struct ext3_extent *newext, int at) +{ + struct buffer_head *bh = NULL; + int depth = ext_depth(inode); + struct ext3_extent_header *neh; + struct ext3_extent_idx *fidx; + struct ext3_extent *ex; + int i = at, k, m, a; + ext3_fsblk_t newblock, oldblock; + __le32 border; + ext3_fsblk_t *ablocks = NULL; /* array of allocated blocks */ + int err = 0; + + /* make decision: where to split? */ + /* FIXME: now desicion is simplest: at current extent */ + + /* if current leaf will be splitted, then we should use + * border from split point */ + BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr)); + if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { + border = path[depth].p_ext[1].ee_block; + ext_debug("leaf will be splitted." + " next leaf starts at %d\n", + le32_to_cpu(border)); + } else { + border = newext->ee_block; + ext_debug("leaf will be added." + " next leaf starts at %d\n", + le32_to_cpu(border)); + } + + /* + * if error occurs, then we break processing + * and turn filesystem read-only. so, index won't + * be inserted and tree will be in consistent + * state. next mount will repair buffers too + */ + + /* + * get array to track all allocated blocks + * we need this to handle errors and free blocks + * upon them + */ + ablocks = kmalloc(sizeof(ext3_fsblk_t) * depth, GFP_NOFS); + if (!ablocks) + return -ENOMEM; + memset(ablocks, 0, sizeof(ext3_fsblk_t) * depth); + + /* allocate all needed blocks */ + ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); + for (a = 0; a < depth - at; a++) { + newblock = ext3_ext_new_block(handle, inode, path, newext, &err); + if (newblock == 0) + goto cleanup; + ablocks[a] = newblock; + } + + /* initialize new leaf */ + newblock = ablocks[--a]; + BUG_ON(newblock == 0); + bh = sb_getblk(inode->i_sb, newblock); + if (!bh) { + err = -EIO; + goto cleanup; + } + lock_buffer(bh); + + if ((err = ext3_journal_get_create_access(handle, bh))) + goto cleanup; + + neh = ext_block_hdr(bh); + neh->eh_entries = 0; + neh->eh_max = cpu_to_le16(ext3_ext_space_block(inode)); + neh->eh_magic = EXT3_EXT_MAGIC; + neh->eh_depth = 0; + ex = EXT_FIRST_EXTENT(neh); + + /* move remain of path[depth] to the new leaf */ + BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max); + /* start copy from next extent */ + /* TODO: we could do it by single memmove */ + m = 0; + path[depth].p_ext++; + while (path[depth].p_ext <= + EXT_MAX_EXTENT(path[depth].p_hdr)) { + ext_debug("move %d:"E3FSBLK":%d in new leaf "E3FSBLK"\n", + le32_to_cpu(path[depth].p_ext->ee_block), + ext_pblock(path[depth].p_ext), + le16_to_cpu(path[depth].p_ext->ee_len), + newblock); + /*memmove(ex++, path[depth].p_ext++, + sizeof(struct ext3_extent)); + neh->eh_entries++;*/ + path[depth].p_ext++; + m++; + } + if (m) { + memmove(ex, path[depth].p_ext-m, sizeof(struct ext3_extent)*m); + neh->eh_entries = cpu_to_le16(le16_to_cpu(neh->eh_entries)+m); + } + + set_buffer_uptodate(bh); + unlock_buffer(bh); + + if ((err = ext3_journal_dirty_metadata(handle, bh))) + goto cleanup; + brelse(bh); + bh = NULL; + + /* correct old leaf */ + if (m) { + if ((err = ext3_ext_get_access(handle, inode, path + depth))) + goto cleanup; + path[depth].p_hdr->eh_entries = + cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m); + if ((err = ext3_ext_dirty(handle, inode, path + depth))) + goto cleanup; + + } + + /* create intermediate indexes */ + k = depth - at - 1; + BUG_ON(k < 0); + if (k) + ext_debug("create %d intermediate indices\n", k); + /* insert new index into current index block */ + /* current depth stored in i var */ + i = depth - 1; + while (k--) { + oldblock = newblock; + newblock = ablocks[--a]; + bh = sb_getblk(inode->i_sb, (ext3_fsblk_t)newblock); + if (!bh) { + err = -EIO; + goto cleanup; + } + lock_buffer(bh); + + if ((err = ext3_journal_get_create_access(handle, bh))) + goto cleanup; + + neh = ext_block_hdr(bh); + neh->eh_entries = cpu_to_le16(1); + neh->eh_magic = EXT3_EXT_MAGIC; + neh->eh_max = cpu_to_le16(ext3_ext_space_block_idx(inode)); + neh->eh_depth = cpu_to_le16(depth - i); + fidx = EXT_FIRST_INDEX(neh); + fidx->ei_block = border; + ext3_idx_store_pblock(fidx, oldblock); + + ext_debug("int.index at %d (block "E3FSBLK"): %lu -> "E3FSBLK"\n", i, + newblock, (unsigned long) le32_to_cpu(border), + oldblock); + /* copy indexes */ + m = 0; + path[i].p_idx++; + + ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, + EXT_MAX_INDEX(path[i].p_hdr)); + BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) != + EXT_LAST_INDEX(path[i].p_hdr)); + while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { + ext_debug("%d: move %d:%d in new index "E3FSBLK"\n", i, + le32_to_cpu(path[i].p_idx->ei_block), + idx_pblock(path[i].p_idx), + newblock); + /*memmove(++fidx, path[i].p_idx++, + sizeof(struct ext3_extent_idx)); + neh->eh_entries++; + BUG_ON(neh->eh_entries > neh->eh_max);*/ + path[i].p_idx++; + m++; + } + if (m) { + memmove(++fidx, path[i].p_idx - m, + sizeof(struct ext3_extent_idx) * m); + neh->eh_entries = + cpu_to_le16(le16_to_cpu(neh->eh_entries) + m); + } + set_buffer_uptodate(bh); + unlock_buffer(bh); + + if ((err = ext3_journal_dirty_metadata(handle, bh))) + goto cleanup; + brelse(bh); + bh = NULL; + + /* correct old index */ + if (m) { + err = ext3_ext_get_access(handle, inode, path + i); + if (err) + goto cleanup; + path[i].p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path[i].p_hdr->eh_entries)-m); + err = ext3_ext_dirty(handle, inode, path + i); + if (err) + goto cleanup; + } + + i--; + } + + /* insert new index */ + if (err) + goto cleanup; + + err = ext3_ext_insert_index(handle, inode, path + at, + le32_to_cpu(border), newblock); + +cleanup: + if (bh) { + if (buffer_locked(bh)) + unlock_buffer(bh); + brelse(bh); + } + + if (err) { + /* free all allocated blocks in error case */ + for (i = 0; i < depth; i++) { + if (!ablocks[i]) + continue; + ext3_free_blocks(handle, inode, ablocks[i], 1); + } + } + kfree(ablocks); + + return err; +} + +/* + * routine implements tree growing procedure: + * - allocates new block + * - moves top-level data (index block or leaf) into the new block + * - initialize new top-level, creating index that points to the + * just created block + */ +static int ext3_ext_grow_indepth(handle_t *handle, struct inode *inode, + struct ext3_ext_path *path, + struct ext3_extent *newext) +{ + struct ext3_ext_path *curp = path; + struct ext3_extent_header *neh; + struct ext3_extent_idx *fidx; + struct buffer_head *bh; + ext3_fsblk_t newblock; + int err = 0; + + newblock = ext3_ext_new_block(handle, inode, path, newext, &err); + if (newblock == 0) + return err; + + bh = sb_getblk(inode->i_sb, newblock); + if (!bh) { + err = -EIO; + ext3_std_error(inode->i_sb, err); + return err; + } + lock_buffer(bh); + + if ((err = ext3_journal_get_create_access(handle, bh))) { + unlock_buffer(bh); + goto out; + } + + /* move top-level index/leaf into new block */ + memmove(bh->b_data, curp->p_hdr, sizeof(EXT3_I(inode)->i_data)); + + /* set size of new block */ + neh = ext_block_hdr(bh); + /* old root could have indexes or leaves + * so calculate e_max right way */ + if (ext_depth(inode)) + neh->eh_max = cpu_to_le16(ext3_ext_space_block_idx(inode)); + else + neh->eh_max = cpu_to_le16(ext3_ext_space_block(inode)); + neh->eh_magic = EXT3_EXT_MAGIC; + set_buffer_uptodate(bh); + unlock_buffer(bh); + + if ((err = ext3_journal_dirty_metadata(handle, bh))) + goto out; + + /* create index in new top-level index: num,max,pointer */ + if ((err = ext3_ext_get_access(handle, inode, curp))) + goto out; + + curp->p_hdr->eh_magic = EXT3_EXT_MAGIC; + curp->p_hdr->eh_max = cpu_to_le16(ext3_ext_space_root_idx(inode)); + curp->p_hdr->eh_entries = cpu_to_le16(1); + curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); + /* FIXME: it works, but actually path[0] can be index */ + curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; + ext3_idx_store_pblock(curp->p_idx, newblock); + + neh = ext_inode_hdr(inode); + fidx = EXT_FIRST_INDEX(neh); + ext_debug("new root: num %d(%d), lblock %d, ptr "E3FSBLK"\n", + le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), + le32_to_cpu(fidx->ei_block), idx_pblock(fidx)); + + neh->eh_depth = cpu_to_le16(path->p_depth + 1); + err = ext3_ext_dirty(handle, inode, curp); +out: + brelse(bh); + + return err; +} + +/* + * routine finds empty index and adds new leaf. if no free index found + * then it requests in-depth growing + */ +static int ext3_ext_create_new_leaf(handle_t *handle, struct inode *inode, + struct ext3_ext_path *path, + struct ext3_extent *newext) +{ + struct ext3_ext_path *curp; + int depth, i, err = 0; + +repeat: + i = depth = ext_depth(inode); + + /* walk up to the tree and look for free index entry */ + curp = path + depth; + while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { + i--; + curp--; + } + + /* we use already allocated block for index block + * so, subsequent data blocks should be contigoues */ + if (EXT_HAS_FREE_INDEX(curp)) { + /* if we found index with free entry, then use that + * entry: create all needed subtree and add new leaf */ + err = ext3_ext_split(handle, inode, path, newext, i); + + /* refill path */ + ext3_ext_drop_refs(path); + path = ext3_ext_find_extent(inode, + le32_to_cpu(newext->ee_block), + path); + if (IS_ERR(path)) + err = PTR_ERR(path); + } else { + /* tree is full, time to grow in depth */ + err = ext3_ext_grow_indepth(handle, inode, path, newext); + if (err) + goto out; + + /* refill path */ + ext3_ext_drop_refs(path); + path = ext3_ext_find_extent(inode, + le32_to_cpu(newext->ee_block), + path); + if (IS_ERR(path)) { + err = PTR_ERR(path); + goto out; + } + + /* + * only first (depth 0 -> 1) produces free space + * in all other cases we have to split growed tree + */ + depth = ext_depth(inode); + if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { + /* now we need split */ + goto repeat; + } + } + +out: + return err; +} + +/* + * returns allocated block in subsequent extent or EXT_MAX_BLOCK + * NOTE: it consider block number from index entry as + * allocated block. thus, index entries have to be consistent + * with leafs + */ +static unsigned long +ext3_ext_next_allocated_block(struct ext3_ext_path *path) +{ + int depth; + + BUG_ON(path == NULL); + depth = path->p_depth; + + if (depth == 0 && path->p_ext == NULL) + return EXT_MAX_BLOCK; + + while (depth >= 0) { + if (depth == path->p_depth) { + /* leaf */ + if (path[depth].p_ext != + EXT_LAST_EXTENT(path[depth].p_hdr)) + return le32_to_cpu(path[depth].p_ext[1].ee_block); + } else { + /* index */ + if (path[depth].p_idx != + EXT_LAST_INDEX(path[depth].p_hdr)) + return le32_to_cpu(path[depth].p_idx[1].ei_block); + } + depth--; + } + + return EXT_MAX_BLOCK; +} + +/* + * returns first allocated block from next leaf or EXT_MAX_BLOCK + */ +static unsigned ext3_ext_next_leaf_block(struct inode *inode, + struct ext3_ext_path *path) +{ + int depth; + + BUG_ON(path == NULL); + depth = path->p_depth; + + /* zero-tree has no leaf blocks at all */ + if (depth == 0) + return EXT_MAX_BLOCK; + + /* go to index block */ + depth--; + + while (depth >= 0) { + if (path[depth].p_idx != + EXT_LAST_INDEX(path[depth].p_hdr)) + return le32_to_cpu(path[depth].p_idx[1].ei_block); + depth--; + } + + return EXT_MAX_BLOCK; +} + +/* + * if leaf gets modified and modified extent is first in the leaf + * then we have to correct all indexes above + * TODO: do we need to correct tree in all cases? + */ +int ext3_ext_correct_indexes(handle_t *handle, struct inode *inode, + struct ext3_ext_path *path) +{ + struct ext3_extent_header *eh; + int depth = ext_depth(inode); + struct ext3_extent *ex; + __le32 border; + int k, err = 0; + + eh = path[depth].p_hdr; + ex = path[depth].p_ext; + BUG_ON(ex == NULL); + BUG_ON(eh == NULL); + + if (depth == 0) { + /* there is no tree at all */ + return 0; + } + + if (ex != EXT_FIRST_EXTENT(eh)) { + /* we correct tree if first leaf got modified only */ + return 0; + } + + /* + * TODO: we need correction if border is smaller then current one + */ + k = depth - 1; + border = path[depth].p_ext->ee_block; + if ((err = ext3_ext_get_access(handle, inode, path + k))) + return err; + path[k].p_idx->ei_block = border; + if ((err = ext3_ext_dirty(handle, inode, path + k))) + return err; + + while (k--) { + /* change all left-side indexes */ + if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) + break; + if ((err = ext3_ext_get_access(handle, inode, path + k))) + break; + path[k].p_idx->ei_block = border; + if ((err = ext3_ext_dirty(handle, inode, path + k))) + break; + } + + return err; +} + +static int inline +ext3_can_extents_be_merged(struct inode *inode, struct ext3_extent *ex1, + struct ext3_extent *ex2) +{ + if (le32_to_cpu(ex1->ee_block) + le16_to_cpu(ex1->ee_len) + != le32_to_cpu(ex2->ee_block)) + return 0; + +#ifdef AGRESSIVE_TEST + if (le16_to_cpu(ex1->ee_len) >= 4) + return 0; +#endif + + if (ext_pblock(ex1) + le16_to_cpu(ex1->ee_len) == ext_pblock(ex2)) + return 1; + return 0; +} + +/* + * this routine tries to merge requsted extent into the existing + * extent or inserts requested extent as new one into the tree, + * creating new leaf in no-space case + */ +int ext3_ext_insert_extent(handle_t *handle, struct inode *inode, + struct ext3_ext_path *path, + struct ext3_extent *newext) +{ + struct ext3_extent_header * eh; + struct ext3_extent *ex, *fex; + struct ext3_extent *nearex; /* nearest extent */ + struct ext3_ext_path *npath = NULL; + int depth, len, err, next; + + BUG_ON(newext->ee_len == 0); + depth = ext_depth(inode); + ex = path[depth].p_ext; + BUG_ON(path[depth].p_hdr == NULL); + + /* try to insert block into found extent and return */ + if (ex && ext3_can_extents_be_merged(inode, ex, newext)) { + ext_debug("append %d block to %d:%d (from "E3FSBLK")\n", + le16_to_cpu(newext->ee_len), + le32_to_cpu(ex->ee_block), + le16_to_cpu(ex->ee_len), ext_pblock(ex)); + if ((err = ext3_ext_get_access(handle, inode, path + depth))) + return err; + ex->ee_len = cpu_to_le16(le16_to_cpu(ex->ee_len) + + le16_to_cpu(newext->ee_len)); + eh = path[depth].p_hdr; + nearex = ex; + goto merge; + } + +repeat: + depth = ext_depth(inode); + eh = path[depth].p_hdr; + if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) + goto has_space; + + /* probably next leaf has space for us? */ + fex = EXT_LAST_EXTENT(eh); + next = ext3_ext_next_leaf_block(inode, path); + if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block) + && next != EXT_MAX_BLOCK) { + ext_debug("next leaf block - %d\n", next); + BUG_ON(npath != NULL); + npath = ext3_ext_find_extent(inode, next, NULL); + if (IS_ERR(npath)) + return PTR_ERR(npath); + BUG_ON(npath->p_depth != path->p_depth); + eh = npath[depth].p_hdr; + if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { + ext_debug("next leaf isnt full(%d)\n", + le16_to_cpu(eh->eh_entries)); + path = npath; + goto repeat; + } + ext_debug("next leaf has no free space(%d,%d)\n", + le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); + } + + /* + * there is no free space in found leaf + * we're gonna add new leaf in the tree + */ + err = ext3_ext_create_new_leaf(handle, inode, path, newext); + if (err) + goto cleanup; + depth = ext_depth(inode); + eh = path[depth].p_hdr; + +has_space: + nearex = path[depth].p_ext; + + if ((err = ext3_ext_get_access(handle, inode, path + depth))) + goto cleanup; + + if (!nearex) { + /* there is no extent in this leaf, create first one */ + ext_debug("first extent in the leaf: %d:"E3FSBLK":%d\n", + le32_to_cpu(newext->ee_block), + ext_pblock(newext), + le16_to_cpu(newext->ee_len)); + path[depth].p_ext = EXT_FIRST_EXTENT(eh); + } else if (le32_to_cpu(newext->ee_block) + > le32_to_cpu(nearex->ee_block)) { +/* BUG_ON(newext->ee_block == nearex->ee_block); */ + if (nearex != EXT_LAST_EXTENT(eh)) { + len = EXT_MAX_EXTENT(eh) - nearex; + len = (len - 1) * sizeof(struct ext3_extent); + len = len < 0 ? 0 : len; + ext_debug("insert %d:"E3FSBLK":%d after: nearest 0x%p, " + "move %d from 0x%p to 0x%p\n", + le32_to_cpu(newext->ee_block), + ext_pblock(newext), + le16_to_cpu(newext->ee_len), + nearex, len, nearex + 1, nearex + 2); + memmove(nearex + 2, nearex + 1, len); + } + path[depth].p_ext = nearex + 1; + } else { + BUG_ON(newext->ee_block == nearex->ee_block); + len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent); + len = len < 0 ? 0 : len; + ext_debug("insert %d:"E3FSBLK":%d before: nearest 0x%p, " + "move %d from 0x%p to 0x%p\n", + le32_to_cpu(newext->ee_block), + ext_pblock(newext), + le16_to_cpu(newext->ee_len), + nearex, len, nearex + 1, nearex + 2); + memmove(nearex + 1, nearex, len); + path[depth].p_ext = nearex; + } + + eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1); + nearex = path[depth].p_ext; + nearex->ee_block = newext->ee_block; + nearex->ee_start = newext->ee_start; + nearex->ee_start_hi = newext->ee_start_hi; + nearex->ee_len = newext->ee_len; + +merge: + /* try to merge extents to the right */ + while (nearex < EXT_LAST_EXTENT(eh)) { + if (!ext3_can_extents_be_merged(inode, nearex, nearex + 1)) + break; + /* merge with next extent! */ + nearex->ee_len = cpu_to_le16(le16_to_cpu(nearex->ee_len) + + le16_to_cpu(nearex[1].ee_len)); + if (nearex + 1 < EXT_LAST_EXTENT(eh)) { + len = (EXT_LAST_EXTENT(eh) - nearex - 1) + * sizeof(struct ext3_extent); + memmove(nearex + 1, nearex + 2, len); + } + eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); + BUG_ON(eh->eh_entries == 0); + } + + /* try to merge extents to the left */ + + /* time to correct all indexes above */ + err = ext3_ext_correct_indexes(handle, inode, path); + if (err) + goto cleanup; + + err = ext3_ext_dirty(handle, inode, path + depth); + +cleanup: + if (npath) { + ext3_ext_drop_refs(npath); + kfree(npath); + } + ext3_ext_tree_changed(inode); + ext3_ext_invalidate_cache(inode); + return err; +} + +int ext3_ext_walk_space(struct inode *inode, unsigned long block, + unsigned long num, ext_prepare_callback func, + void *cbdata) +{ + struct ext3_ext_path *path = NULL; + struct ext3_ext_cache cbex; + struct ext3_extent *ex; + unsigned long next, start = 0, end = 0; + unsigned long last = block + num; + int depth, exists, err = 0; + + BUG_ON(func == NULL); + BUG_ON(inode == NULL); + + while (block < last && block != EXT_MAX_BLOCK) { + num = last - block; + /* find extent for this block */ + path = ext3_ext_find_extent(inode, block, path); + if (IS_ERR(path)) { + err = PTR_ERR(path); + path = NULL; + break; + } + + depth = ext_depth(inode); + BUG_ON(path[depth].p_hdr == NULL); + ex = path[depth].p_ext; + next = ext3_ext_next_allocated_block(path); + + exists = 0; + if (!ex) { + /* there is no extent yet, so try to allocate + * all requested space */ + start = block; + end = block + num; + } else if (le32_to_cpu(ex->ee_block) > block) { + /* need to allocate space before found extent */ + start = block; + end = le32_to_cpu(ex->ee_block); + if (block + num < end) + end = block + num; + } else if (block >= + le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len)) { + /* need to allocate space after found extent */ + start = block; + end = block + num; + if (end >= next) + end = next; + } else if (block >= le32_to_cpu(ex->ee_block)) { + /* + * some part of requested space is covered + * by found extent + */ + start = block; + end = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len); + if (block + num < end) + end = block + num; + exists = 1; + } else { + BUG(); + } + BUG_ON(end <= start); + + if (!exists) { + cbex.ec_block = start; + cbex.ec_len = end - start; + cbex.ec_start = 0; + cbex.ec_type = EXT3_EXT_CACHE_GAP; + } else { + cbex.ec_block = le32_to_cpu(ex->ee_block); + cbex.ec_len = le16_to_cpu(ex->ee_len); + cbex.ec_start = ext_pblock(ex); + cbex.ec_type = EXT3_EXT_CACHE_EXTENT; + } + + BUG_ON(cbex.ec_len == 0); + err = func(inode, path, &cbex, cbdata); + ext3_ext_drop_refs(path); + + if (err < 0) + break; + if (err == EXT_REPEAT) + continue; + else if (err == EXT_BREAK) { + err = 0; + break; + } + + if (ext_depth(inode) != depth) { + /* depth was changed. we have to realloc path */ + kfree(path); + path = NULL; + } + + block = cbex.ec_block + cbex.ec_len; + } + + if (path) { + ext3_ext_drop_refs(path); + kfree(path); + } + + return err; +} + +static inline void +ext3_ext_put_in_cache(struct inode *inode, __u32 block, + __u32 len, __u32 start, int type) +{ + struct ext3_ext_cache *cex; + BUG_ON(len == 0); + cex = &EXT3_I(inode)->i_cached_extent; + cex->ec_type = type; + cex->ec_block = block; + cex->ec_len = len; + cex->ec_start = start; +} + +/* + * this routine calculate boundaries of the gap requested block fits into + * and cache this gap + */ +static inline void +ext3_ext_put_gap_in_cache(struct inode *inode, struct ext3_ext_path *path, + unsigned long block) +{ + int depth = ext_depth(inode); + unsigned long lblock, len; + struct ext3_extent *ex; + + ex = path[depth].p_ext; + if (ex == NULL) { + /* there is no extent yet, so gap is [0;-] */ + lblock = 0; + len = EXT_MAX_BLOCK; + ext_debug("cache gap(whole file):"); + } else if (block < le32_to_cpu(ex->ee_block)) { + lblock = block; + len = le32_to_cpu(ex->ee_block) - block; + ext_debug("cache gap(before): %lu [%lu:%lu]", + (unsigned long) block, + (unsigned long) le32_to_cpu(ex->ee_block), + (unsigned long) le16_to_cpu(ex->ee_len)); + } else if (block >= le32_to_cpu(ex->ee_block) + + le16_to_cpu(ex->ee_len)) { + lblock = le32_to_cpu(ex->ee_block) + + le16_to_cpu(ex->ee_len); + len = ext3_ext_next_allocated_block(path); + ext_debug("cache gap(after): [%lu:%lu] %lu", + (unsigned long) le32_to_cpu(ex->ee_block), + (unsigned long) le16_to_cpu(ex->ee_len), + (unsigned long) block); + BUG_ON(len == lblock); + len = len - lblock; + } else { + lblock = len = 0; + BUG(); + } + + ext_debug(" -> %lu:%lu\n", (unsigned long) lblock, len); + ext3_ext_put_in_cache(inode, lblock, len, 0, EXT3_EXT_CACHE_GAP); +} + +static inline int +ext3_ext_in_cache(struct inode *inode, unsigned long block, + struct ext3_extent *ex) +{ + struct ext3_ext_cache *cex; + + cex = &EXT3_I(inode)->i_cached_extent; + + /* has cache valid data? */ + if (cex->ec_type == EXT3_EXT_CACHE_NO) + return EXT3_EXT_CACHE_NO; + + BUG_ON(cex->ec_type != EXT3_EXT_CACHE_GAP && + cex->ec_type != EXT3_EXT_CACHE_EXTENT); + if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { + ex->ee_block = cpu_to_le32(cex->ec_block); + ext3_ext_store_pblock(ex, cex->ec_start); + ex->ee_len = cpu_to_le16(cex->ec_len); + ext_debug("%lu cached by %lu:%lu:"E3FSBLK"\n", + (unsigned long) block, + (unsigned long) cex->ec_block, + (unsigned long) cex->ec_len, + cex->ec_start); + return cex->ec_type; + } + + /* not in cache */ + return EXT3_EXT_CACHE_NO; +} + +/* + * routine removes index from the index block + * it's used in truncate case only. thus all requests are for + * last index in the block only + */ +int ext3_ext_rm_idx(handle_t *handle, struct inode *inode, + struct ext3_ext_path *path) +{ + struct buffer_head *bh; + int err; + ext3_fsblk_t leaf; + + /* free index block */ + path--; + leaf = idx_pblock(path->p_idx); + BUG_ON(path->p_hdr->eh_entries == 0); + if ((err = ext3_ext_get_access(handle, inode, path))) + return err; + path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1); + if ((err = ext3_ext_dirty(handle, inode, path))) + return err; + ext_debug("index is empty, remove it, free block "E3FSBLK"\n", leaf); + bh = sb_find_get_block(inode->i_sb, leaf); + ext3_forget(handle, 1, inode, bh, leaf); + ext3_free_blocks(handle, inode, leaf, 1); + return err; +} + +/* + * This routine returns max. credits extent tree can consume. + * It should be OK for low-performance paths like ->writepage() + * To allow many writing process to fit a single transaction, + * caller should calculate credits under truncate_mutex and + * pass actual path. + */ +int inline ext3_ext_calc_credits_for_insert(struct inode *inode, + struct ext3_ext_path *path) +{ + int depth, needed; + + if (path) { + /* probably there is space in leaf? */ + depth = ext_depth(inode); + if (le16_to_cpu(path[depth].p_hdr->eh_entries) + < le16_to_cpu(path[depth].p_hdr->eh_max)) + return 1; + } + + /* + * given 32bit logical block (4294967296 blocks), max. tree + * can be 4 levels in depth -- 4 * 340^4 == 53453440000. + * let's also add one more level for imbalance. + */ + depth = 5; + + /* allocation of new data block(s) */ + needed = 2; + + /* + * tree can be full, so it'd need to grow in depth: + * allocation + old root + new root + */ + needed += 2 + 1 + 1; + + /* + * Index split can happen, we'd need: + * allocate intermediate indexes (bitmap + group) + * + change two blocks at each level, but root (already included) + */ + needed = (depth * 2) + (depth * 2); + + /* any allocation modifies superblock */ + needed += 1; + + return needed; +} + +static int ext3_remove_blocks(handle_t *handle, struct inode *inode, + struct ext3_extent *ex, + unsigned long from, unsigned long to) +{ + struct buffer_head *bh; + int i; + +#ifdef EXTENTS_STATS + { + struct ext3_sb_info *sbi = EXT3_SB(inode->i_sb); + unsigned short ee_len = le16_to_cpu(ex->ee_len); + spin_lock(&sbi->s_ext_stats_lock); + sbi->s_ext_blocks += ee_len; + sbi->s_ext_extents++; + if (ee_len < sbi->s_ext_min) + sbi->s_ext_min = ee_len; + if (ee_len > sbi->s_ext_max) + sbi->s_ext_max = ee_len; + if (ext_depth(inode) > sbi->s_depth_max) + sbi->s_depth_max = ext_depth(inode); + spin_unlock(&sbi->s_ext_stats_lock); + } +#endif + if (from >= le32_to_cpu(ex->ee_block) + && to == le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) { + /* tail removal */ + unsigned long num; + ext3_fsblk_t start; + num = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - from; + start = ext_pblock(ex) + le16_to_cpu(ex->ee_len) - num; + ext_debug("free last %lu blocks starting "E3FSBLK"\n", num, start); + for (i = 0; i < num; i++) { + bh = sb_find_get_block(inode->i_sb, start + i); + ext3_forget(handle, 0, inode, bh, start + i); + } + ext3_free_blocks(handle, inode, start, num); + } else if (from == le32_to_cpu(ex->ee_block) + && to <= le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) { + printk("strange request: removal %lu-%lu from %u:%u\n", + from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len)); + } else { + printk("strange request: removal(2) %lu-%lu from %u:%u\n", + from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len)); + } + return 0; +} + +static int +ext3_ext_rm_leaf(handle_t *handle, struct inode *inode, + struct ext3_ext_path *path, unsigned long start) +{ + int err = 0, correct_index = 0; + int depth = ext_depth(inode), credits; + struct ext3_extent_header *eh; + unsigned a, b, block, num; + unsigned long ex_ee_block; + unsigned short ex_ee_len; + struct ext3_extent *ex; + + ext_debug("truncate since %lu in leaf\n", start); + if (!path[depth].p_hdr) + path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); + eh = path[depth].p_hdr; + BUG_ON(eh == NULL); + BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max)); + BUG_ON(eh->eh_magic != EXT3_EXT_MAGIC); + + /* find where to start removing */ + ex = EXT_LAST_EXTENT(eh); + + ex_ee_block = le32_to_cpu(ex->ee_block); + ex_ee_len = le16_to_cpu(ex->ee_len); + + while (ex >= EXT_FIRST_EXTENT(eh) && + ex_ee_block + ex_ee_len > start) { + ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len); + path[depth].p_ext = ex; + + a = ex_ee_block > start ? ex_ee_block : start; + b = ex_ee_block + ex_ee_len - 1 < EXT_MAX_BLOCK ? + ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCK; + + ext_debug(" border %u:%u\n", a, b); + + if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) { + block = 0; + num = 0; + BUG(); + } else if (a != ex_ee_block) { + /* remove tail of the extent */ + block = ex_ee_block; + num = a - block; + } else if (b != ex_ee_block + ex_ee_len - 1) { + /* remove head of the extent */ + block = a; + num = b - a; + /* there is no "make a hole" API yet */ + BUG(); + } else { + /* remove whole extent: excellent! */ + block = ex_ee_block; + num = 0; + BUG_ON(a != ex_ee_block); + BUG_ON(b != ex_ee_block + ex_ee_len - 1); + } + + /* at present, extent can't cross block group */ + /* leaf + bitmap + group desc + sb + inode */ + credits = 5; + if (ex == EXT_FIRST_EXTENT(eh)) { + correct_index = 1; + credits += (ext_depth(inode)) + 1; + } +#ifdef CONFIG_QUOTA + credits += 2 * EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb); +#endif + + handle = ext3_ext_journal_restart(handle, credits); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto out; + } + + err = ext3_ext_get_access(handle, inode, path + depth); + if (err) + goto out; + + err = ext3_remove_blocks(handle, inode, ex, a, b); + if (err) + goto out; + + if (num == 0) { + /* this extent is removed entirely mark slot unused */ + ext3_ext_store_pblock(ex, 0); + eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); + } + + ex->ee_block = cpu_to_le32(block); + ex->ee_len = cpu_to_le16(num); + + err = ext3_ext_dirty(handle, inode, path + depth); + if (err) + goto out; + + ext_debug("new extent: %u:%u:"E3FSBLK"\n", block, num, + ext_pblock(ex)); + ex--; + ex_ee_block = le32_to_cpu(ex->ee_block); + ex_ee_len = le16_to_cpu(ex->ee_len); + } + + if (correct_index && eh->eh_entries) + err = ext3_ext_correct_indexes(handle, inode, path); + + /* if this leaf is free, then we should + * remove it from index block above */ + if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) + err = ext3_ext_rm_idx(handle, inode, path + depth); + +out: + return err; +} + +/* + * returns 1 if current index have to be freed (even partial) + */ +static int inline +ext3_ext_more_to_rm(struct ext3_ext_path *path) +{ + BUG_ON(path->p_idx == NULL); + + if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) + return 0; + + /* + * if truncate on deeper level happened it it wasn't partial + * so we have to consider current index for truncation + */ + if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block) + return 0; + return 1; +} + +int ext3_ext_remove_space(struct inode *inode, unsigned long start) +{ + struct super_block *sb = inode->i_sb; + int depth = ext_depth(inode); + struct ext3_ext_path *path; + handle_t *handle; + int i = 0, err = 0; + + ext_debug("truncate since %lu\n", start); + + /* probably first extent we're gonna free will be last in block */ + handle = ext3_journal_start(inode, depth + 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + ext3_ext_invalidate_cache(inode); + + /* + * we start scanning from right side freeing all the blocks + * after i_size and walking into the deep + */ + path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL); + if (path == NULL) { + ext3_journal_stop(handle); + return -ENOMEM; + } + memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); + path[0].p_hdr = ext_inode_hdr(inode); + if (ext3_ext_check_header(__FUNCTION__, inode, path[0].p_hdr)) { + err = -EIO; + goto out; + } + path[0].p_depth = depth; + + while (i >= 0 && err == 0) { + if (i == depth) { + /* this is leaf block */ + err = ext3_ext_rm_leaf(handle, inode, path, start); + /* root level have p_bh == NULL, brelse() eats this */ + brelse(path[i].p_bh); + path[i].p_bh = NULL; + i--; + continue; + } + + /* this is index block */ + if (!path[i].p_hdr) { + ext_debug("initialize header\n"); + path[i].p_hdr = ext_block_hdr(path[i].p_bh); + if (ext3_ext_check_header(__FUNCTION__, inode, + path[i].p_hdr)) { + err = -EIO; + goto out; + } + } + + BUG_ON(le16_to_cpu(path[i].p_hdr->eh_entries) + > le16_to_cpu(path[i].p_hdr->eh_max)); + BUG_ON(path[i].p_hdr->eh_magic != EXT3_EXT_MAGIC); + + if (!path[i].p_idx) { + /* this level hasn't touched yet */ + path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr); + path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1; + ext_debug("init index ptr: hdr 0x%p, num %d\n", + path[i].p_hdr, + le16_to_cpu(path[i].p_hdr->eh_entries)); + } else { + /* we've already was here, see at next index */ + path[i].p_idx--; + } + + ext_debug("level %d - index, first 0x%p, cur 0x%p\n", + i, EXT_FIRST_INDEX(path[i].p_hdr), + path[i].p_idx); + if (ext3_ext_more_to_rm(path + i)) { + /* go to the next level */ + ext_debug("move to level %d (block "E3FSBLK")\n", + i + 1, idx_pblock(path[i].p_idx)); + memset(path + i + 1, 0, sizeof(*path)); + path[i+1].p_bh = + sb_bread(sb, idx_pblock(path[i].p_idx)); + if (!path[i+1].p_bh) { + /* should we reset i_size? */ + err = -EIO; + break; + } + + /* put actual number of indexes to know is this + * number got changed at the next iteration */ + path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries); + i++; + } else { + /* we finish processing this index, go up */ + if (path[i].p_hdr->eh_entries == 0 && i > 0) { + /* index is empty, remove it + * handle must be already prepared by the + * truncatei_leaf() */ + err = ext3_ext_rm_idx(handle, inode, path + i); + } + /* root level have p_bh == NULL, brelse() eats this */ + brelse(path[i].p_bh); + path[i].p_bh = NULL; + i--; + ext_debug("return to level %d\n", i); + } + } + + /* TODO: flexible tree reduction should be here */ + if (path->p_hdr->eh_entries == 0) { + /* + * truncate to zero freed all the tree + * so, we need to correct eh_depth + */ + err = ext3_ext_get_access(handle, inode, path); + if (err == 0) { + ext_inode_hdr(inode)->eh_depth = 0; + ext_inode_hdr(inode)->eh_max = + cpu_to_le16(ext3_ext_space_root(inode)); + err = ext3_ext_dirty(handle, inode, path); + } + } +out: + ext3_ext_tree_changed(inode); + ext3_ext_drop_refs(path); + kfree(path); + ext3_journal_stop(handle); + + return err; +} + +/* + * called at mount time + */ +void ext3_ext_init(struct super_block *sb) +{ + /* + * possible initialization would be here + */ + + if (test_opt(sb, EXTENTS)) { + printk("EXT3-fs: file extents enabled"); +#ifdef AGRESSIVE_TEST + printk(", agressive tests"); +#endif +#ifdef CHECK_BINSEARCH + printk(", check binsearch"); +#endif +#ifdef EXTENTS_STATS + printk(", stats"); +#endif + printk("\n"); +#ifdef EXTENTS_STATS + spin_lock_init(&EXT3_SB(sb)->s_ext_stats_lock); + EXT3_SB(sb)->s_ext_min = 1 << 30; + EXT3_SB(sb)->s_ext_max = 0; +#endif + } +} + +/* + * called at umount time + */ +void ext3_ext_release(struct super_block *sb) +{ + if (!test_opt(sb, EXTENTS)) + return; + +#ifdef EXTENTS_STATS + if (EXT3_SB(sb)->s_ext_blocks && EXT3_SB(sb)->s_ext_extents) { + struct ext3_sb_info *sbi = EXT3_SB(sb); + printk(KERN_ERR "EXT3-fs: %lu blocks in %lu extents (%lu ave)\n", + sbi->s_ext_blocks, sbi->s_ext_extents, + sbi->s_ext_blocks / sbi->s_ext_extents); + printk(KERN_ERR "EXT3-fs: extents: %lu min, %lu max, max depth %lu\n", + sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max); + } +#endif +} + +int ext3_ext_get_blocks(handle_t *handle, struct inode *inode, ext3_fsblk_t iblock, + unsigned long max_blocks, struct buffer_head *bh_result, + int create, int extend_disksize) +{ + struct ext3_ext_path *path = NULL; + struct ext3_extent newex, *ex; + ext3_fsblk_t goal, newblock; + int err = 0, depth; + unsigned long allocated = 0; + + __clear_bit(BH_New, &bh_result->b_state); + ext_debug("blocks %d/%lu requested for inode %u\n", (int) iblock, + max_blocks, (unsigned) inode->i_ino); + mutex_lock(&EXT3_I(inode)->truncate_mutex); + + /* check in cache */ + if ((goal = ext3_ext_in_cache(inode, iblock, &newex))) { + if (goal == EXT3_EXT_CACHE_GAP) { + if (!create) { + /* block isn't allocated yet and + * user don't want to allocate it */ + goto out2; + } + /* we should allocate requested block */ + } else if (goal == EXT3_EXT_CACHE_EXTENT) { + /* block is already allocated */ + newblock = iblock + - le32_to_cpu(newex.ee_block) + + ext_pblock(&newex); + /* number of remain blocks in the extent */ + allocated = le16_to_cpu(newex.ee_len) - + (iblock - le32_to_cpu(newex.ee_block)); + goto out; + } else { + BUG(); + } + } + + /* find extent for this block */ + path = ext3_ext_find_extent(inode, iblock, NULL); + if (IS_ERR(path)) { + err = PTR_ERR(path); + path = NULL; + goto out2; + } + + depth = ext_depth(inode); + + /* + * consistent leaf must not be empty + * this situations is possible, though, _during_ tree modification + * this is why assert can't be put in ext3_ext_find_extent() + */ + BUG_ON(path[depth].p_ext == NULL && depth != 0); + + if ((ex = path[depth].p_ext)) { + unsigned long ee_block = le32_to_cpu(ex->ee_block); + ext3_fsblk_t ee_start = ext_pblock(ex); + unsigned short ee_len = le16_to_cpu(ex->ee_len); + /* if found exent covers block, simple return it */ + if (iblock >= ee_block && iblock < ee_block + ee_len) { + newblock = iblock - ee_block + ee_start; + /* number of remain blocks in the extent */ + allocated = ee_len - (iblock - ee_block); + ext_debug("%d fit into %lu:%d -> "E3FSBLK"\n", (int) iblock, + ee_block, ee_len, newblock); + ext3_ext_put_in_cache(inode, ee_block, ee_len, + ee_start, EXT3_EXT_CACHE_EXTENT); + goto out; + } + } + + /* + * requested block isn't allocated yet + * we couldn't try to create block if create flag is zero + */ + if (!create) { + /* put just found gap into cache to speedup subsequest reqs */ + ext3_ext_put_gap_in_cache(inode, path, iblock); + goto out2; + } + + /* allocate new block */ + goal = ext3_ext_find_goal(inode, path, iblock); + allocated = max_blocks; + newblock = ext3_new_blocks(handle, inode, goal, &allocated, &err); + if (!newblock) + goto out2; + ext_debug("allocate new block: goal "E3FSBLK", found "E3FSBLK"/%lu\n", + goal, newblock, allocated); + + /* try to insert new extent into found leaf and return */ + newex.ee_block = cpu_to_le32(iblock); + ext3_ext_store_pblock(&newex, newblock); + newex.ee_len = cpu_to_le16(allocated); + err = ext3_ext_insert_extent(handle, inode, path, &newex); + if (err) + goto out2; + + if (extend_disksize && inode->i_size > EXT3_I(inode)->i_disksize) + EXT3_I(inode)->i_disksize = inode->i_size; + + /* previous routine could use block we allocated */ + newblock = ext_pblock(&newex); + __set_bit(BH_New, &bh_result->b_state); + + ext3_ext_put_in_cache(inode, iblock, allocated, newblock, + EXT3_EXT_CACHE_EXTENT); +out: + if (allocated > max_blocks) + allocated = max_blocks; + ext3_ext_show_leaf(inode, path); + __set_bit(BH_Mapped, &bh_result->b_state); + bh_result->b_bdev = inode->i_sb->s_bdev; + bh_result->b_blocknr = newblock; +out2: + if (path) { + ext3_ext_drop_refs(path); + kfree(path); + } + mutex_unlock(&EXT3_I(inode)->truncate_mutex); + + return err ? err : allocated; +} + +void ext3_ext_truncate(struct inode * inode, struct page *page) +{ + struct address_space *mapping = inode->i_mapping; + struct super_block *sb = inode->i_sb; + unsigned long last_block; + handle_t *handle; + int err = 0; + + /* + * probably first extent we're gonna free will be last in block + */ + err = ext3_writepage_trans_blocks(inode) + 3; + handle = ext3_journal_start(inode, err); + if (IS_ERR(handle)) { + if (page) { + clear_highpage(page); + flush_dcache_page(page); + unlock_page(page); + page_cache_release(page); + } + return; + } + + if (page) + ext3_block_truncate_page(handle, page, mapping, inode->i_size); + + mutex_lock(&EXT3_I(inode)->truncate_mutex); + ext3_ext_invalidate_cache(inode); + + /* + * TODO: optimization is possible here + * probably we need not scaning at all, + * because page truncation is enough + */ + if (ext3_orphan_add(handle, inode)) + goto out_stop; + + /* we have to know where to truncate from in crash case */ + EXT3_I(inode)->i_disksize = inode->i_size; + ext3_mark_inode_dirty(handle, inode); + + last_block = (inode->i_size + sb->s_blocksize - 1) + >> EXT3_BLOCK_SIZE_BITS(sb); + err = ext3_ext_remove_space(inode, last_block); + + /* In a multi-transaction truncate, we only make the final + * transaction synchronous */ + if (IS_SYNC(inode)) + handle->h_sync = 1; + +out_stop: + /* + * If this was a simple ftruncate(), and the file will remain alive + * then we need to clear up the orphan record which we created above. + * However, if this was a real unlink then we were called by + * ext3_delete_inode(), and we allow that function to clean up the + * orphan info for us. + */ + if (inode->i_nlink) + ext3_orphan_del(handle, inode); + + mutex_unlock(&EXT3_I(inode)->truncate_mutex); + ext3_journal_stop(handle); +} + +/* + * this routine calculate max number of blocks we could modify + * in order to allocate new block for an inode + */ +int ext3_ext_writepage_trans_blocks(struct inode *inode, int num) +{ + int needed; + + needed = ext3_ext_calc_credits_for_insert(inode, NULL); + + /* caller want to allocate num blocks, but note it includes sb */ + needed = needed * num - (num - 1); + +#ifdef CONFIG_QUOTA + needed += 2 * EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb); +#endif + + return needed; +} + +EXPORT_SYMBOL(ext3_mark_inode_dirty); +EXPORT_SYMBOL(ext3_ext_invalidate_cache); +EXPORT_SYMBOL(ext3_ext_insert_extent); +EXPORT_SYMBOL(ext3_ext_walk_space); +EXPORT_SYMBOL(ext3_ext_find_goal); +EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert); + diff -puN fs/ext3/ialloc.c~ext3-2.6.17 fs/ext3/ialloc.c --- linux-2.6.17/fs/ext3/ialloc.c~ext3-2.6.17 2006-06-27 12:57:54.843368854 -0700 +++ linux-2.6.17-ming/fs/ext3/ialloc.c 2006-06-27 12:57:55.064341736 -0700 @@ -23,7 +23,7 @@ #include #include #include - +#include #include #include "xattr.h" @@ -60,12 +60,14 @@ read_inode_bitmap(struct super_block * s if (!desc) goto error_out; - bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); + bh = sb_bread(sb, EXT3_INODE_BITMAP(desc, + ext3_group_first_block_no(sb, block_group))); if (!bh) ext3_error(sb, "read_inode_bitmap", "Cannot read inode bitmap - " - "block_group = %lu, inode_bitmap = %u", - block_group, le32_to_cpu(desc->bg_inode_bitmap)); + "block_group = %lu, inode_bitmap = %llu", + block_group, EXT3_INODE_BITMAP(desc, + ext3_group_first_block_no(sb, block_group))); error_out: return bh; } @@ -262,9 +264,11 @@ static int find_group_orlov(struct super int ngroups = sbi->s_groups_count; int inodes_per_group = EXT3_INODES_PER_GROUP(sb); int freei, avefreei; - int freeb, avefreeb; - int blocks_per_dir, ndirs; - int max_debt, max_dirs, min_blocks, min_inodes; + ext3_fsblk_t freeb, avefreeb; + ext3_fsblk_t blocks_per_dir; + int ndirs; + int max_debt, max_dirs, min_inodes; + ext3_grpblk_t min_blocks; int group = -1, i; struct ext3_group_desc *desc; struct buffer_head *bh; @@ -272,7 +276,8 @@ static int find_group_orlov(struct super freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); avefreei = freei / ngroups; freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter); - avefreeb = freeb / ngroups; + avefreeb = freeb; + sector_div(avefreeb, ngroups); ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); if ((parent == sb->s_root->d_inode) || @@ -301,13 +306,15 @@ static int find_group_orlov(struct super goto fallback; } - blocks_per_dir = (le32_to_cpu(es->s_blocks_count) - freeb) / ndirs; + blocks_per_dir = EXT3_BLOCKS_COUNT(es) - freeb; + sector_div(blocks_per_dir, ndirs); max_dirs = ndirs / ngroups + inodes_per_group / 16; min_inodes = avefreei - inodes_per_group / 4; min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4; - max_debt = EXT3_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, BLOCK_COST); + max_debt = EXT3_BLOCKS_PER_GROUP(sb); + sector_div(max_debt, max(blocks_per_dir, (ext3_fsblk_t)BLOCK_COST)); if (max_debt * INODE_COST > inodes_per_group) max_debt = inodes_per_group / INODE_COST; if (max_debt > 255) @@ -614,6 +621,17 @@ got: ext3_std_error(sb, err); goto fail_free_drop; } + if (test_opt(sb, EXTENTS)) { + EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; + ext3_ext_tree_init(handle, inode); + if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS)) { + err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); + if (err) goto fail; + EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS); + BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); + } + } ext3_debug("allocating inode %lu\n", inode->i_ino); goto really_out; diff -puN fs/ext3/inode.c~ext3-2.6.17 fs/ext3/inode.c --- linux-2.6.17/fs/ext3/inode.c~ext3-2.6.17 2006-06-27 12:57:54.847368363 -0700 +++ linux-2.6.17-ming/fs/ext3/inode.c 2006-06-27 12:57:55.070341000 -0700 @@ -39,8 +39,6 @@ #include "xattr.h" #include "acl.h" -static int ext3_writepage_trans_blocks(struct inode *inode); - /* * Test whether an inode is a fast symlink. */ @@ -62,7 +60,7 @@ static int ext3_inode_is_fast_symlink(st * still needs to be revoked. */ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, - struct buffer_head *bh, int blocknr) + struct buffer_head *bh, ext3_fsblk_t blocknr) { int err; @@ -407,13 +405,13 @@ no_block: * * Caller must make sure that @ind is valid and will stay that way. */ -static unsigned long ext3_find_near(struct inode *inode, Indirect *ind) +static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind) { struct ext3_inode_info *ei = EXT3_I(inode); __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; __le32 *p; - unsigned long bg_start; - unsigned long colour; + ext3_fsblk_t bg_start; + ext3_grpblk_t colour; /* Try to find previous block */ for (p = ind->p - 1; p >= start; p--) { @@ -429,8 +427,7 @@ static unsigned long ext3_find_near(stru * It is going to be referred to from the inode itself? OK, just put it * into the same cylinder group then. */ - bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + - le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); + bg_start = ext3_group_first_block_no(inode->i_sb, ei->i_block_group); colour = (current->pid % 16) * (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); return bg_start + colour; @@ -448,7 +445,7 @@ static unsigned long ext3_find_near(stru * stores it in *@goal and returns zero. */ -static unsigned long ext3_find_goal(struct inode *inode, long block, +static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block, Indirect chain[4], Indirect *partial) { struct ext3_block_alloc_info *block_i; @@ -516,13 +513,13 @@ static int ext3_blks_to_allocate(Indirec * direct blocks */ static int ext3_alloc_blocks(handle_t *handle, struct inode *inode, - unsigned long goal, int indirect_blks, int blks, - unsigned long long new_blocks[4], int *err) + ext3_fsblk_t goal, int indirect_blks, int blks, + ext3_fsblk_t new_blocks[4], int *err) { int target, i; unsigned long count = 0; int index = 0; - unsigned long current_block = 0; + ext3_fsblk_t current_block = 0; int ret = 0; /* @@ -592,7 +589,7 @@ failed_out: * as described above and return 0. */ static int ext3_alloc_branch(handle_t *handle, struct inode *inode, - int indirect_blks, int *blks, unsigned long goal, + int indirect_blks, int *blks, ext3_fsblk_t goal, int *offsets, Indirect *branch) { int blocksize = inode->i_sb->s_blocksize; @@ -600,8 +597,8 @@ static int ext3_alloc_branch(handle_t *h int err = 0; struct buffer_head *bh; int num; - unsigned long long new_blocks[4]; - unsigned long long current_block; + ext3_fsblk_t new_blocks[4]; + ext3_fsblk_t current_block; num = ext3_alloc_blocks(handle, inode, goal, indirect_blks, *blks, new_blocks, &err); @@ -688,7 +685,7 @@ static int ext3_splice_branch(handle_t * int i; int err = 0; struct ext3_block_alloc_info *block_i; - unsigned long current_block; + ext3_fsblk_t current_block; block_i = EXT3_I(inode)->i_block_alloc_info; /* @@ -795,15 +792,16 @@ int ext3_get_blocks_handle(handle_t *han int offsets[4]; Indirect chain[4]; Indirect *partial; - unsigned long goal; + ext3_fsblk_t goal; int indirect_blks; int blocks_to_boundary = 0; int depth; struct ext3_inode_info *ei = EXT3_I(inode); int count = 0; - unsigned long first_block = 0; + ext3_fsblk_t first_block = 0; + J_ASSERT(!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)); J_ASSERT(handle != NULL || create == 0); depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary); @@ -819,7 +817,7 @@ int ext3_get_blocks_handle(handle_t *han count++; /*map more blocks*/ while (count < maxblocks && count <= blocks_to_boundary) { - unsigned long blk; + ext3_fsblk_t blk; if (!verify_chain(chain, partial)) { /* @@ -984,7 +982,7 @@ static int ext3_get_block(struct inode * get_block: if (ret == 0) { - ret = ext3_get_blocks_handle(handle, inode, iblock, + ret = ext3_get_blocks_wrap(handle, inode, iblock, max_blocks, bh_result, create, 0); if (ret > 0) { bh_result->b_size = (ret << inode->i_blkbits); @@ -1008,7 +1006,7 @@ struct buffer_head *ext3_getblk(handle_t dummy.b_state = 0; dummy.b_blocknr = -1000; buffer_trace_init(&dummy.b_history); - err = ext3_get_blocks_handle(handle, inode, block, 1, + err = ext3_get_blocks_wrap(handle, inode, block, 1, &dummy, create, 1); if (err == 1) { err = 0; @@ -1756,10 +1754,10 @@ void ext3_set_aops(struct inode *inode) * This required during truncate. We need to physically zero the tail end * of that block so it doesn't yield old data if the file is later grown. */ -static int ext3_block_truncate_page(handle_t *handle, struct page *page, +int ext3_block_truncate_page(handle_t *handle, struct page *page, struct address_space *mapping, loff_t from) { - unsigned long index = from >> PAGE_CACHE_SHIFT; + ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); unsigned blocksize, iblock, length, pos; struct inode *inode = mapping->host; @@ -1960,7 +1958,7 @@ no_top: * than `count' because there can be holes in there. */ static void ext3_clear_blocks(handle_t *handle, struct inode *inode, - struct buffer_head *bh, unsigned long block_to_free, + struct buffer_head *bh, ext3_fsblk_t block_to_free, unsigned long count, __le32 *first, __le32 *last) { __le32 *p; @@ -2022,12 +2020,12 @@ static void ext3_free_data(handle_t *han struct buffer_head *this_bh, __le32 *first, __le32 *last) { - unsigned long block_to_free = 0; /* Starting block # of a run */ + ext3_fsblk_t block_to_free = 0; /* Starting block # of a run */ unsigned long count = 0; /* Number of blocks in the run */ __le32 *block_to_free_p = NULL; /* Pointer into inode/ind corresponding to block_to_free */ - unsigned long nr; /* Current block # */ + ext3_fsblk_t nr; /* Current block # */ __le32 *p; /* Pointer into inode/ind for current block */ int err; @@ -2089,7 +2087,7 @@ static void ext3_free_branches(handle_t struct buffer_head *parent_bh, __le32 *first, __le32 *last, int depth) { - unsigned long nr; + ext3_fsblk_t nr; __le32 *p; if (is_handle_aborted(handle)) @@ -2113,7 +2111,7 @@ static void ext3_free_branches(handle_t */ if (!bh) { ext3_error(inode->i_sb, "ext3_free_branches", - "Read failure, inode=%ld, block=%ld", + "Read failure, inode=%ld, block="E3FSBLK, inode->i_ino, nr); continue; } @@ -2260,6 +2258,9 @@ void ext3_truncate(struct inode *inode) return; } + if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) + return ext3_ext_truncate(inode, page); + handle = start_transaction(inode); if (IS_ERR(handle)) { if (page) { @@ -2394,11 +2395,12 @@ out_stop: ext3_journal_stop(handle); } -static unsigned long ext3_get_inode_block(struct super_block *sb, +static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, unsigned long ino, struct ext3_iloc *iloc) { unsigned long desc, group_desc, block_group; - unsigned long offset, block; + unsigned long offset; + ext3_fsblk_t block; struct buffer_head *bh; struct ext3_group_desc * gdp; @@ -2431,8 +2433,9 @@ static unsigned long ext3_get_inode_bloc */ offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb)) * EXT3_INODE_SIZE(sb); - block = le32_to_cpu(gdp[desc].bg_inode_table) + - (offset >> EXT3_BLOCK_SIZE_BITS(sb)); + block = EXT3_INODE_TABLE((gdp+desc), + ext3_group_first_block_no(sb, block_group)) + + (offset >> EXT3_BLOCK_SIZE_BITS(sb)); iloc->block_group = block_group; iloc->offset = offset & (EXT3_BLOCK_SIZE(sb) - 1); @@ -2448,7 +2451,7 @@ static unsigned long ext3_get_inode_bloc static int __ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc, int in_mem) { - unsigned long block; + ext3_fsblk_t block; struct buffer_head *bh; block = ext3_get_inode_block(inode->i_sb, inode->i_ino, iloc); @@ -2459,7 +2462,8 @@ static int __ext3_get_inode_loc(struct i if (!bh) { ext3_error (inode->i_sb, "ext3_get_inode_loc", "unable to read inode block - " - "inode=%lu, block=%lu", inode->i_ino, block); + "inode=%lu, block="E3FSBLK, + inode->i_ino, block); return -EIO; } if (!buffer_uptodate(bh)) { @@ -2498,7 +2502,9 @@ static int __ext3_get_inode_loc(struct i goto make_io; bitmap_bh = sb_getblk(inode->i_sb, - le32_to_cpu(desc->bg_inode_bitmap)); + EXT3_INODE_BITMAP(desc, + ext3_group_first_block_no(inode->i_sb, + block_group))); if (!bitmap_bh) goto make_io; @@ -2540,7 +2546,7 @@ make_io: if (!buffer_uptodate(bh)) { ext3_error(inode->i_sb, "ext3_get_inode_loc", "unable to read inode block - " - "inode=%lu, block=%lu", + "inode=%lu, block="E3FSBLK, inode->i_ino, block); brelse(bh); return -EIO; @@ -2638,6 +2644,10 @@ void ext3_read_inode(struct inode * inod ei->i_frag_size = raw_inode->i_fsize; #endif ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); + if ((sizeof(sector_t) > 4) && + (EXT3_SB(inode->i_sb)->s_es->s_creator_os != EXT3_OS_HURD)) + ei->i_file_acl |= + ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; if (!S_ISREG(inode->i_mode)) { ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); } else { @@ -2771,6 +2781,10 @@ static int ext3_do_update_inode(handle_t raw_inode->i_frag = ei->i_frag_no; raw_inode->i_fsize = ei->i_frag_size; #endif + if ((sizeof(sector_t) > 4) && + (EXT3_SB(inode->i_sb)->s_es->s_creator_os != EXT3_OS_HURD)) + raw_inode->i_file_acl_high = + cpu_to_le16((__u64)ei->i_file_acl >> 32); raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); if (!S_ISREG(inode->i_mode)) { raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); @@ -3000,12 +3014,15 @@ err_out: * block and work out the exact number of indirects which are touched. Pah. */ -static int ext3_writepage_trans_blocks(struct inode *inode) +int ext3_writepage_trans_blocks(struct inode *inode) { int bpp = ext3_journal_blocks_per_page(inode); int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; int ret; + if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) + return ext3_ext_writepage_trans_blocks(inode, bpp); + if (ext3_should_journal_data(inode)) ret = 3 * (bpp + indirects) + 2; else diff -puN fs/ext3/ioctl.c~ext3-2.6.17 fs/ext3/ioctl.c --- linux-2.6.17/fs/ext3/ioctl.c~ext3-2.6.17 2006-06-27 12:57:54.850367995 -0700 +++ linux-2.6.17-ming/fs/ext3/ioctl.c 2006-06-27 12:57:55.078340019 -0700 @@ -204,7 +204,7 @@ flags_err: return 0; } case EXT3_IOC_GROUP_EXTEND: { - unsigned long n_blocks_count; + ext3_fsblk_t n_blocks_count; struct super_block *sb = inode->i_sb; int err; @@ -247,7 +247,6 @@ flags_err: return err; } - default: return -ENOTTY; } diff -puN fs/ext3/Makefile~ext3-2.6.17 fs/ext3/Makefile --- linux-2.6.17/fs/ext3/Makefile~ext3-2.6.17 2006-06-27 12:57:54.853367627 -0700 +++ linux-2.6.17-ming/fs/ext3/Makefile 2006-06-27 12:57:55.087338914 -0700 @@ -5,7 +5,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o + ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o diff -puN fs/ext3/resize.c~ext3-2.6.17 fs/ext3/resize.c --- linux-2.6.17/fs/ext3/resize.c~ext3-2.6.17 2006-06-27 12:57:54.856367258 -0700 +++ linux-2.6.17-ming/fs/ext3/resize.c 2006-06-27 12:57:55.056342718 -0700 @@ -15,7 +15,6 @@ #include #include #include - #include #include @@ -28,16 +27,16 @@ static int verify_group_input(struct sup { struct ext3_sb_info *sbi = EXT3_SB(sb); struct ext3_super_block *es = sbi->s_es; - unsigned start = le32_to_cpu(es->s_blocks_count); - unsigned end = start + input->blocks_count; + ext3_fsblk_t start = EXT3_BLOCKS_COUNT(es); + ext3_fsblk_t end = start + input->blocks_count; unsigned group = input->group; - unsigned itend = input->inode_table + sbi->s_itb_per_group; + ext3_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; unsigned overhead = ext3_bg_has_super(sb, group) ? (1 + ext3_bg_num_gdb(sb, group) + le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; - unsigned metaend = start + overhead; + ext3_fsblk_t metaend = start + overhead; struct buffer_head *bh = NULL; - int free_blocks_count; + ext3_grpblk_t free_blocks_count, offset; int err = -EINVAL; input->free_blocks_count = free_blocks_count = @@ -50,13 +49,13 @@ static int verify_group_input(struct sup "no-super", input->group, input->blocks_count, free_blocks_count, input->reserved_blocks); + ext3_get_group_no_and_offset(sb, start, NULL, &offset); if (group != sbi->s_groups_count) ext3_warning(sb, __FUNCTION__, "Cannot add at group %u (only %lu groups)", input->group, sbi->s_groups_count); - else if ((start - le32_to_cpu(es->s_first_data_block)) % - EXT3_BLOCKS_PER_GROUP(sb)) - ext3_warning(sb, __FUNCTION__, "Last group not full"); + else if (offset != 0) + ext3_warning(sb, __FUNCTION__, "Last group not full"); else if (input->reserved_blocks > input->blocks_count / 5) ext3_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)", input->reserved_blocks); @@ -64,7 +63,8 @@ static int verify_group_input(struct sup ext3_warning(sb, __FUNCTION__, "Bad blocks count %u", input->blocks_count); else if (!(bh = sb_bread(sb, end - 1))) - ext3_warning(sb, __FUNCTION__, "Cannot read last block (%u)", + ext3_warning(sb, __FUNCTION__, + "Cannot read last block ("E3FSBLK")", end - 1); else if (outside(input->block_bitmap, start, end)) ext3_warning(sb, __FUNCTION__, @@ -77,7 +77,7 @@ static int verify_group_input(struct sup else if (outside(input->inode_table, start, end) || outside(itend - 1, start, end)) ext3_warning(sb, __FUNCTION__, - "Inode table not in group (blocks %u-%u)", + "Inode table not in group (blocks %u-"E3FSBLK")", input->inode_table, itend - 1); else if (input->inode_bitmap == input->block_bitmap) ext3_warning(sb, __FUNCTION__, @@ -85,24 +85,27 @@ static int verify_group_input(struct sup input->block_bitmap); else if (inside(input->block_bitmap, input->inode_table, itend)) ext3_warning(sb, __FUNCTION__, - "Block bitmap (%u) in inode table (%u-%u)", + "Block bitmap (%u) in inode table (%u-"E3FSBLK")", input->block_bitmap, input->inode_table, itend-1); else if (inside(input->inode_bitmap, input->inode_table, itend)) ext3_warning(sb, __FUNCTION__, - "Inode bitmap (%u) in inode table (%u-%u)", + "Inode bitmap (%u) in inode table (%u-"E3FSBLK")", input->inode_bitmap, input->inode_table, itend-1); else if (inside(input->block_bitmap, start, metaend)) ext3_warning(sb, __FUNCTION__, - "Block bitmap (%u) in GDT table (%u-%u)", + "Block bitmap (%u) in GDT table" + " ("E3FSBLK"-"E3FSBLK")", input->block_bitmap, start, metaend - 1); else if (inside(input->inode_bitmap, start, metaend)) ext3_warning(sb, __FUNCTION__, - "Inode bitmap (%u) in GDT table (%u-%u)", + "Inode bitmap (%u) in GDT table" + " ("E3FSBLK"-"E3FSBLK")", input->inode_bitmap, start, metaend - 1); else if (inside(input->inode_table, start, metaend) || inside(itend - 1, start, metaend)) ext3_warning(sb, __FUNCTION__, - "Inode table (%u-%u) overlaps GDT table (%u-%u)", + "Inode table (%u-"E3FSBLK") overlaps" + "GDT table ("E3FSBLK"-"E3FSBLK")", input->inode_table, itend - 1, start, metaend - 1); else err = 0; @@ -112,7 +115,7 @@ static int verify_group_input(struct sup } static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, - unsigned long blk) + ext3_fsblk_t blk) { struct buffer_head *bh; int err; @@ -163,15 +166,14 @@ static int setup_new_group_blocks(struct struct ext3_new_group_data *input) { struct ext3_sb_info *sbi = EXT3_SB(sb); - unsigned long start = input->group * sbi->s_blocks_per_group + - le32_to_cpu(sbi->s_es->s_first_data_block); + ext3_fsblk_t start = ext3_group_first_block_no(sb, input->group); int reserved_gdb = ext3_bg_has_super(sb, input->group) ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; unsigned long gdblocks = ext3_bg_num_gdb(sb, input->group); struct buffer_head *bh; handle_t *handle; - unsigned long block; - int bit; + ext3_fsblk_t block; + ext3_grpblk_t bit; int i; int err = 0, err2; @@ -328,7 +330,7 @@ static unsigned ext3_list_backups(struct static int verify_reserved_gdb(struct super_block *sb, struct buffer_head *primary) { - const unsigned long blk = primary->b_blocknr; + const ext3_fsblk_t blk = primary->b_blocknr; const unsigned long end = EXT3_SB(sb)->s_groups_count; unsigned three = 1; unsigned five = 5; @@ -340,7 +342,8 @@ static int verify_reserved_gdb(struct su while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) { if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){ ext3_warning(sb, __FUNCTION__, - "reserved GDT %ld missing grp %d (%ld)", + "reserved GDT "E3FSBLK + " missing grp %d ("E3FSBLK")", blk, grp, grp * EXT3_BLOCKS_PER_GROUP(sb) + blk); return -EINVAL; @@ -372,7 +375,7 @@ static int add_new_gdb(handle_t *handle, struct super_block *sb = inode->i_sb; struct ext3_super_block *es = EXT3_SB(sb)->s_es; unsigned long gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb); - unsigned long gdblock = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; + ext3_fsblk_t gdblock = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; struct buffer_head **o_group_desc, **n_group_desc; struct buffer_head *dind; int gdbackups; @@ -417,7 +420,7 @@ static int add_new_gdb(handle_t *handle, data = (__u32 *)dind->b_data; if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) { ext3_warning(sb, __FUNCTION__, - "new group %u GDT block %lu not reserved", + "new group %u GDT block "E3FSBLK" not reserved", input->group, gdblock); err = -EINVAL; goto exit_dind; @@ -515,7 +518,7 @@ static int reserve_backup_gdb(handle_t * struct buffer_head **primary; struct buffer_head *dind; struct ext3_iloc iloc; - unsigned long blk; + ext3_fsblk_t blk; __u32 *data, *end; int gdbackups = 0; int res, i; @@ -540,7 +543,8 @@ static int reserve_backup_gdb(handle_t * for (res = 0; res < reserved_gdb; res++, blk++) { if (le32_to_cpu(*data) != blk) { ext3_warning(sb, __FUNCTION__, - "reserved block %lu not at offset %ld", + "reserved block "E3FSBLK + " not at offset %ld", blk, (long)(data - (__u32 *)dind->b_data)); err = -EINVAL; goto exit_bh; @@ -813,9 +817,12 @@ int ext3_group_add(struct super_block *s /* Update group descriptor block for new group */ gdp = (struct ext3_group_desc *)primary->b_data + gdb_off; - gdp->bg_block_bitmap = cpu_to_le32(input->block_bitmap); - gdp->bg_inode_bitmap = cpu_to_le32(input->inode_bitmap); - gdp->bg_inode_table = cpu_to_le32(input->inode_table); + EXT3_BLOCK_BITMAP_SET(gdp, ext3_group_first_block_no(sb, gdb_num), + input->block_bitmap); /* LV FIXME */ + EXT3_INODE_BITMAP_SET(gdp, ext3_group_first_block_no(sb, gdb_num), + input->inode_bitmap); /* LV FIXME */ + EXT3_INODE_TABLE_SET(gdp, ext3_group_first_block_no(sb, gdb_num), + input->inode_table); /* LV FIXME */ gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb)); @@ -829,7 +836,7 @@ int ext3_group_add(struct super_block *s * blocks/inodes before the group is live won't actually let us * allocate the new space yet. */ - es->s_blocks_count = cpu_to_le32(le32_to_cpu(es->s_blocks_count) + + EXT3_BLOCKS_COUNT_SET(es, EXT3_BLOCKS_COUNT(es) + input->blocks_count); es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) + EXT3_INODES_PER_GROUP(sb)); @@ -865,7 +872,7 @@ int ext3_group_add(struct super_block *s /* Update the reserved block counts only once the new group is * active. */ - es->s_r_blocks_count = cpu_to_le32(le32_to_cpu(es->s_r_blocks_count) + + EXT3_R_BLOCKS_COUNT_SET(es, EXT3_R_BLOCKS_COUNT(es) + input->reserved_blocks); /* Update the free space counts */ @@ -902,29 +909,40 @@ exit_put: * GDT blocks are reserved to grow to the desired size. */ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, - unsigned long n_blocks_count) + ext3_fsblk_t n_blocks_count) { - unsigned long o_blocks_count; + ext3_fsblk_t o_blocks_count; unsigned long o_groups_count; - unsigned long last; - int add; + ext3_grpblk_t last; + ext3_grpblk_t add; struct buffer_head * bh; handle_t *handle; - int err, freed_blocks; + int err; + unsigned long freed_blocks; /* We don't need to worry about locking wrt other resizers just * yet: we're going to revalidate es->s_blocks_count after * taking lock_super() below. */ - o_blocks_count = le32_to_cpu(es->s_blocks_count); + o_blocks_count = EXT3_BLOCKS_COUNT(es); o_groups_count = EXT3_SB(sb)->s_groups_count; if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG "EXT3-fs: extending last group from %lu to %lu blocks\n", + printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK" uto "E3FSBLK" blocks\n", o_blocks_count, n_blocks_count); if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) return 0; + if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { + printk(KERN_ERR "EXT3-fs: filesystem on %s" + " too large to resize to "E3FSBLK" blocks safely\n", + sb->s_id, n_blocks_count); + if (sizeof(sector_t) < 8) + ext3_warning(sb, __FUNCTION__, + "CONFIG_LBD not enabled\n"); + return -EINVAL; + } + if (n_blocks_count < o_blocks_count) { ext3_warning(sb, __FUNCTION__, "can't shrink FS - resize aborted"); @@ -932,8 +950,7 @@ int ext3_group_extend(struct super_block } /* Handle the remaining blocks in the last group only. */ - last = (o_blocks_count - le32_to_cpu(es->s_first_data_block)) % - EXT3_BLOCKS_PER_GROUP(sb); + ext3_get_group_no_and_offset(sb, o_blocks_count, NULL, &last); if (last == 0) { ext3_warning(sb, __FUNCTION__, @@ -948,7 +965,8 @@ int ext3_group_extend(struct super_block if (o_blocks_count + add < n_blocks_count) ext3_warning(sb, __FUNCTION__, - "will only finish group (%lu blocks, %u new)", + "will only finish group ("E3FSBLK + " blocks, %u new)", o_blocks_count + add, add); /* See if the device is actually as big as what was requested */ @@ -971,7 +989,7 @@ int ext3_group_extend(struct super_block } lock_super(sb); - if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { + if (o_blocks_count != EXT3_BLOCKS_COUNT(es)) { ext3_warning(sb, __FUNCTION__, "multiple resizers run on filesystem!"); unlock_super(sb); @@ -987,20 +1005,20 @@ int ext3_group_extend(struct super_block ext3_journal_stop(handle); goto exit_put; } - es->s_blocks_count = cpu_to_le32(o_blocks_count + add); + EXT3_BLOCKS_COUNT_SET(es, o_blocks_count + add); ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); sb->s_dirt = 1; unlock_super(sb); - ext3_debug("freeing blocks %ld through %ld\n", o_blocks_count, + ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count, o_blocks_count + add); ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); - ext3_debug("freed blocks %ld through %ld\n", o_blocks_count, + ext3_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n", o_blocks_count, o_blocks_count + add); if ((err = ext3_journal_stop(handle))) goto exit_put; if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG "EXT3-fs: extended group to %u blocks\n", - le32_to_cpu(es->s_blocks_count)); + printk(KERN_DEBUG "EXT3-fs: extended group to %llu blocks\n", + EXT3_BLOCKS_COUNT(es)); update_backups(sb, EXT3_SB(sb)->s_sbh->b_blocknr, (char *)es, sizeof(struct ext3_super_block)); exit_put: diff -puN fs/ext3/super.c~ext3-2.6.17 fs/ext3/super.c --- linux-2.6.17/fs/ext3/super.c~ext3-2.6.17 2006-06-27 12:57:54.860366768 -0700 +++ linux-2.6.17-ming/fs/ext3/super.c 2006-06-27 12:57:55.049343577 -0700 @@ -390,6 +390,7 @@ static void ext3_put_super (struct super struct ext3_super_block *es = sbi->s_es; int i; + ext3_ext_release(sb); ext3_xattr_put_super(sb); journal_destroy(sbi->s_journal); if (!(sb->s_flags & MS_RDONLY)) { @@ -454,6 +455,7 @@ static struct inode *ext3_alloc_inode(st #endif ei->i_block_alloc_info = NULL; ei->vfs_inode.i_version = 1; + memset(&ei->i_cached_extent, 0, sizeof(struct ext3_ext_cache)); return &ei->vfs_inode; } @@ -635,7 +637,7 @@ enum { Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, - Opt_grpquota + Opt_grpquota, Opt_extents, }; static match_table_t tokens = { @@ -684,18 +686,20 @@ static match_table_t tokens = { {Opt_quota, "quota"}, {Opt_usrquota, "usrquota"}, {Opt_barrier, "barrier=%u"}, + {Opt_extents, "extents"}, {Opt_err, NULL}, {Opt_resize, "resize"}, }; -static unsigned long get_sb_block(void **data) +static ext3_fsblk_t get_sb_block(void **data) { - unsigned long sb_block; + ext3_fsblk_t sb_block; char *options = (char *) *data; if (!options || strncmp(options, "sb=", 3) != 0) return 1; /* Default location */ options += 3; + /*todo: use simple_strtoll with >32bit ext3 */ sb_block = simple_strtoul(options, &options, 0); if (*options && *options != ',') { printk("EXT3-fs: Invalid sb specification: %s\n", @@ -710,7 +714,7 @@ static unsigned long get_sb_block(void * static int parse_options (char *options, struct super_block *sb, unsigned long *inum, unsigned long *journal_devnum, - unsigned long *n_blocks_count, int is_remount) + ext3_fsblk_t *n_blocks_count, int is_remount) { struct ext3_sb_info *sbi = EXT3_SB(sb); char * p; @@ -1012,6 +1016,9 @@ clear_qf_name: case Opt_nobh: set_opt(sbi->s_mount_opt, NOBH); break; + case Opt_extents: + set_opt (sbi->s_mount_opt, EXTENTS); + break; default: printk (KERN_ERR "EXT3-fs: Unrecognized mount option \"%s\" " @@ -1127,7 +1134,7 @@ static int ext3_setup_super(struct super static int ext3_check_descriptors (struct super_block * sb) { struct ext3_sb_info *sbi = EXT3_SB(sb); - unsigned long block = le32_to_cpu(sbi->s_es->s_first_data_block); + ext3_fsblk_t block = le32_to_cpu(sbi->s_es->s_first_data_block); struct ext3_group_desc * gdp = NULL; int desc_block = 0; int i; @@ -1139,44 +1146,48 @@ static int ext3_check_descriptors (struc if ((i % EXT3_DESC_PER_BLOCK(sb)) == 0) gdp = (struct ext3_group_desc *) sbi->s_group_desc[desc_block++]->b_data; - if (le32_to_cpu(gdp->bg_block_bitmap) < block || - le32_to_cpu(gdp->bg_block_bitmap) >= + if (EXT3_BLOCK_BITMAP(gdp, ext3_group_first_block_no(sb, i)) < + block || + EXT3_BLOCK_BITMAP(gdp, ext3_group_first_block_no(sb, i)) >= block + EXT3_BLOCKS_PER_GROUP(sb)) { ext3_error (sb, "ext3_check_descriptors", "Block bitmap for group %d" " not in group (block %lu)!", i, (unsigned long) - le32_to_cpu(gdp->bg_block_bitmap)); + EXT3_BLOCK_BITMAP(gdp, ext3_group_first_block_no(sb, i))); return 0; } - if (le32_to_cpu(gdp->bg_inode_bitmap) < block || - le32_to_cpu(gdp->bg_inode_bitmap) >= + if (EXT3_INODE_BITMAP(gdp, ext3_group_first_block_no(sb, i)) < + block || + EXT3_INODE_BITMAP(gdp, ext3_group_first_block_no(sb, i)) >= block + EXT3_BLOCKS_PER_GROUP(sb)) { ext3_error (sb, "ext3_check_descriptors", "Inode bitmap for group %d" " not in group (block %lu)!", i, (unsigned long) - le32_to_cpu(gdp->bg_inode_bitmap)); + EXT3_INODE_BITMAP(gdp, ext3_group_first_block_no(sb, i))); return 0; } - if (le32_to_cpu(gdp->bg_inode_table) < block || - le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >= - block + EXT3_BLOCKS_PER_GROUP(sb)) + if (EXT3_INODE_TABLE(gdp, ext3_group_first_block_no(sb, i)) < + block || + EXT3_INODE_TABLE(gdp, ext3_group_first_block_no(sb, i)) + + sbi->s_itb_per_group >= + block + EXT3_BLOCKS_PER_GROUP(sb)) { ext3_error (sb, "ext3_check_descriptors", "Inode table for group %d" " not in group (block %lu)!", i, (unsigned long) - le32_to_cpu(gdp->bg_inode_table)); + EXT3_INODE_TABLE(gdp, ext3_group_first_block_no(sb, i))); return 0; } block += EXT3_BLOCKS_PER_GROUP(sb); gdp++; } - sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb)); + EXT3_FREE_BLOCKS_COUNT_SET(sbi->s_es, ext3_count_free_blocks(sb)); sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3_count_free_inodes(sb)); return 1; } @@ -1314,15 +1325,14 @@ static loff_t ext3_max_size(int bits) return res; } -static unsigned long descriptor_loc(struct super_block *sb, - unsigned long logic_sb_block, +static ext3_fsblk_t descriptor_loc(struct super_block *sb, + ext3_fsblk_t logic_sb_block, int nr) { struct ext3_sb_info *sbi = EXT3_SB(sb); - unsigned long bg, first_data_block, first_meta_bg; + unsigned long bg, first_meta_bg; int has_super = 0; - first_data_block = le32_to_cpu(sbi->s_es->s_first_data_block); first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) || @@ -1331,7 +1341,7 @@ static unsigned long descriptor_loc(stru bg = sbi->s_desc_per_block * nr; if (ext3_bg_has_super(sb, bg)) has_super = 1; - return (first_data_block + has_super + (bg * sbi->s_blocks_per_group)); + return (has_super + ext3_group_first_block_no(sb, bg)); } @@ -1340,9 +1350,9 @@ static int ext3_fill_super (struct super struct buffer_head * bh; struct ext3_super_block *es = NULL; struct ext3_sb_info *sbi; - unsigned long block; - unsigned long sb_block = get_sb_block(&data); - unsigned long logic_sb_block; + ext3_fsblk_t block; + ext3_fsblk_t sb_block = get_sb_block(&data); + ext3_fsblk_t logic_sb_block; unsigned long offset = 0; unsigned long journal_inum = 0; unsigned long journal_devnum = 0; @@ -1354,6 +1364,7 @@ static int ext3_fill_super (struct super int i; int needs_recovery; __le32 features; + __u64 blocks_count; sbi = kmalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) @@ -1377,8 +1388,8 @@ static int ext3_fill_super (struct super * block sizes. We need to calculate the offset from buffer start. */ if (blocksize != EXT3_MIN_BLOCK_SIZE) { - logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; - offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; + logic_sb_block = sb_block * EXT3_MIN_BLOCK_SIZE; + offset = sector_div(logic_sb_block, blocksize); } else { logic_sb_block = sb_block; } @@ -1483,8 +1494,8 @@ static int ext3_fill_super (struct super brelse (bh); sb_set_blocksize(sb, blocksize); - logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; - offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; + logic_sb_block = sb_block * EXT3_MIN_BLOCK_SIZE; + offset = sector_div(logic_sb_block, blocksize); bh = sb_bread(sb, logic_sb_block); if (!bh) { printk(KERN_ERR @@ -1566,10 +1577,11 @@ static int ext3_fill_super (struct super if (EXT3_BLOCKS_PER_GROUP(sb) == 0) goto cantfind_ext3; - sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) - - le32_to_cpu(es->s_first_data_block) + - EXT3_BLOCKS_PER_GROUP(sb) - 1) / - EXT3_BLOCKS_PER_GROUP(sb); + blocks_count = (EXT3_BLOCKS_COUNT(es) - + le32_to_cpu(es->s_first_data_block) + + EXT3_BLOCKS_PER_GROUP(sb) - 1); + do_div(blocks_count, EXT3_BLOCKS_PER_GROUP(sb)); + sbi->s_groups_count = blocks_count; db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) / EXT3_DESC_PER_BLOCK(sb); sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), @@ -1579,9 +1591,16 @@ static int ext3_fill_super (struct super goto failed_mount; } - percpu_counter_init(&sbi->s_freeblocks_counter); - percpu_counter_init(&sbi->s_freeinodes_counter); - percpu_counter_init(&sbi->s_dirs_counter); + if (EXT3_BLOCKS_COUNT(es) > + (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { + printk(KERN_ERR "EXT3-fs: filesystem on %s" + " too large to mount safely\n", sb->s_id); + if (sizeof(sector_t) < 8) + printk(KERN_WARNING + "EXT3-fs: CONFIG_LBD not enabled\n"); + goto failed_mount; + } + bgl_lock_init(&sbi->s_blockgroup_lock); for (i = 0; i < db_count; i++) { @@ -1596,11 +1615,20 @@ static int ext3_fill_super (struct super } if (!ext3_check_descriptors (sb)) { printk (KERN_ERR "EXT3-fs: group descriptors corrupted !\n"); + goto failed_mount2; } sbi->s_gdb_count = db_count; get_random_bytes(&sbi->s_next_generation, sizeof(u32)); spin_lock_init(&sbi->s_next_gen_lock); + + percpu_counter_init(&sbi->s_freeblocks_counter, + ext3_count_free_blocks(sb)); + percpu_counter_init(&sbi->s_freeinodes_counter, + ext3_count_free_inodes(sb)); + percpu_counter_init(&sbi->s_dirs_counter, + ext3_count_dirs(sb)); + /* per fileystem reservation list head & lock */ spin_lock_init(&sbi->s_rsv_window_lock); sbi->s_rsv_window_root = RB_ROOT; @@ -1639,16 +1667,16 @@ static int ext3_fill_super (struct super if (!test_opt(sb, NOLOAD) && EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { if (ext3_load_journal(sb, es, journal_devnum)) - goto failed_mount2; + goto failed_mount3; } else if (journal_inum) { if (ext3_create_journal(sb, es, journal_inum)) - goto failed_mount2; + goto failed_mount3; } else { if (!silent) printk (KERN_ERR "ext3: No journal on filesystem on %s\n", sb->s_id); - goto failed_mount2; + goto failed_mount3; } /* We have now updated the journal if required, so we can @@ -1671,7 +1699,7 @@ static int ext3_fill_super (struct super (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) { printk(KERN_ERR "EXT3-fs: Journal does not support " "requested data journaling mode\n"); - goto failed_mount3; + goto failed_mount4; } default: break; @@ -1694,13 +1722,13 @@ static int ext3_fill_super (struct super if (!sb->s_root) { printk(KERN_ERR "EXT3-fs: get root inode failed\n"); iput(root); - goto failed_mount3; + goto failed_mount4; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { dput(sb->s_root); sb->s_root = NULL; printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n"); - goto failed_mount3; + goto failed_mount4; } ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); @@ -1723,12 +1751,7 @@ static int ext3_fill_super (struct super test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": "writeback"); - percpu_counter_mod(&sbi->s_freeblocks_counter, - ext3_count_free_blocks(sb)); - percpu_counter_mod(&sbi->s_freeinodes_counter, - ext3_count_free_inodes(sb)); - percpu_counter_mod(&sbi->s_dirs_counter, - ext3_count_dirs(sb)); + ext3_ext_init(sb); lock_kernel(); return 0; @@ -1739,8 +1762,12 @@ cantfind_ext3: sb->s_id); goto failed_mount; -failed_mount3: +failed_mount4: journal_destroy(sbi->s_journal); +failed_mount3: + percpu_counter_destroy(&sbi->s_freeblocks_counter); + percpu_counter_destroy(&sbi->s_freeinodes_counter); + percpu_counter_destroy(&sbi->s_dirs_counter); failed_mount2: for (i = 0; i < db_count; i++) brelse(sbi->s_group_desc[i]); @@ -1827,10 +1854,10 @@ static journal_t *ext3_get_dev_journal(s { struct buffer_head * bh; journal_t *journal; - int start; - int len; + ext3_fsblk_t start; + ext3_fsblk_t len; int hblock, blocksize; - unsigned long sb_block; + ext3_fsblk_t sb_block; unsigned long offset; struct ext3_super_block * es; struct block_device *bdev; @@ -1879,7 +1906,7 @@ static journal_t *ext3_get_dev_journal(s goto out_bdev; } - len = le32_to_cpu(es->s_blocks_count); + len = EXT3_BLOCKS_COUNT(es); start = sb_block + 1; brelse(bh); /* we're done with the superblock */ @@ -2049,7 +2076,7 @@ static void ext3_commit_super (struct su if (!sbh) return; es->s_wtime = cpu_to_le32(get_seconds()); - es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb)); + EXT3_FREE_BLOCKS_COUNT_SET(es, ext3_count_free_blocks(sb)); es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb)); BUFFER_TRACE(sbh, "marking dirty"); mark_buffer_dirty(sbh); @@ -2203,7 +2230,7 @@ static int ext3_remount (struct super_bl { struct ext3_super_block * es; struct ext3_sb_info *sbi = EXT3_SB(sb); - unsigned long n_blocks_count = 0; + ext3_fsblk_t n_blocks_count = 0; unsigned long old_sb_flags; struct ext3_mount_options old_opts; int err; @@ -2242,7 +2269,7 @@ static int ext3_remount (struct super_bl ext3_init_journal_params(sb, sbi->s_journal); if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || - n_blocks_count > le32_to_cpu(es->s_blocks_count)) { + n_blocks_count > EXT3_BLOCKS_COUNT(es)) { if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) { err = -EROFS; goto restore_opts; @@ -2322,7 +2349,7 @@ static int ext3_statfs (struct super_blo { struct ext3_sb_info *sbi = EXT3_SB(sb); struct ext3_super_block *es = sbi->s_es; - unsigned long overhead; + ext3_fsblk_t overhead; int i; if (test_opt (sb, MINIX_DF)) @@ -2362,10 +2389,10 @@ static int ext3_statfs (struct super_blo buf->f_type = EXT3_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; - buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead; + buf->f_blocks = EXT3_BLOCKS_COUNT(es) - overhead; buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); - buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); - if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) + buf->f_bavail = buf->f_bfree - EXT3_R_BLOCKS_COUNT(es); + if (buf->f_bfree < EXT3_R_BLOCKS_COUNT(es)) buf->f_bavail = 0; buf->f_files = le32_to_cpu(es->s_inodes_count); buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); diff -puN fs/ext3/xattr.c~ext3-2.6.17 fs/ext3/xattr.c --- linux-2.6.17/fs/ext3/xattr.c~ext3-2.6.17 2006-06-27 12:57:54.863366399 -0700 +++ linux-2.6.17-ming/fs/ext3/xattr.c 2006-06-27 12:57:55.073340632 -0700 @@ -225,7 +225,7 @@ ext3_xattr_block_get(struct inode *inode error = -ENODATA; if (!EXT3_I(inode)->i_file_acl) goto cleanup; - ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl); + ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl); bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); if (!bh) goto cleanup; @@ -233,7 +233,7 @@ ext3_xattr_block_get(struct inode *inode atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); if (ext3_xattr_check_block(bh)) { bad_block: ext3_error(inode->i_sb, __FUNCTION__, - "inode %ld: bad block %d", inode->i_ino, + "inode %ld: bad block "E3FSBLK, inode->i_ino, EXT3_I(inode)->i_file_acl); error = -EIO; goto cleanup; @@ -366,7 +366,7 @@ ext3_xattr_block_lis