64 bit JBD changes originally from Zach Brown. --- linux-2.6.16-ming/fs/jbd/commit.c | 9 +++--- linux-2.6.16-ming/fs/jbd/journal.c | 51 +++++++++++++++++++++++++++------- linux-2.6.16-ming/fs/jbd/recovery.c | 40 ++++++++++++++++---------- linux-2.6.16-ming/fs/jbd/revoke.c | 18 +++++++++--- linux-2.6.16-ming/include/linux/jbd.h | 28 ++++++++++++++++-- 5 files changed, 108 insertions(+), 38 deletions(-) diff -puN include/linux/jbd.h~64bit-jbd include/linux/jbd.h --- linux-2.6.16/include/linux/jbd.h~64bit-jbd 2006-05-08 11:04:26.766462954 -0700 +++ linux-2.6.16-ming/include/linux/jbd.h 2006-05-08 11:04:26.783460985 -0700 @@ -144,22 +144,32 @@ typedef struct journal_header_s /* - * The block tag: used to describe a single buffer in the journal + * The block tag: used to describe a single buffer in the journal. + * t_blocknr_high is only used if INCOMPAT_64BIT is set so this raw struct + * shouldn't be used for pointer math or sizeof(). journal_tag_bytes(journal) + * is provided. */ typedef struct journal_block_tag_s { __be32 t_blocknr; /* The on-disk block number */ __be32 t_flags; /* See below */ + __be32 t_blocknr_high; /* most-significant high 32bits. */ } journal_block_tag_t; /* * The revoke descriptor: used on disk to describe a series of blocks to - * be revoked from the log + * be revoked from the log. The width of the block numbers is determined + * by INCOMPAT_64BIT. It just so happens that header+count is 64 bit + * aligned so the 64-bit block array ends up being aligned. */ typedef struct journal_revoke_header_s { journal_header_t r_header; __be32 r_count; /* Count of bytes used in the block */ + union { + __be32 r_blocks32[0]; + __be64 r_blocks64[0]; + }; } journal_revoke_header_t; @@ -211,7 +221,11 @@ typedef struct journal_superblock_s __be32 s_max_trans_data; /* Limit of data blocks per trans. 
*/ /* 0x0050 */ - __u32 s_padding[44]; + __be32 s_first_high; /* MS 32 bits of a 64bit s_first */ + __be32 s_start_high; /* MS 32 bits of a 64bit s_start */ + +/* 0x0058 */ + __u32 s_padding[42]; /* 0x0100 */ __u8 s_users[16*48]; /* ids of all fs'es sharing the log */ @@ -229,11 +243,13 @@ typedef struct journal_superblock_s ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask)))) #define JFS_FEATURE_INCOMPAT_REVOKE 0x00000001 +#define JFS_FEATURE_INCOMPAT_64BIT 0x00000002 /* Features known to this kernel version: */ #define JFS_KNOWN_COMPAT_FEATURES 0 #define JFS_KNOWN_ROCOMPAT_FEATURES 0 -#define JFS_KNOWN_INCOMPAT_FEATURES JFS_FEATURE_INCOMPAT_REVOKE +#define JFS_KNOWN_INCOMPAT_FEATURES (JFS_FEATURE_INCOMPAT_REVOKE | \ + JFS_FEATURE_INCOMPAT_64BIT) #ifdef __KERNEL__ @@ -1041,6 +1057,10 @@ static inline int tid_geq(tid_t x, tid_t } extern int journal_blocks_per_page(struct inode *inode); +extern size_t journal_tag_bytes(journal_t *journal); +extern u64 read_split_be64(journal_t *journal, __be32 *high, __be32 *low); +extern void write_split_be64(journal_t *journal, __be32 *high, __be32 *low, + u64 val); /* * Return the minimum number of blocks which must be free in the journal diff -puN fs/jbd/commit.c~64bit-jbd fs/jbd/commit.c --- linux-2.6.16/fs/jbd/commit.c~64bit-jbd 2006-05-08 11:04:26.769462606 -0700 +++ linux-2.6.16-ming/fs/jbd/commit.c 2006-05-08 11:04:26.785460753 -0700 @@ -553,10 +553,11 @@ write_out_data: tag_flag |= JFS_FLAG_SAME_UUID; tag = (journal_block_tag_t *) tagp; - tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr); + write_split_be64(journal, &tag->t_blocknr_high, + &tag->t_blocknr, jh2bh(jh)->b_blocknr); tag->t_flags = cpu_to_be32(tag_flag); - tagp += sizeof(journal_block_tag_t); - space_left -= sizeof(journal_block_tag_t); + tagp += journal_tag_bytes(journal); + space_left -= journal_tag_bytes(journal); if (first_tag) { memcpy (tagp, journal->j_uuid, 16); @@ -570,7 +571,7 @@ write_out_data: if (bufs == journal->j_wbufsize || 
commit_transaction->t_buffers == NULL || - space_left < sizeof(journal_block_tag_t) + 16) { + space_left < journal_tag_bytes(journal) + 16) { jbd_debug(4, "JBD: Submit %d IOs\n", bufs); diff -puN fs/jbd/journal.c~64bit-jbd fs/jbd/journal.c --- linux-2.6.16/fs/jbd/journal.c~64bit-jbd 2006-05-08 11:04:26.773462143 -0700 +++ linux-2.6.16-ming/fs/jbd/journal.c 2006-05-08 11:04:26.788460406 -0700 @@ -821,7 +821,7 @@ static int journal_reset(journal_t *jour journal_superblock_t *sb = journal->j_superblock; sector_t first, last; - first = be32_to_cpu(sb->s_first); + first = read_split_be64(journal, &sb->s_first_high, &sb->s_first); last = be32_to_cpu(sb->s_maxlen); journal->j_first = first; @@ -904,7 +904,7 @@ int journal_create(journal_t *journal) sb->s_blocksize = cpu_to_be32(journal->j_blocksize); sb->s_maxlen = cpu_to_be32(journal->j_maxlen); - sb->s_first = cpu_to_be32(1); + write_split_be64(journal, &sb->s_first_high, &sb->s_first, 1); journal->j_transaction_sequence = 1; @@ -927,6 +927,8 @@ void journal_update_superblock(journal_t journal_superblock_t *sb = journal->j_superblock; struct buffer_head *bh = journal->j_sb_buffer; + spin_lock(&journal->j_state_lock); + /* * As a special case, if the on-disk copy is already marked as needing * no recovery (s_start == 0) and there are no outstanding transactions @@ -934,8 +936,8 @@ void journal_update_superblock(journal_t * until the next commit by setting JFS_FLUSHED. This avoids * attempting a write to a potential-readonly device. 
*/ - if (sb->s_start == 0 && journal->j_tail_sequence == - journal->j_transaction_sequence) { + if (read_split_be64(journal, &sb->s_start_high, &sb->s_start) == 0 && + journal->j_tail_sequence == journal->j_transaction_sequence) { jbd_debug(1,"JBD: Skipping superblock update on recovered sb " "(start %ld, seq %d, errno %d)\n", journal->j_tail, journal->j_tail_sequence, @@ -943,12 +945,12 @@ void journal_update_superblock(journal_t goto out; } - spin_lock(&journal->j_state_lock); jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", journal->j_tail, journal->j_tail_sequence, journal->j_errno); sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); - sb->s_start = cpu_to_be32(journal->j_tail); + write_split_be64(journal, &sb->s_start_high, &sb->s_start, + journal->j_tail); sb->s_errno = cpu_to_be32(journal->j_errno); spin_unlock(&journal->j_state_lock); @@ -958,14 +960,14 @@ void journal_update_superblock(journal_t sync_dirty_buffer(bh); else ll_rw_block(SWRITE, 1, &bh); + spin_lock(&journal->j_state_lock); out: /* If we have just flushed the log (by marking s_start==0), then * any future commit will have to be careful to update the * superblock again to re-record the true start of the log. 
*/ - spin_lock(&journal->j_state_lock); - if (sb->s_start) + if (read_split_be64(journal, &sb->s_start_high, &sb->s_start)) journal->j_flags &= ~JFS_FLUSHED; else journal->j_flags |= JFS_FLUSHED; @@ -1049,8 +1051,10 @@ static int load_superblock(journal_t *jo sb = journal->j_superblock; journal->j_tail_sequence = be32_to_cpu(sb->s_sequence); - journal->j_tail = be32_to_cpu(sb->s_start); - journal->j_first = be32_to_cpu(sb->s_first); + journal->j_tail = read_split_be64(journal, &sb->s_start_high, + &sb->s_start); + journal->j_first = read_split_be64(journal, &sb->s_first_high, + &sb->s_first); journal->j_last = be32_to_cpu(sb->s_maxlen); journal->j_errno = be32_to_cpu(sb->s_errno); @@ -1603,6 +1607,33 @@ int journal_blocks_per_page(struct inode } /* + * helper functions to deal with 32 or 64bit block numbers. + */ +size_t journal_tag_bytes(journal_t *journal) +{ + if (JFS_HAS_INCOMPAT_FEATURE(journal, JFS_FEATURE_INCOMPAT_64BIT)) + return sizeof(journal_block_tag_t); + else + return offsetof(journal_block_tag_t, t_blocknr_high); +} + +void write_split_be64(journal_t *journal, __be32 *high, __be32 *low, + u64 val) +{ + *low = cpu_to_be32(val & (u32)~0); + if (JFS_HAS_INCOMPAT_FEATURE(journal, JFS_FEATURE_INCOMPAT_64BIT)) + *high = cpu_to_be32(val >> 32); +} + +u64 read_split_be64(journal_t *journal, __be32 *high, __be32 *low) +{ + u64 ret = be32_to_cpu(*low); + if (JFS_HAS_INCOMPAT_FEATURE(journal, JFS_FEATURE_INCOMPAT_64BIT)) + ret |= (u64)be32_to_cpu(*high) << 32; + return ret; +} + +/* * Simple support for retrying memory allocations. Introduced to help to * debug different VM deadlock avoidance strategies. */ diff -puN fs/jbd/recovery.c~64bit-jbd fs/jbd/recovery.c --- linux-2.6.16/fs/jbd/recovery.c~64bit-jbd 2006-05-08 11:04:26.776461795 -0700 +++ linux-2.6.16-ming/fs/jbd/recovery.c 2006-05-08 11:04:26.790460174 -0700 @@ -178,19 +178,19 @@ static int jread(struct buffer_head **bh * Count the number of in-use tags in a journal descriptor block. 
*/ -static int count_tags(struct buffer_head *bh, int size) +static int count_tags(journal_t *journal, struct buffer_head *bh) { char * tagp; journal_block_tag_t * tag; - int nr = 0; + int nr = 0, size = journal->j_blocksize; tagp = &bh->b_data[sizeof(journal_header_t)]; - while ((tagp - bh->b_data + sizeof(journal_block_tag_t)) <= size) { + while ((tagp - bh->b_data + journal_tag_bytes(journal)) <= size) { tag = (journal_block_tag_t *) tagp; nr++; - tagp += sizeof(journal_block_tag_t); + tagp += journal_tag_bytes(journal); if (!(tag->t_flags & cpu_to_be32(JFS_FLAG_SAME_UUID))) tagp += 16; @@ -237,7 +237,7 @@ int journal_recover(journal_t *journal) * unmounted. */ - if (!sb->s_start) { + if (!read_split_be64(journal, &sb->s_start_high, &sb->s_start)) { jbd_debug(1, "No recovery required, last transaction %d\n", be32_to_cpu(sb->s_sequence)); journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1; @@ -322,7 +322,7 @@ static int do_one_pass(journal_t *journa /* Precompute the maximum metadata descriptors in a descriptor block */ int MAX_BLOCKS_PER_DESC; MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t)) - / sizeof(journal_block_tag_t)); + / journal_tag_bytes(journal)); /* * First thing is to establish what we expect to find in the log @@ -332,7 +332,8 @@ static int do_one_pass(journal_t *journa sb = journal->j_superblock; next_commit_ID = be32_to_cpu(sb->s_sequence); - next_log_block = be32_to_cpu(sb->s_start); + next_log_block = read_split_be64(journal, &sb->s_start_high, + &sb->s_start); first_commit_ID = next_commit_ID; if (pass == PASS_SCAN) @@ -412,8 +413,7 @@ static int do_one_pass(journal_t *journa * in pass REPLAY; otherwise, just skip over the * blocks it describes. 
*/ if (pass != PASS_REPLAY) { - next_log_block += - count_tags(bh, journal->j_blocksize); + next_log_block += count_tags(journal, bh); wrap(journal, next_log_block); brelse(bh); continue; @@ -424,7 +424,7 @@ static int do_one_pass(journal_t *journa * getting done here! */ tagp = &bh->b_data[sizeof(journal_header_t)]; - while ((tagp - bh->b_data +sizeof(journal_block_tag_t)) + while ((tagp - bh->b_data + journal_tag_bytes(journal)) <= journal->j_blocksize) { sector_t io_block; @@ -446,7 +446,8 @@ static int do_one_pass(journal_t *journa sector_t blocknr; J_ASSERT(obh != NULL); - blocknr = be32_to_cpu(tag->t_blocknr); + blocknr = read_split_be64(journal, + &tag->t_blocknr_high, &tag->t_blocknr); /* If the block has been * revoked, then we're all done @@ -494,7 +495,7 @@ static int do_one_pass(journal_t *journa } skip_write: - tagp += sizeof(journal_block_tag_t); + tagp += journal_tag_bytes(journal); if (!(flags & JFS_FLAG_SAME_UUID)) tagp += 16; @@ -570,18 +571,25 @@ static int scan_revoke_records(journal_t tid_t sequence, struct recovery_info *info) { journal_revoke_header_t *header; - sector_t offset, max; + sector_t i, offset, max; header = (journal_revoke_header_t *) bh->b_data; offset = sizeof(journal_revoke_header_t); max = be32_to_cpu(header->r_count); - while (offset < max) { + for (i = 0; offset < max; i++) { sector_t blocknr; int err; - blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); - offset += 4; + if (JFS_HAS_INCOMPAT_FEATURE(journal, + JFS_FEATURE_INCOMPAT_64BIT)) { + blocknr = be64_to_cpu(header->r_blocks64[i]); + offset += sizeof(u64); + } else { + blocknr = be32_to_cpu(header->r_blocks32[i]); + offset += sizeof(u32); + } + err = journal_set_revoke(journal, blocknr, sequence); if (err) return err; diff -puN fs/jbd/revoke.c~64bit-jbd fs/jbd/revoke.c --- linux-2.6.16/fs/jbd/revoke.c~64bit-jbd 2006-05-08 11:04:26.779461448 -0700 +++ linux-2.6.16-ming/fs/jbd/revoke.c 2006-05-08 11:04:26.791460058 -0700 @@ -546,8 +546,9 @@ static void 
write_one_revoke_record(jour struct jbd_revoke_record_s *record) { struct journal_head *descriptor; - int offset; + int offset, i; journal_header_t *header; + journal_revoke_header_t *rev; /* If we are already aborting, this all becomes a noop. We still need to go round the loop in @@ -584,9 +585,18 @@ static void write_one_revoke_record(jour *descriptorp = descriptor; } - * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = - cpu_to_be32(record->blocknr); - offset += 4; + i = (offset - sizeof(journal_revoke_header_t)); + rev = (journal_revoke_header_t *) &jh2bh(descriptor)->b_data[0]; + if (JFS_HAS_INCOMPAT_FEATURE(journal, JFS_FEATURE_INCOMPAT_64BIT)) { + i /= sizeof(u64); + rev->r_blocks64[i] = cpu_to_be64(record->blocknr); + offset += sizeof(u64); + } else { + i /= sizeof(u32); + rev->r_blocks32[i] = cpu_to_be32(record->blocknr); + offset += sizeof(u32); + } + *offsetp = offset; } _