| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370 |
- From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
- From: Changwei Ge <[email protected]>
- Date: Wed, 31 Jan 2018 16:15:06 -0800
- Subject: [PATCH] ocfs2: try to reuse extent block in dealloc without
- meta_alloc
- MIME-Version: 1.0
- Content-Type: text/plain; charset=UTF-8
- Content-Transfer-Encoding: 8bit
- A crash issue was reported by John Lightsey with a call trace as follows:
- ocfs2_split_extent+0x1ad3/0x1b40 [ocfs2]
- ocfs2_change_extent_flag+0x33a/0x470 [ocfs2]
- ocfs2_mark_extent_written+0x172/0x220 [ocfs2]
- ocfs2_dio_end_io+0x62d/0x910 [ocfs2]
- dio_complete+0x19a/0x1a0
- do_blockdev_direct_IO+0x19dd/0x1eb0
- __blockdev_direct_IO+0x43/0x50
- ocfs2_direct_IO+0x8f/0xa0 [ocfs2]
- generic_file_direct_write+0xb2/0x170
- __generic_file_write_iter+0xc3/0x1b0
- ocfs2_file_write_iter+0x4bb/0xca0 [ocfs2]
- __vfs_write+0xae/0xf0
- vfs_write+0xb8/0x1b0
- SyS_write+0x4f/0xb0
- system_call_fastpath+0x16/0x75
- The BUG code told that extent tree wants to grow but no metadata was
- reserved ahead of time. From my investigation into this issue, the root
- cause it that although enough metadata is not reserved, there should be
- enough for following use. Rightmost extent is merged into its left one
- due to a certain times of marking extent written. Because during
- marking extent written, we got many physically continuous extents. At
- last, an empty extent showed up and the rightmost path is removed from
- extent tree.
- Add a new mechanism to reuse extent block cached in dealloc which were
- just unlinked from extent tree to solve this crash issue.
- Criteria is that during marking extents *written*, if extent rotation
- and merging results in unlinking extent with growing extent tree later
- without any metadata reserved ahead of time, try to reuse those extents
- in dealloc in which deleted extents are cached.
- Also, this patch addresses the issue John reported that ::dw_zero_count
- is not calculated properly.
- After applying this patch, the issue John reported was gone. Thanks for
- the reproducer provided by John. And this patch has passed
- ocfs2-test(29 cases) suite running by New H3C Group.
- [[email protected]: fix static checker warnning]
- Link: http://lkml.kernel.org/r/63ADC13FD55D6546B7DECE290D39E373F29196AE@H3CMLB12-EX.srv.huawei-3com.com
- [[email protected]: brelse(NULL) is legal]
- Link: http://lkml.kernel.org/r/[email protected]
- Signed-off-by: Changwei Ge <[email protected]>
- Reported-by: John Lightsey <[email protected]>
- Tested-by: John Lightsey <[email protected]>
- Cc: Joel Becker <[email protected]>
- Cc: Joseph Qi <[email protected]>
- Cc: Junxiao Bi <[email protected]>
- Cc: Dan Carpenter <[email protected]>
- Cc: Mark Fasheh <[email protected]>
- Signed-off-by: Andrew Morton <[email protected]>
- Signed-off-by: Linus Torvalds <[email protected]>
- (cherry picked from commit 71a36944042b7d9dd71f6a5d1c5ea1c2353b5d42)
- Signed-off-by: Fabian Grünbichler <[email protected]>
- ---
- fs/ocfs2/alloc.h | 1 +
- fs/ocfs2/alloc.c | 206 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
- fs/ocfs2/aops.c | 6 ++
- 3 files changed, 203 insertions(+), 10 deletions(-)
- diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
- index 27b75cf32cfa..250bcacdf9e9 100644
- --- a/fs/ocfs2/alloc.h
- +++ b/fs/ocfs2/alloc.h
- @@ -61,6 +61,7 @@ struct ocfs2_extent_tree {
- ocfs2_journal_access_func et_root_journal_access;
- void *et_object;
- unsigned int et_max_leaf_clusters;
- + struct ocfs2_cached_dealloc_ctxt *et_dealloc;
- };
-
- /*
- diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
- index ab5105f9767e..2f2c76193f54 100644
- --- a/fs/ocfs2/alloc.c
- +++ b/fs/ocfs2/alloc.c
- @@ -165,6 +165,13 @@ static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
- struct ocfs2_extent_rec *rec);
- static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et);
- static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et);
- +
- +static int ocfs2_reuse_blk_from_dealloc(handle_t *handle,
- + struct ocfs2_extent_tree *et,
- + struct buffer_head **new_eb_bh,
- + int blk_wanted, int *blk_given);
- +static int ocfs2_is_dealloc_empty(struct ocfs2_extent_tree *et);
- +
- static const struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = {
- .eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk,
- .eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk,
- @@ -448,6 +455,7 @@ static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
- if (!obj)
- obj = (void *)bh->b_data;
- et->et_object = obj;
- + et->et_dealloc = NULL;
-
- et->et_ops->eo_fill_root_el(et);
- if (!et->et_ops->eo_fill_max_leaf_clusters)
- @@ -1158,7 +1166,7 @@ static int ocfs2_add_branch(handle_t *handle,
- struct buffer_head **last_eb_bh,
- struct ocfs2_alloc_context *meta_ac)
- {
- - int status, new_blocks, i;
- + int status, new_blocks, i, block_given = 0;
- u64 next_blkno, new_last_eb_blk;
- struct buffer_head *bh;
- struct buffer_head **new_eb_bhs = NULL;
- @@ -1213,11 +1221,31 @@ static int ocfs2_add_branch(handle_t *handle,
- goto bail;
- }
-
- - status = ocfs2_create_new_meta_bhs(handle, et, new_blocks,
- - meta_ac, new_eb_bhs);
- - if (status < 0) {
- - mlog_errno(status);
- - goto bail;
- + /* Firstyly, try to reuse dealloc since we have already estimated how
- + * many extent blocks we may use.
- + */
- + if (!ocfs2_is_dealloc_empty(et)) {
- + status = ocfs2_reuse_blk_from_dealloc(handle, et,
- + new_eb_bhs, new_blocks,
- + &block_given);
- + if (status < 0) {
- + mlog_errno(status);
- + goto bail;
- + }
- + }
- +
- + BUG_ON(block_given > new_blocks);
- +
- + if (block_given < new_blocks) {
- + BUG_ON(!meta_ac);
- + status = ocfs2_create_new_meta_bhs(handle, et,
- + new_blocks - block_given,
- + meta_ac,
- + &new_eb_bhs[block_given]);
- + if (status < 0) {
- + mlog_errno(status);
- + goto bail;
- + }
- }
-
- /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be
- @@ -1340,15 +1368,25 @@ static int ocfs2_shift_tree_depth(handle_t *handle,
- struct ocfs2_alloc_context *meta_ac,
- struct buffer_head **ret_new_eb_bh)
- {
- - int status, i;
- + int status, i, block_given = 0;
- u32 new_clusters;
- struct buffer_head *new_eb_bh = NULL;
- struct ocfs2_extent_block *eb;
- struct ocfs2_extent_list *root_el;
- struct ocfs2_extent_list *eb_el;
-
- - status = ocfs2_create_new_meta_bhs(handle, et, 1, meta_ac,
- - &new_eb_bh);
- + if (!ocfs2_is_dealloc_empty(et)) {
- + status = ocfs2_reuse_blk_from_dealloc(handle, et,
- + &new_eb_bh, 1,
- + &block_given);
- + } else if (meta_ac) {
- + status = ocfs2_create_new_meta_bhs(handle, et, 1, meta_ac,
- + &new_eb_bh);
- +
- + } else {
- + BUG();
- + }
- +
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- @@ -1511,7 +1549,7 @@ static int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et,
- int depth = le16_to_cpu(el->l_tree_depth);
- struct buffer_head *bh = NULL;
-
- - BUG_ON(meta_ac == NULL);
- + BUG_ON(meta_ac == NULL && ocfs2_is_dealloc_empty(et));
-
- shift = ocfs2_find_branch_target(et, &bh);
- if (shift < 0) {
- @@ -6585,6 +6623,154 @@ ocfs2_find_per_slot_free_list(int type,
- return fl;
- }
-
- +static struct ocfs2_per_slot_free_list *
- +ocfs2_find_preferred_free_list(int type,
- + int preferred_slot,
- + int *real_slot,
- + struct ocfs2_cached_dealloc_ctxt *ctxt)
- +{
- + struct ocfs2_per_slot_free_list *fl = ctxt->c_first_suballocator;
- +
- + while (fl) {
- + if (fl->f_inode_type == type && fl->f_slot == preferred_slot) {
- + *real_slot = fl->f_slot;
- + return fl;
- + }
- +
- + fl = fl->f_next_suballocator;
- + }
- +
- + /* If we can't find any free list matching preferred slot, just use
- + * the first one.
- + */
- + fl = ctxt->c_first_suballocator;
- + *real_slot = fl->f_slot;
- +
- + return fl;
- +}
- +
- +/* Return Value 1 indicates empty */
- +static int ocfs2_is_dealloc_empty(struct ocfs2_extent_tree *et)
- +{
- + struct ocfs2_per_slot_free_list *fl = NULL;
- +
- + if (!et->et_dealloc)
- + return 1;
- +
- + fl = et->et_dealloc->c_first_suballocator;
- + if (!fl)
- + return 1;
- +
- + if (!fl->f_first)
- + return 1;
- +
- + return 0;
- +}
- +
- +/* If extent was deleted from tree due to extent rotation and merging, and
- + * no metadata is reserved ahead of time. Try to reuse some extents
- + * just deleted. This is only used to reuse extent blocks.
- + * It is supposed to find enough extent blocks in dealloc if our estimation
- + * on metadata is accurate.
- + */
- +static int ocfs2_reuse_blk_from_dealloc(handle_t *handle,
- + struct ocfs2_extent_tree *et,
- + struct buffer_head **new_eb_bh,
- + int blk_wanted, int *blk_given)
- +{
- + int i, status = 0, real_slot;
- + struct ocfs2_cached_dealloc_ctxt *dealloc;
- + struct ocfs2_per_slot_free_list *fl;
- + struct ocfs2_cached_block_free *bf;
- + struct ocfs2_extent_block *eb;
- + struct ocfs2_super *osb =
- + OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci));
- +
- + *blk_given = 0;
- +
- + /* If extent tree doesn't have a dealloc, this is not faulty. Just
- + * tell upper caller dealloc can't provide any block and it should
- + * ask for alloc to claim more space.
- + */
- + dealloc = et->et_dealloc;
- + if (!dealloc)
- + goto bail;
- +
- + for (i = 0; i < blk_wanted; i++) {
- + /* Prefer to use local slot */
- + fl = ocfs2_find_preferred_free_list(EXTENT_ALLOC_SYSTEM_INODE,
- + osb->slot_num, &real_slot,
- + dealloc);
- + /* If no more block can be reused, we should claim more
- + * from alloc. Just return here normally.
- + */
- + if (!fl) {
- + status = 0;
- + break;
- + }
- +
- + bf = fl->f_first;
- + fl->f_first = bf->free_next;
- +
- + new_eb_bh[i] = sb_getblk(osb->sb, bf->free_blk);
- + if (new_eb_bh[i] == NULL) {
- + status = -ENOMEM;
- + mlog_errno(status);
- + goto bail;
- + }
- +
- + mlog(0, "Reusing block(%llu) from "
- + "dealloc(local slot:%d, real slot:%d)\n",
- + bf->free_blk, osb->slot_num, real_slot);
- +
- + ocfs2_set_new_buffer_uptodate(et->et_ci, new_eb_bh[i]);
- +
- + status = ocfs2_journal_access_eb(handle, et->et_ci,
- + new_eb_bh[i],
- + OCFS2_JOURNAL_ACCESS_CREATE);
- + if (status < 0) {
- + mlog_errno(status);
- + goto bail;
- + }
- +
- + memset(new_eb_bh[i]->b_data, 0, osb->sb->s_blocksize);
- + eb = (struct ocfs2_extent_block *) new_eb_bh[i]->b_data;
- +
- + /* We can't guarantee that buffer head is still cached, so
- + * polutlate the extent block again.
- + */
- + strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
- + eb->h_blkno = cpu_to_le64(bf->free_blk);
- + eb->h_fs_generation = cpu_to_le32(osb->fs_generation);
- + eb->h_suballoc_slot = cpu_to_le16(real_slot);
- + eb->h_suballoc_loc = cpu_to_le64(bf->free_bg);
- + eb->h_suballoc_bit = cpu_to_le16(bf->free_bit);
- + eb->h_list.l_count =
- + cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb));
- +
- + /* We'll also be dirtied by the caller, so
- + * this isn't absolutely necessary.
- + */
- + ocfs2_journal_dirty(handle, new_eb_bh[i]);
- +
- + if (!fl->f_first) {
- + dealloc->c_first_suballocator = fl->f_next_suballocator;
- + kfree(fl);
- + }
- + kfree(bf);
- + }
- +
- + *blk_given = i;
- +
- +bail:
- + if (unlikely(status < 0)) {
- + for (i = 0; i < blk_wanted; i++)
- + brelse(new_eb_bh[i]);
- + }
- +
- + return status;
- +}
- +
- int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
- int type, int slot, u64 suballoc,
- u64 blkno, unsigned int bit)
- diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
- index 256986aca8df..e8e205bf2e41 100644
- --- a/fs/ocfs2/aops.c
- +++ b/fs/ocfs2/aops.c
- @@ -2332,6 +2332,12 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
-
- ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
-
- + /* Attach dealloc with extent tree in case that we may reuse extents
- + * which are already unlinked from current extent tree due to extent
- + * rotation and merging.
- + */
- + et.et_dealloc = &dealloc;
- +
- ret = ocfs2_lock_allocators(inode, &et, 0, dwc->dw_zero_count*2,
- &data_ac, &meta_ac);
- if (ret) {
- --
- 2.14.2
|