Skip to content

Commit c012e6c

Browse files
committed
Commit when the main free queue has too many nodes
As mentioned in the previous commit message, generic/269 of xfstests makes the main free queue unbalanced enough that insertions fail. Clearing the free queue when the transaction starts is not enough to prevent it, because the whole disaster can happen without a single commit. So, start forcing commits when the main queue has too many nodes. I don't think there is a problem with being too strict here, because a truly full main queue would be gigantic for most normal filesystems. Just force a commit when we reach half the limit (rounded up). Of course make an exception for filesystems with a single main fq node, since those can't even become unbalanced in the first place and we don't want to commit nonstop. Signed-off-by: Ernesto A. Fernández <[email protected]>
1 parent 3682c03 commit c012e6c

File tree

3 files changed

+31
-4
lines changed

3 files changed

+31
-4
lines changed

apfs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ struct apfs_spaceman {
152152
u32 sm_cib_count; /* Number of chunk-info blocks */
153153
u64 sm_free_count; /* Number of free blocks */
154154
u32 sm_addr_offset; /* Offset of cib addresses in @sm_raw */
155+
u64 sm_main_fq_nodes; /* Number of nodes in the main fq */
155156

156157
/*
157158
* A range of freed blocks not yet put in the free queue. Extend this as

spaceman.c

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,7 @@ static int apfs_flush_free_queue(struct super_block *sb, unsigned int qid)
600600
struct apfs_spaceman_phys *sm_raw = sm->sm_raw;
601601
struct apfs_spaceman_free_queue *fq = &sm_raw->sm_fq[qid];
602602
struct apfs_node *fq_root;
603+
struct apfs_btree_info *fq_info = NULL;
603604
u64 oldest = le64_to_cpu(fq->sfq_oldest_xid);
604605
int err;
605606

@@ -640,6 +641,16 @@ static int apfs_flush_free_queue(struct super_block *sb, unsigned int qid)
640641
fq->sfq_oldest_xid = cpu_to_le64(oldest);
641642
}
642643

644+
if (qid == APFS_SFQ_MAIN) {
645+
fq_info = (void *)fq_root->object.data + sb->s_blocksize - sizeof(*fq_info);
646+
sm->sm_main_fq_nodes = le64_to_cpu(fq_info->bt_node_count);
647+
if (sm->sm_main_fq_nodes != 1) {
648+
apfs_alert(sb, "main queue wasn't flushed in full - bug!");
649+
err = -EFSCORRUPTED;
650+
goto fail;
651+
}
652+
}
653+
643654
fail:
644655
apfs_node_free(fq_root);
645656
return err;
@@ -926,12 +937,11 @@ static int apfs_free_queue_try_insert(struct super_block *sb, u64 bno, u64 count
926937
__le64 raw_val;
927938
u64 node_count;
928939
u16 node_limit;
940+
unsigned int qid;
929941
int err;
930942

931-
if (apfs_block_in_ip(sm, bno))
932-
fq = &sm_raw->sm_fq[APFS_SFQ_IP];
933-
else
934-
fq = &sm_raw->sm_fq[APFS_SFQ_MAIN];
943+
qid = apfs_block_in_ip(sm, bno) ? APFS_SFQ_IP : APFS_SFQ_MAIN;
944+
fq = &sm_raw->sm_fq[qid];
935945

936946
fq_root = apfs_read_node(sb, le64_to_cpu(fq->sfq_tree_oid),
937947
APFS_OBJ_EPHEMERAL, true /* write */);
@@ -983,6 +993,9 @@ static int apfs_free_queue_try_insert(struct super_block *sb, u64 bno, u64 count
983993
fq->sfq_oldest_xid = cpu_to_le64(nxi->nx_xid);
984994
le64_add_cpu(&fq->sfq_count, count);
985995

996+
if (qid == APFS_SFQ_MAIN)
997+
sm->sm_main_fq_nodes = le64_to_cpu(fq_info->bt_node_count);
998+
986999
fail:
9871000
apfs_free_query(query);
9881001
apfs_node_free(fq_root);

transaction.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -827,6 +827,7 @@ static bool apfs_transaction_need_commit(struct super_block *sb)
827827
int buffers_max = nxi->nx_trans_buffers_max;
828828
int starts_max = APFS_TRANS_STARTS_MAX;
829829
int mq_max = APFS_TRANS_MAIN_QUEUE_MAX;
830+
int maxnodes;
830831

831832
/*
832833
* Try to avoid committing halfway through a data block write,
@@ -855,6 +856,18 @@ static bool apfs_transaction_need_commit(struct super_block *sb)
855856
/* Don't let the main queue get too full either */
856857
if (le64_to_cpu(fq_main->sfq_count) > mq_max)
857858
return true;
859+
860+
/*
861+
* The main free queue can become unbalanced enough to reach
862+
* the node limit while being mostly empty. For now, the only
863+
* way I have to rebalance it is to flush it entirely with a
864+
* new transaction. We could wait longer to do it, but I don't
865+
* see the point.
866+
*/
867+
maxnodes = le16_to_cpu(fq_main->sfq_tree_node_limit);
868+
maxnodes = (maxnodes + 1) >> 1;
869+
if (sm->sm_main_fq_nodes > 1 && sm->sm_main_fq_nodes >= maxnodes)
870+
return true;
858871
}
859872

860873
return false;

0 commit comments

Comments
 (0)